@botpress/zai 2.0.15 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/context.js +131 -0
- package/dist/emitter.js +42 -0
- package/dist/index.d.ts +106 -10
- package/dist/operations/check.js +46 -27
- package/dist/operations/extract.js +115 -42
- package/dist/operations/filter.js +34 -19
- package/dist/operations/label.js +65 -42
- package/dist/operations/rewrite.js +37 -17
- package/dist/operations/summarize.js +32 -13
- package/dist/operations/text.js +28 -8
- package/dist/response.js +82 -0
- package/dist/tokenizer.js +11 -0
- package/e2e/client.ts +43 -29
- package/e2e/data/cache.jsonl +416 -0
- package/package.json +11 -3
- package/src/context.ts +197 -0
- package/src/emitter.ts +49 -0
- package/src/operations/check.ts +99 -49
- package/src/operations/extract.ts +138 -55
- package/src/operations/filter.ts +62 -35
- package/src/operations/label.ts +117 -62
- package/src/operations/rewrite.ts +50 -21
- package/src/operations/summarize.ts +40 -14
- package/src/operations/text.ts +32 -8
- package/src/response.ts +114 -0
- package/src/tokenizer.ts +14 -0
package/dist/operations/summarize.js CHANGED

@@ -1,5 +1,8 @@
 import { z } from "@bpinternal/zui";
 import { chunk } from "lodash-es";
+import { ZaiContext } from "../context";
+import { Response } from "../response";
+import { getTokenizer } from "../tokenizer";
 import { Zai } from "../zai";
 import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from "./constants";
 const Options = z.object({
@@ -17,20 +20,20 @@ const Options = z.object({
 });
 const START = "\u25A0START\u25A0";
 const END = "\u25A0END\u25A0";
-
-
-  const tokenizer = await
-  await
-  const INPUT_COMPONENT_SIZE = Math.max(100, (
+const summarize = async (original, options, ctx) => {
+  ctx.controller.signal.throwIfAborted();
+  const tokenizer = await getTokenizer();
+  const model = await ctx.getModel();
+  const INPUT_COMPONENT_SIZE = Math.max(100, (model.input.maxTokens - PROMPT_INPUT_BUFFER) / 4);
   options.prompt = tokenizer.truncate(options.prompt, INPUT_COMPONENT_SIZE);
   options.format = tokenizer.truncate(options.format, INPUT_COMPONENT_SIZE);
-  const maxOutputSize =
+  const maxOutputSize = model.output.maxTokens - PROMPT_OUTPUT_BUFFER;
   if (options.length > maxOutputSize) {
     throw new Error(
-      `The desired output length is ${maxOutputSize} tokens long, which is more than the maximum of ${
+      `The desired output length is ${maxOutputSize} tokens long, which is more than the maximum of ${model.output.maxTokens} tokens for this model (${model.name})`
     );
   }
-  options.sliding.window = Math.min(options.sliding.window,
+  options.sliding.window = Math.min(options.sliding.window, model.input.maxTokens - PROMPT_INPUT_BUFFER);
   options.sliding.overlap = Math.min(options.sliding.overlap, options.sliding.window - 3 * options.sliding.overlap);
   const format = (summary, newText) => {
     return `
@@ -52,8 +55,8 @@ ${newText}
     const chunkSize = Math.ceil(tokens.length / (parts * N));
     if (useMergeSort) {
       const chunks = chunk(tokens, chunkSize).map((x) => x.join(""));
-      const allSummaries = await Promise.
-      return
+      const allSummaries = (await Promise.allSettled(chunks.map((chunk2) => summarize(chunk2, options, ctx)))).filter((x) => x.status === "fulfilled").map((x) => x.value);
+      return summarize(allSummaries.join("\n\n============\n\n"), options, ctx);
     }
     const summaries = [];
     let currentSummary = "";
@@ -103,7 +106,7 @@ ${newText}
       );
     }
   }
-
+  let { extracted: result } = await ctx.generateContent({
     systemPrompt: `
 You are summarizing a text. The text is split into ${parts} parts, and you are currently working on part ${iteration}.
 At every step, you will receive the current summary and a new part of the text. You need to amend the summary to include the new information (if needed).
@@ -117,9 +120,14 @@ ${options.format}
 `.trim(),
     messages: [{ type: "text", content: format(currentSummary, slice), role: "user" }],
     maxTokens: generationLength,
-    stopSequences: [END]
+    stopSequences: [END],
+    transform: (text) => {
+      if (!text.trim().length) {
+        throw new Error("The model did not return a valid summary. The response was empty.");
+      }
+      return text;
+    }
   });
-  let result = output?.choices[0]?.content;
   if (result.includes(START)) {
     result = result.slice(result.indexOf(START) + START.length);
   }
@@ -131,3 +139,14 @@ ${options.format}
   }
   return currentSummary.trim();
 };
+Zai.prototype.summarize = function(original, _options) {
+  const options = Options.parse(_options ?? {});
+  const context = new ZaiContext({
+    client: this.client,
+    modelId: this.Model,
+    taskId: this.taskId,
+    taskType: "summarize",
+    adapter: this.adapter
+  });
+  return new Response(context, summarize(original, options, context), (value) => value);
+};
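The summarize operation is now built around a ZaiContext plus the new thenable Response (see package/dist/response.js below), so callers can still await it but can also observe progress. A minimal usage sketch, assuming an already-configured Zai instance and an input string (both hypothetical); the length option mirrors the options.length check in the diff above:

import { Zai } from '@botpress/zai'

declare const zai: Zai // assumption: a Zai instance configured elsewhere
declare const longDocument: string // assumption: the text to summarize

// In 2.1.0, summarize() returns a Response: still awaitable, but observable too.
const response = zai.summarize(longDocument, { length: 500 })

response.on('progress', (usage) => {
  // 'progress' is re-emitted from the ZaiContext 'update' event
  console.log('usage so far', usage)
})

const summary = await response // then() applies the simplify callback, so this resolves to the summary itself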
package/dist/operations/text.js CHANGED

@@ -1,17 +1,21 @@
 import { z } from "@bpinternal/zui";
 import { clamp } from "lodash-es";
+import { ZaiContext } from "../context";
+import { Response } from "../response";
+import { getTokenizer } from "../tokenizer";
 import { Zai } from "../zai";
 import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from "./constants";
 const Options = z.object({
   length: z.number().min(1).max(1e5).optional().describe("The maximum number of tokens to generate")
 });
-
+const text = async (prompt, _options, ctx) => {
+  ctx.controller.signal.throwIfAborted();
   const options = Options.parse(_options ?? {});
-  const tokenizer = await
-  await
-  prompt = tokenizer.truncate(prompt, Math.max(
+  const tokenizer = await getTokenizer();
+  const model = await ctx.getModel();
+  prompt = tokenizer.truncate(prompt, Math.max(model.input.maxTokens - PROMPT_INPUT_BUFFER, 100));
   if (options.length) {
-    options.length = Math.min(
+    options.length = Math.min(model.output.maxTokens - PROMPT_OUTPUT_BUFFER, options.length);
   }
   const instructions = [];
   let chart = "";
@@ -33,7 +37,7 @@ Zai.prototype.text = async function(prompt, _options) {
 | 200-300 tokens| A medium paragraph (150-200 words) |
 | 300-500 tokens| A long paragraph (200-300 words) |`.trim();
   }
-  const {
+  const { extracted } = await ctx.generateContent({
     systemPrompt: `
 Generate a text that fulfills the user prompt below. Answer directly to the prompt, without any acknowledgements or fluff. Also, make sure the text is standalone and complete.
 ${instructions.map((x) => `- ${x}`).join("\n")}
@@ -41,7 +45,23 @@ ${chart}
 `.trim(),
     temperature: 0.7,
     messages: [{ type: "text", content: prompt, role: "user" }],
-    maxTokens: options.length
+    maxTokens: options.length,
+    transform: (text2) => {
+      if (!text2.trim().length) {
+        throw new Error("The model did not return a valid summary. The response was empty.");
+      }
+      return text2;
+    }
  });
-  return
+  return extracted;
+};
+Zai.prototype.text = function(prompt, _options) {
+  const context = new ZaiContext({
+    client: this.client,
+    modelId: this.Model,
+    taskId: this.taskId,
+    taskType: "zai.text",
+    adapter: this.adapter
+  });
+  return new Response(context, text(prompt, _options, context), (result) => result);
 };
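Because every operation now begins with ctx.controller.signal.throwIfAborted(), an in-flight generation can be cancelled through the Response. A sketch of abort handling, assuming the same hypothetical zai instance; the prompt, the length value, and the timeout are illustrative:

import { Zai } from '@botpress/zai'

declare const zai: Zai // assumption: a Zai instance configured elsewhere

const controller = new AbortController()
const timer = setTimeout(() => controller.abort(new Error('took too long')), 10_000)

try {
  // bindSignal() forwards the signal's abort to Response.abort(), which aborts the ZaiContext controller
  const generated = await zai
    .text('Write a one-paragraph release note for 2.1.0', { length: 200 })
    .bindSignal(controller.signal)
  console.log(generated)
} finally {
  clearTimeout(timer)
}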
package/dist/response.js ADDED

@@ -0,0 +1,82 @@
+import { EventEmitter } from "./emitter";
+export class Response {
+  _promise;
+  _eventEmitter;
+  _context;
+  _elasped = null;
+  _simplify;
+  constructor(context, promise, simplify) {
+    this._context = context;
+    this._eventEmitter = new EventEmitter();
+    this._simplify = simplify;
+    this._promise = promise.then(
+      (value) => {
+        this._elasped ||= this._context.elapsedTime;
+        this._eventEmitter.emit("complete", value);
+        this._eventEmitter.clear();
+        this._context.clear();
+        return value;
+      },
+      (reason) => {
+        this._elasped ||= this._context.elapsedTime;
+        this._eventEmitter.emit("error", reason);
+        this._eventEmitter.clear();
+        this._context.clear();
+        throw reason;
+      }
+    );
+    this._context.on("update", (usage) => {
+      this._eventEmitter.emit("progress", usage);
+    });
+  }
+  // Event emitter methods
+  on(type, listener) {
+    this._eventEmitter.on(type, listener);
+    return this;
+  }
+  off(type, listener) {
+    this._eventEmitter.off(type, listener);
+    return this;
+  }
+  once(type, listener) {
+    this._eventEmitter.once(type, listener);
+    return this;
+  }
+  bindSignal(signal) {
+    if (signal.aborted) {
+      this.abort(signal.reason);
+    }
+    const signalAbort = () => {
+      this.abort(signal.reason);
+    };
+    signal.addEventListener("abort", () => signalAbort());
+    this.once("complete", () => signal.removeEventListener("abort", signalAbort));
+    this.once("error", () => signal.removeEventListener("abort", signalAbort));
+    return this;
+  }
+  abort(reason) {
+    this._context.controller.abort(reason);
+  }
+  then(onfulfilled, onrejected) {
+    return this._promise.then(
+      (value) => {
+        const simplified = this._simplify(value);
+        return onfulfilled ? onfulfilled(simplified) : simplified;
+      },
+      (reason) => {
+        if (onrejected) {
+          return onrejected(reason);
+        }
+        throw reason;
+      }
+    );
+  }
+  catch(onrejected) {
+    return this._promise.catch(onrejected);
+  }
+  async result() {
+    const output = await this._promise;
+    const usage = this._context.usage;
+    return { output, usage, elapsed: this._elasped };
+  }
+}
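Besides behaving like a promise, Response exposes result() for callers that also want the usage and timing gathered by the ZaiContext. A minimal sketch, again assuming a hypothetical configured zai instance and input text:

import { Zai } from '@botpress/zai'

declare const zai: Zai // assumption: a Zai instance configured elsewhere
declare const longDocument: string // assumption: the text to summarize

const { output, usage, elapsed } = await zai.summarize(longDocument).result()
// output  -> the resolved value of the underlying operation
// usage   -> whatever the ZaiContext accumulated (the same data emitted incrementally as 'progress')
// elapsed -> context.elapsedTime captured when the promise settled
console.log(output, usage, elapsed)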
package/dist/tokenizer.js ADDED

@@ -0,0 +1,11 @@
+import { getWasmTokenizer } from "@bpinternal/thicktoken";
+let tokenizer = null;
+export async function getTokenizer() {
+  if (!tokenizer) {
+    while (!getWasmTokenizer) {
+      await new Promise((resolve) => setTimeout(resolve, 25));
+    }
+    tokenizer = getWasmTokenizer();
+  }
+  return tokenizer;
+}
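getTokenizer replaces per-operation top-level awaits with a lazily initialized singleton around the @bpinternal/thicktoken WASM tokenizer. A sketch of the internal usage pattern seen in the operations above; the prompt and the token budget are illustrative:

import { getTokenizer } from './tokenizer' // internal module within the package

declare const prompt: string // assumption: some long prompt to clip

const tokenizer = await getTokenizer() // waits until the WASM export is available, then caches the instance
const clipped = tokenizer.truncate(prompt, 1000) // truncate() is the method the operations rely on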
package/e2e/client.ts CHANGED

@@ -1,5 +1,5 @@
 import { Client } from '@botpress/client'
-import { Cognitive } from '@botpress/cognitive'
+import { Cognitive, ModelProvider } from '@botpress/cognitive'
 import { diffLines } from 'diff'
 import fs from 'node:fs'
 import path from 'node:path'
@@ -102,6 +102,47 @@ class CachedClient extends Client {
   }
 }

+const cognitiveProvider: ModelProvider = {
+  deleteModelPreferences: async () => {},
+  saveModelPreferences: async () => {},
+  fetchInstalledModels: async () => [
+    {
+      ref: 'openai:gpt-4o-2024-11-20',
+      integration: 'openai',
+      id: 'gpt-4o-2024-11-20',
+      name: 'GPT-4o (November 2024)',
+      description:
+        "GPT-4o (“o” for “omni”) is OpenAI's most advanced model. It is multimodal (accepting text or image inputs and outputting text), and it has the same high intelligence as GPT-4 Turbo but is cheaper and more efficient.",
+      input: {
+        costPer1MTokens: 2.5,
+        maxTokens: 128000,
+      },
+      output: {
+        costPer1MTokens: 10,
+        maxTokens: 16384,
+      },
+      tags: ['recommended', 'vision', 'general-purpose', 'coding', 'agents', 'function-calling'],
+    },
+  ],
+  fetchModelPreferences: async () => ({
+    best: ['openai:gpt-4o-2024-11-20'] as const,
+    fast: ['openai:gpt-4o-2024-11-20'] as const,
+    downtimes: [],
+  }),
+}
+
+export const getCognitiveClient = () => {
+  const cognitive = new Cognitive({
+    client: new Client({
+      apiUrl: process.env.CLOUD_API_ENDPOINT ?? 'https://api.botpress.dev',
+      botId: process.env.CLOUD_BOT_ID,
+      token: process.env.CLOUD_PAT,
+    }),
+    provider: cognitiveProvider,
+  })
+  return cognitive
+}
+
 export const getCachedCognitiveClient = () => {
   const cognitive = new Cognitive({
     client: new CachedClient({
@@ -109,34 +150,7 @@ export const getCachedCognitiveClient = () => {
       botId: process.env.CLOUD_BOT_ID,
       token: process.env.CLOUD_PAT,
     }),
-    provider: {
-      deleteModelPreferences: async () => {},
-      saveModelPreferences: async () => {},
-      fetchInstalledModels: async () => [
-        {
-          ref: 'openai:gpt-4o-2024-11-20',
-          integration: 'openai',
-          id: 'gpt-4o-2024-11-20',
-          name: 'GPT-4o (November 2024)',
-          description:
-            "GPT-4o (“o” for “omni”) is OpenAI's most advanced model. It is multimodal (accepting text or image inputs and outputting text), and it has the same high intelligence as GPT-4 Turbo but is cheaper and more efficient.",
-          input: {
-            costPer1MTokens: 2.5,
-            maxTokens: 128000,
-          },
-          output: {
-            costPer1MTokens: 10,
-            maxTokens: 16384,
-          },
-          tags: ['recommended', 'vision', 'general-purpose', 'coding', 'agents', 'function-calling'],
-        },
-      ],
-      fetchModelPreferences: async () => ({
-        best: ['openai:gpt-4o-2024-11-20'] as const,
-        fast: ['openai:gpt-4o-2024-11-20'] as const,
-        downtimes: [],
-      }),
-    },
+    provider: cognitiveProvider,
   })
   return cognitive
 }
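The e2e harness hoists the inline provider object into a shared cognitiveProvider, so the new getCognitiveClient and the existing getCachedCognitiveClient are backed by the same fixed gpt-4o model entry. A hypothetical test wiring (the RECORD_FIXTURES flag is illustrative, not part of the package):

import { getCachedCognitiveClient, getCognitiveClient } from './client'

// Replay cached responses by default; hit the live API only when re-recording fixtures.
const cognitive = process.env.RECORD_FIXTURES ? getCognitiveClient() : getCachedCognitiveClient()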