@botpress/zai 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,16 +16,22 @@ const NO_MORE = "\u25A0NO_MORE_ELEMENT\u25A0";
  Zai.prototype.extract = async function(input, schema, _options) {
    const options = Options.parse(_options ?? {});
    const tokenizer = await this.getTokenizer();
+   await this.fetchModelDetails();
    const taskId = this.taskId;
    const taskType = "zai.extract";
-   const PROMPT_COMPONENT = Math.max(this.Model.input.maxTokens - PROMPT_INPUT_BUFFER, 100);
+   const PROMPT_COMPONENT = Math.max(this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER, 100);
    let isArrayOfObjects = false;
    const originalSchema = schema;
-   if (schema instanceof z.ZodObject) {
-   } else if (schema instanceof z.ZodArray) {
-     if (schema._def.type instanceof z.ZodObject) {
+   const baseType = (schema.naked ? schema.naked() : schema)?.constructor?.name ?? "unknown";
+   if (baseType === "ZodObject") {
+   } else if (baseType === "ZodArray") {
+     let elementType = schema.element;
+     if (elementType.naked) {
+       elementType = elementType.naked();
+     }
+     if (elementType?.constructor?.name === "ZodObject") {
        isArrayOfObjects = true;
-       schema = schema._def.type;
+       schema = elementType;
      } else {
        throw new Error("Schema must be a ZodObject or a ZodArray<ZodObject>");
      }
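The schema guard above swaps `instanceof` checks for constructor-name comparisons plus `naked()` unwrapping, which appears to make the check robust to wrapped/decorated zui schemas where `instanceof` can fail. A minimal sketch of what `extract` accepts after this change, assuming only the public `@bpinternal/zui` API:

import { z } from "@bpinternal/zui";

// Accepted: a plain object schema...
const person = z.object({ name: z.string(), age: z.number() });

// ...or an array of object schemas (flips isArrayOfObjects internally):
const people = z.array(person);

// Anything else (e.g. z.string()) fails the constructor-name check and throws
// "Schema must be a ZodObject or a ZodArray<ZodObject>".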
@@ -34,7 +40,10 @@ Zai.prototype.extract = async function(input, schema, _options) {
    }
    const schemaTypescript = schema.toTypescript({ declaration: false });
    const schemaLength = tokenizer.count(schemaTypescript);
-   options.chunkLength = Math.min(options.chunkLength, this.Model.input.maxTokens - PROMPT_INPUT_BUFFER - schemaLength);
+   options.chunkLength = Math.min(
+     options.chunkLength,
+     this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER - schemaLength
+   );
    const keys = Object.keys(schema.shape);
    let inputAsString = stringify(input);
    if (tokenizer.count(inputAsString) > options.chunkLength) {
@@ -160,7 +169,7 @@ ${END}`.trim()
      EXAMPLES_TOKENS,
      (el) => tokenizer.count(stringify(el.input)) + tokenizer.count(stringify(el.extracted))
    ).map(formatExample).flat();
-   const output = await this.callModel({
+   const { output, meta } = await this.callModel({
      systemPrompt: `
Extract the following information from the input:
${schemaTypescript}
@@ -205,7 +214,18 @@ ${instructions.map((x) => `\u2022 ${x}`).join("\n")}
        instructions: options.instructions ?? "No specific instructions",
        input: inputAsString,
        output: final,
-       metadata: output.metadata
+       metadata: {
+         cost: {
+           input: meta.cost.input,
+           output: meta.cost.output
+         },
+         latency: meta.latency,
+         model: this.Model,
+         tokens: {
+           input: meta.tokens.input,
+           output: meta.tokens.output
+         }
+       }
      });
    }
    return final;
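Each active-learning example now persists an explicit metadata object instead of forwarding the model call's `output.metadata`; the same block recurs in the filter, label, and rewrite hunks below. A sketch of the persisted shape, with field names taken from this diff (the primitive types are assumptions based on how the fields are consumed):

// Metadata saved alongside each example (names from the diff, types assumed):
type ExampleMetadata = {
  cost: { input: number; output: number };
  latency: number;
  model: string; // this.Model: "best", "fast", or a model reference
  tokens: { input: number; output: number };
};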
@@ -16,10 +16,11 @@ const END = "\u25A0END\u25A0";
  Zai.prototype.filter = async function(input, condition, _options) {
    const options = Options.parse(_options ?? {});
    const tokenizer = await this.getTokenizer();
+   await this.fetchModelDetails();
    const taskId = this.taskId;
    const taskType = "zai.filter";
    const MAX_ITEMS_PER_CHUNK = 50;
-   const TOKENS_TOTAL_MAX = this.Model.input.maxTokens - PROMPT_INPUT_BUFFER - PROMPT_OUTPUT_BUFFER;
+   const TOKENS_TOTAL_MAX = this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER - PROMPT_OUTPUT_BUFFER;
    const TOKENS_EXAMPLES_MAX = Math.floor(Math.max(250, TOKENS_TOTAL_MAX * 0.5));
    const TOKENS_CONDITION_MAX = clamp(TOKENS_TOTAL_MAX * 0.25, 250, tokenizer.count(condition));
    const TOKENS_INPUT_ARRAY_MAX = TOKENS_TOTAL_MAX - TOKENS_EXAMPLES_MAX - TOKENS_CONDITION_MAX;
@@ -121,7 +122,7 @@ ${examples.map((x, idx) => `\u25A0${idx}:${!!x.filter ? "true" : "false"}:${x.re
        role: "assistant"
      }
    ];
-   const output = await this.callModel({
+   const { output, meta } = await this.callModel({
      systemPrompt: `
You are given a list of items. Your task is to filter out the items that meet the condition below.
You need to return the full list of items with the format:
@@ -169,7 +170,18 @@ The condition is: "${condition}"
        input: JSON.stringify(chunk),
        output: partial,
        instructions: condition,
-       metadata: output.metadata
+       metadata: {
+         cost: {
+           input: meta.cost.input,
+           output: meta.cost.output
+         },
+         latency: meta.latency,
+         model: this.Model,
+         tokens: {
+           input: meta.tokens.input,
+           output: meta.tokens.output
+         }
+       }
      });
    }
    return partial;
@@ -59,9 +59,10 @@ Zai.prototype.label = async function(input, _labels, _options) {
    const options = Options.parse(_options ?? {});
    const labels = Labels.parse(_labels);
    const tokenizer = await this.getTokenizer();
+   await this.fetchModelDetails();
    const taskId = this.taskId;
    const taskType = "zai.label";
-   const TOTAL_MAX_TOKENS = clamp(options.chunkLength, 1e3, this.Model.input.maxTokens - PROMPT_INPUT_BUFFER);
+   const TOTAL_MAX_TOKENS = clamp(options.chunkLength, 1e3, this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER);
    const CHUNK_EXAMPLES_MAX_TOKENS = clamp(Math.floor(TOTAL_MAX_TOKENS * 0.5), 250, 1e4);
    const CHUNK_INPUT_MAX_TOKENS = clamp(
      TOTAL_MAX_TOKENS - CHUNK_EXAMPLES_MAX_TOKENS,
@@ -152,7 +153,7 @@ ${END}
\u25A0${key}:\u3010explanation (where "explanation" is answering the question "${labels[key]}")\u3011:x\u25A0 (where x is ${ALL_LABELS})
    `.trim();
    }).join("\n\n");
-   const output = await this.callModel({
+   const { output, meta } = await this.callModel({
      stopSequences: [END],
      systemPrompt: `
You need to tag the input with the following labels based on the question asked:
@@ -228,7 +229,18 @@ For example, you can say: "According to Expert Example #1, ..."`.trim()
      taskType,
      taskId,
      instructions: options.instructions ?? "",
-     metadata: output.metadata,
+     metadata: {
+       cost: {
+         input: meta.cost.input,
+         output: meta.cost.output
+       },
+       latency: meta.latency,
+       model: this.Model,
+       tokens: {
+         input: meta.tokens.input,
+         output: meta.tokens.output
+       }
+     },
      input: inputAsString,
      output: final
    });
@@ -15,15 +15,16 @@ const END = "\u25A0END\u25A0";
  Zai.prototype.rewrite = async function(original, prompt, _options) {
    const options = Options.parse(_options ?? {});
    const tokenizer = await this.getTokenizer();
+   await this.fetchModelDetails();
    const taskId = this.taskId;
    const taskType = "zai.rewrite";
-   const INPUT_COMPONENT_SIZE = Math.max(100, (this.Model.input.maxTokens - PROMPT_INPUT_BUFFER) / 2);
+   const INPUT_COMPONENT_SIZE = Math.max(100, (this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER) / 2);
    prompt = tokenizer.truncate(prompt, INPUT_COMPONENT_SIZE);
    const inputSize = tokenizer.count(original) + tokenizer.count(prompt);
-   const maxInputSize = this.Model.input.maxTokens - tokenizer.count(prompt) - PROMPT_INPUT_BUFFER;
+   const maxInputSize = this.ModelDetails.input.maxTokens - tokenizer.count(prompt) - PROMPT_INPUT_BUFFER;
    if (inputSize > maxInputSize) {
      throw new Error(
-       `The input size is ${inputSize} tokens long, which is more than the maximum of ${maxInputSize} tokens for this model (${this.Model.name} = ${this.Model.input.maxTokens} tokens)`
+       `The input size is ${inputSize} tokens long, which is more than the maximum of ${maxInputSize} tokens for this model (${this.ModelDetails.name} = ${this.ModelDetails.input.maxTokens} tokens)`
      );
    }
    const instructions = [];
@@ -74,13 +75,13 @@ ${END}
      ...tableExamples.map((x) => ({ input: x.input, output: x.output })),
      ...options.examples
    ];
-   const REMAINING_TOKENS = this.Model.input.maxTokens - tokenizer.count(prompt) - PROMPT_INPUT_BUFFER;
+   const REMAINING_TOKENS = this.ModelDetails.input.maxTokens - tokenizer.count(prompt) - PROMPT_INPUT_BUFFER;
    const examples = takeUntilTokens(
      savedExamples.length ? savedExamples : defaultExamples,
      REMAINING_TOKENS,
      (el) => tokenizer.count(stringify(el.input)) + tokenizer.count(stringify(el.output))
    ).map(formatExample).flat();
-   const output = await this.callModel({
+   const { output, meta } = await this.callModel({
      systemPrompt: `
Rewrite the text between the ${START} and ${END} tags to match the user prompt.
${instructions.map((x) => `\u2022 ${x}`).join("\n")}
@@ -99,7 +100,18 @@ ${instructions.map((x) => `\u2022 ${x}`).join("\n")}
    if (taskId) {
      await this.adapter.saveExample({
        key: Key,
-       metadata: output.metadata,
+       metadata: {
+         cost: {
+           input: meta.cost.input,
+           output: meta.cost.output
+         },
+         latency: meta.latency,
+         model: this.Model,
+         tokens: {
+           input: meta.tokens.input,
+           output: meta.tokens.output
+         }
+       },
        instructions: prompt,
        input: original,
        output: result,
@@ -20,16 +20,17 @@ const END = "\u25A0END\u25A0";
  Zai.prototype.summarize = async function(original, _options) {
    const options = Options.parse(_options ?? {});
    const tokenizer = await this.getTokenizer();
-   const INPUT_COMPONENT_SIZE = Math.max(100, (this.Model.input.maxTokens - PROMPT_INPUT_BUFFER) / 4);
+   await this.fetchModelDetails();
+   const INPUT_COMPONENT_SIZE = Math.max(100, (this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER) / 4);
    options.prompt = tokenizer.truncate(options.prompt, INPUT_COMPONENT_SIZE);
    options.format = tokenizer.truncate(options.format, INPUT_COMPONENT_SIZE);
-   const maxOutputSize = this.Model.output.maxTokens - PROMPT_OUTPUT_BUFFER;
+   const maxOutputSize = this.ModelDetails.output.maxTokens - PROMPT_OUTPUT_BUFFER;
    if (options.length > maxOutputSize) {
      throw new Error(
-       `The desired output length is ${maxOutputSize} tokens long, which is more than the maximum of ${this.Model.output.maxTokens} tokens for this model (${this.Model.name})`
+       `The desired output length is ${maxOutputSize} tokens long, which is more than the maximum of ${this.ModelDetails.output.maxTokens} tokens for this model (${this.ModelDetails.name})`
      );
    }
-   options.sliding.window = Math.min(options.sliding.window, this.Model.input.maxTokens - PROMPT_INPUT_BUFFER);
+   options.sliding.window = Math.min(options.sliding.window, this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER);
    options.sliding.overlap = Math.min(options.sliding.overlap, options.sliding.window - 3 * options.sliding.overlap);
    const format = (summary, newText) => {
      return `
@@ -102,7 +103,7 @@ ${newText}
        );
      }
    }
-   const output = await this.callModel({
+   const { output } = await this.callModel({
      systemPrompt: `
You are summarizing a text. The text is split into ${parts} parts, and you are currently working on part ${iteration}.
At every step, you will receive the current summary and a new part of the text. You need to amend the summary to include the new information (if needed).
@@ -8,9 +8,10 @@ const Options = z.object({
  Zai.prototype.text = async function(prompt, _options) {
    const options = Options.parse(_options ?? {});
    const tokenizer = await this.getTokenizer();
-   prompt = tokenizer.truncate(prompt, Math.max(this.Model.input.maxTokens - PROMPT_INPUT_BUFFER, 100));
+   await this.fetchModelDetails();
+   prompt = tokenizer.truncate(prompt, Math.max(this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER, 100));
    if (options.length) {
-     options.length = Math.min(this.Model.output.maxTokens - PROMPT_OUTPUT_BUFFER, options.length);
+     options.length = Math.min(this.ModelDetails.output.maxTokens - PROMPT_OUTPUT_BUFFER, options.length);
    }
    const instructions = [];
    let chart = "";
@@ -32,7 +33,7 @@ Zai.prototype.text = async function(prompt, _options) {
| 200-300 tokens| A medium paragraph (150-200 words) |
| 300-500 tokens| A long paragraph (200-300 words) |`.trim();
    }
-   const output = await this.callModel({
+   const { output } = await this.callModel({
      systemPrompt: `
Generate a text that fulfills the user prompt below. Answer directly to the prompt, without any acknowledgements or fluff. Also, make sure the text is standalone and complete.
${instructions.map((x) => `- ${x}`).join("\n")}
package/dist/utils.js CHANGED
@@ -2,12 +2,6 @@ import { z } from "@bpinternal/zui";
  export const stringify = (input, beautify = true) => {
    return typeof input === "string" && !!input.length ? input : input ? JSON.stringify(input, beautify ? null : void 0, beautify ? 2 : void 0) : "<input is null, false, undefined or empty>";
  };
- export const BotpressClient = z.custom(
-   (value) => typeof value === "object" && value !== null && "callAction" in value && typeof value.callAction === "function",
-   {
-     message: "Invalid Botpress Client. Make sure to pass an instance of @botpress/client"
-   }
- );
  export function fastHash(str) {
    let hash = 0;
    for (let i = 0; i < str.length; i++) {
package/dist/zai.js CHANGED
@@ -1,9 +1,8 @@
+ import { Cognitive } from "@botpress/cognitive";
+ import { getWasmTokenizer } from "@bpinternal/thicktoken";
  import { z } from "@bpinternal/zui";
- import { getWasmTokenizer } from "@botpress/wasm";
  import { TableAdapter } from "./adapters/botpress-table";
  import { MemoryAdapter } from "./adapters/memory";
- import { Models } from "./models";
- import { BotpressClient } from "./utils";
  const ActiveLearning = z.object({
    enable: z.boolean().describe("Whether to enable active learning").default(false),
    tableName: z.string().regex(
@@ -16,12 +15,14 @@ const ActiveLearning = z.object({
    ).describe("The ID of the task").default("default")
  });
  const ZaiConfig = z.object({
-   client: BotpressClient,
+   client: z.custom(),
    userId: z.string().describe("The ID of the user consuming the API").optional(),
-   retry: z.object({ maxRetries: z.number().min(0).max(100) }).default({ maxRetries: 3 }),
    modelId: z.custom(
      (value) => {
-       if (typeof value !== "string" || !value.includes("__")) {
+       if (typeof value !== "string") {
+         return false;
+       }
+       if (value !== "best" && value !== "fast" && !value.includes(":")) {
          return false;
        }
        return true;
@@ -29,7 +30,7 @@ const ZaiConfig = z.object({
      {
        message: "Invalid model ID"
      }
-   ).describe("The ID of the model you want to use").default("openai__gpt-4o-mini-2024-07-18"),
+   ).describe("The ID of the model you want to use").default("best"),
    activeLearning: ActiveLearning.default({ enable: false }),
    namespace: z.string().regex(
      /^[A-Za-z0-9_/-]{1,100}$/,
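Per the two hunks above, the `modelId` contract moves from `<integration>__<model>` to either a routing alias (`"best"` / `"fast"`) or a colon-separated model reference, the default becomes `"best"`, and the `retry` option disappears from the config schema. A hedged before/after sketch (the concrete colon-separated reference is illustrative, not taken from this diff):

// v1.1.x — double-underscore integration__model IDs:
const zaiOld = new Zai({ client, modelId: "openai__gpt-4o-mini-2024-07-18" });

// v1.2.x — alias or colon-separated reference; default is "best":
const zaiBest = new Zai({ client });
const zaiFast = new Zai({ client, modelId: "fast" });
const zaiPinned = new Zai({ client, modelId: "openai:gpt-4o-mini" }); // hypothetical reference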
@@ -39,76 +40,30 @@ const ZaiConfig = z.object({
  export class Zai {
    static tokenizer = null;
    client;
-   originalConfig;
-   userId;
-   integration;
-   model;
-   retry;
+   _originalConfig;
+   _userId;
    Model;
+   ModelDetails;
    namespace;
    adapter;
    activeLearning;
    constructor(config) {
-     this.originalConfig = config;
+     this._originalConfig = config;
      const parsed = ZaiConfig.parse(config);
-     this.client = parsed.client;
-     const [integration, modelId] = parsed.modelId.split("__");
-     if (!integration?.length || !modelId?.length) {
-       throw new Error(`Invalid model ID: ${parsed.modelId}. Expected format: <integration>__<modelId>`);
-     }
-     this.integration = integration;
-     this.model = modelId;
+     this.client = Cognitive.isCognitiveClient(parsed.client) ? parsed.client : new Cognitive({ client: parsed.client });
      this.namespace = parsed.namespace;
-     this.userId = parsed.userId;
-     this.retry = parsed.retry;
-     this.Model = Models.find((m) => m.id === parsed.modelId);
+     this._userId = parsed.userId;
+     this.Model = parsed.modelId;
      this.activeLearning = parsed.activeLearning;
-     this.adapter = parsed.activeLearning?.enable ? new TableAdapter({ client: this.client, tableName: parsed.activeLearning.tableName }) : new MemoryAdapter([]);
+     this.adapter = parsed.activeLearning?.enable ? new TableAdapter({ client: this.client.client, tableName: parsed.activeLearning.tableName }) : new MemoryAdapter([]);
    }
    /** @internal */
    async callModel(props) {
-     let retries = this.retry.maxRetries;
-     while (retries-- >= 0) {
-       try {
-         return await this._callModel(props);
-       } catch (e) {
-         if (retries >= 0) {
-           await new Promise((resolve) => setTimeout(resolve, 1e3));
-         } else {
-           throw new Error("Failed to call model after multiple retries");
-         }
-       }
-     }
-     throw new Error("Failed to call model after multiple retries");
-   }
-   /** @internal */
-   async _callModel(props) {
-     let retries = this.retry.maxRetries;
-     do {
-       const start = Date.now();
-       const input = {
-         messages: [],
-         temperature: 0,
-         topP: 1,
-         model: { id: this.model },
-         userId: this.userId,
-         ...props
-       };
-       const { output } = await this.client.callAction({
-         type: `${this.integration}:generateContent`,
-         input
-       });
-       const latency = Date.now() - start;
-       return {
-         ...output,
-         metadata: {
-           model: this.model,
-           latency,
-           cost: { input: output.usage.inputCost, output: output.usage.outputCost },
-           tokens: { input: output.usage.inputTokens, output: output.usage.outputTokens }
-         }
-       };
-     } while (--retries > 0);
+     return this.client.generateContent({
+       ...props,
+       model: this.Model,
+       userId: this._userId
+     });
    }
    async getTokenizer() {
      Zai.tokenizer ??= await (async () => {
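The hand-rolled retry loop and `callAction` plumbing are gone: `callModel` now delegates to the Cognitive client's `generateContent`, and call sites destructure its result. A minimal sketch of the calling convention the task files rely on after this change (property names are taken from the diff; the exact typing of `meta` is an assumption based on how it is consumed):

// Inside a task method such as extract/filter/label/rewrite:
const { output, meta } = await this.callModel({ systemPrompt: "...", messages: [] });
// meta is read when saving active-learning examples:
// meta.cost.input / meta.cost.output, meta.latency, meta.tokens.input / meta.tokens.output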
@@ -119,6 +74,11 @@ export class Zai {
      })();
      return Zai.tokenizer;
    }
+   async fetchModelDetails() {
+     if (!this.ModelDetails) {
+       this.ModelDetails = await this.client.getModelDetails(this.Model);
+     }
+   }
    get taskId() {
      if (!this.activeLearning.enable) {
        return void 0;
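`fetchModelDetails` is the memoization point behind the `this.Model.*` → `this.ModelDetails.*` migration in every task: the first call resolves the configured model (presumably including the `"best"` / `"fast"` aliases) to its limits via the Cognitive client and caches the result. A sketch of the pattern each task now follows, using names from this diff:

// Each task awaits the (cached) lookup before budgeting tokens:
await this.fetchModelDetails();
const inputBudget = this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER;
const outputBudget = this.ModelDetails.output.maxTokens - PROMPT_OUTPUT_BUFFER;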
@@ -127,13 +87,13 @@ export class Zai {
    }
    with(options) {
      return new Zai({
-       ...this.originalConfig,
+       ...this._originalConfig,
        ...options
      });
    }
    learn(taskId) {
      return new Zai({
-       ...this.originalConfig,
+       ...this._originalConfig,
        activeLearning: { ...this.activeLearning, taskId, enable: true }
      });
    }
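`with()` and `learn()` behave as before, cloning the instance from the original config; only the backing field was renamed to `_originalConfig`. A usage sketch with illustrative values:

// Derive variants without mutating the original instance:
const fastZai = zai.with({ modelId: "fast" });    // overrides are re-parsed through ZaiConfig
const learningZai = zai.learn("support-tickets"); // hypothetical task ID; enables active learning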