@botpress/zai 2.0.15 → 2.0.16

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -172,16 +172,17 @@ type Options$1 = {
172
172
  instructions?: string;
173
173
  /** The maximum number of tokens per chunk */
174
174
  chunkLength?: number;
175
+ /** Whether to strictly follow the schema or not */
176
+ strict?: boolean;
175
177
  };
176
178
  type __Z<T extends any = any> = {
177
179
  _output: T;
178
180
  };
179
181
  type OfType<O, T extends __Z = __Z<O>> = T extends __Z<O> ? T : never;
180
- type AnyObjectOrArray = Record<string, unknown> | Array<unknown>;
181
182
  declare module '@botpress/zai' {
182
183
  interface Zai {
183
184
  /** Extracts one or many elements from an arbitrary input */
184
- extract<S extends OfType<AnyObjectOrArray>>(input: unknown, schema: S, options?: Options$1): Promise<S['_output']>;
185
+ extract<S extends OfType<any>>(input: unknown, schema: S, options?: Options$1): Promise<S['_output']>;
185
186
  }
186
187
  }
187
188
 
@@ -8,7 +8,8 @@ import { PROMPT_INPUT_BUFFER } from "./constants";
8
8
  import { JsonParsingError } from "./errors";
9
9
  const Options = z.object({
10
10
  instructions: z.string().optional().describe("Instructions to guide the user on how to extract the data"),
11
- chunkLength: z.number().min(100).max(1e5).optional().describe("The maximum number of tokens per chunk").default(16e3)
11
+ chunkLength: z.number().min(100).max(1e5).optional().describe("The maximum number of tokens per chunk").default(16e3),
12
+ strict: z.boolean().optional().default(true).describe("Whether to strictly follow the schema or not")
12
13
  });
13
14
  const START = "\u25A0json_start\u25A0";
14
15
  const END = "\u25A0json_end\u25A0";
@@ -22,22 +23,34 @@ Zai.prototype.extract = async function(input, _schema, _options) {
22
23
  const taskType = "zai.extract";
23
24
  const PROMPT_COMPONENT = Math.max(this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER, 100);
24
25
  let isArrayOfObjects = false;
26
+ let wrappedValue = false;
25
27
  const originalSchema = schema;
26
28
  const baseType = (schema.naked ? schema.naked() : schema)?.constructor?.name ?? "unknown";
27
- if (baseType === "ZodObject") {
28
- } else if (baseType === "ZodArray") {
29
+ if (baseType === "ZodArray") {
30
+ isArrayOfObjects = true;
29
31
  let elementType = schema.element;
30
32
  if (elementType.naked) {
31
33
  elementType = elementType.naked();
32
34
  }
33
35
  if (elementType?.constructor?.name === "ZodObject") {
34
- isArrayOfObjects = true;
35
36
  schema = elementType;
36
37
  } else {
37
- throw new Error("Schema must be a ZodObject or a ZodArray<ZodObject>");
38
+ wrappedValue = true;
39
+ schema = z.object({
40
+ value: elementType
41
+ });
42
+ }
43
+ } else if (baseType !== "ZodObject") {
44
+ wrappedValue = true;
45
+ schema = z.object({
46
+ value: originalSchema
47
+ });
48
+ }
49
+ if (!options.strict) {
50
+ try {
51
+ schema = schema.partial();
52
+ } catch {
38
53
  }
39
- } else {
40
- throw new Error("Schema must be either a ZuiObject or a ZuiArray<ZuiObject>");
41
54
  }
42
55
  const schemaTypescript = schema.toTypescriptType({ declaration: false });
43
56
  const schemaLength = tokenizer.count(schemaTypescript);
@@ -50,8 +63,34 @@ Zai.prototype.extract = async function(input, _schema, _options) {
50
63
  if (tokenizer.count(inputAsString) > options.chunkLength) {
51
64
  const tokens = tokenizer.split(inputAsString);
52
65
  const chunks = chunk(tokens, options.chunkLength).map((x) => x.join(""));
53
- const all = await Promise.all(chunks.map((chunk2) => this.extract(chunk2, originalSchema)));
54
- return this.extract(all, originalSchema, options);
66
+ const all = await Promise.allSettled(
67
+ chunks.map(
68
+ (chunk2) => this.extract(chunk2, originalSchema, {
69
+ ...options,
70
+ strict: false
71
+ // We don't want to fail on strict mode for sub-chunks
72
+ })
73
+ )
74
+ ).then(
75
+ (results) => results.filter((x) => x.status === "fulfilled").map((x) => x.value)
76
+ );
77
+ const rows = all.map((x, idx) => `<part-${idx + 1}>
78
+ ${stringify(x, true)}
79
+ </part-${idx + 1}>`).join("\n");
80
+ return this.extract(
81
+ `
82
+ The result has been split into ${all.length} parts. Recursively merge the result into the final result.
83
+ When merging arrays, take unique values.
84
+ When merging conflictual (but defined) information, take the most reasonable and frequent value.
85
+ Non-defined values are OK and normal. Don't delete fields because of null values. Focus on defined values.
86
+
87
+ Here's the data:
88
+ ${rows}
89
+
90
+ Merge it back into a final result.`.trim(),
91
+ originalSchema,
92
+ options
93
+ );
55
94
  }
56
95
  const instructions = [];
57
96
  if (options.instructions) {
@@ -72,6 +111,9 @@ Zai.prototype.extract = async function(input, _schema, _options) {
72
111
  instructions.push("You may have exactly one element in the input.");
73
112
  instructions.push(`The element must be a JSON object with exactly the format: ${START}${shape}${END}`);
74
113
  }
114
+ if (!options.strict) {
115
+ instructions.push("You may ignore any fields that are not present in the input. All keys are optional.");
116
+ }
75
117
  const EXAMPLES_TOKENS = PROMPT_COMPONENT - tokenizer.count(inputAsString) - tokenizer.count(instructions.join("\n"));
76
118
  const Key = fastHash(
77
119
  JSON.stringify({
@@ -184,13 +226,20 @@ ${instructions.map((x) => `\u2022 ${x}`).join("\n")}
184
226
  }
185
227
  ]
186
228
  });
187
- const answer = output.choices[0]?.content;
188
- const elements = answer.split(START).filter((x) => x.trim().length > 0).map((x) => {
229
+ const answer = output.choices[0]?.content ?? "{}";
230
+ const elements = answer?.split(START).filter((x) => x.trim().length > 0 && x.includes("}")).map((x) => {
189
231
  try {
190
232
  const json = x.slice(0, x.indexOf(END)).trim();
191
233
  const repairedJson = jsonrepair(json);
192
234
  const parsedJson = JSON5.parse(repairedJson);
193
- return schema.parse(parsedJson);
235
+ const safe = schema.safeParse(parsedJson);
236
+ if (safe.success) {
237
+ return safe.data;
238
+ }
239
+ if (options.strict) {
240
+ throw new JsonParsingError(x, safe.error);
241
+ }
242
+ return parsedJson;
194
243
  } catch (error) {
195
244
  throw new JsonParsingError(x, error instanceof Error ? error : new Error("Unknown error"));
196
245
  }
@@ -199,10 +248,17 @@ ${instructions.map((x) => `\u2022 ${x}`).join("\n")}
199
248
  if (isArrayOfObjects) {
200
249
  final = elements;
201
250
  } else if (elements.length === 0) {
202
- final = schema.parse({});
251
+ final = options.strict ? schema.parse({}) : {};
203
252
  } else {
204
253
  final = elements[0];
205
254
  }
255
+ if (wrappedValue) {
256
+ if (Array.isArray(final)) {
257
+ final = final.map((x) => "value" in x ? x.value : x);
258
+ } else {
259
+ final = "value" in final ? final.value : final;
260
+ }
261
+ }
206
262
  if (taskId) {
207
263
  await this.adapter.saveExample({
208
264
  key: Key,