npm - @botpress/zai - Versions diffs - 2.1.19 → 2.2.0 - Mend

@botpress/zai 2.1.19 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

package/CLAUDE.md +696 -0
package/README.md +28 -2
package/dist/index.d.ts +39 -18
package/dist/index.js +1 -0
package/dist/operations/errors.js +112 -8
package/dist/operations/extract.js +20 -12
package/dist/operations/filter.js +3 -1
package/dist/operations/group.js +278 -0
package/dist/operations/label.js +3 -1
package/dist/operations/summarize.js +3 -1
package/e2e/data/cache.jsonl +219 -0
package/package.json +4 -3
package/src/index.ts +1 -0
package/src/operations/errors.ts +96 -1
package/src/operations/extract.ts +21 -11
package/src/operations/filter.ts +3 -1
package/src/operations/group.ts +421 -0
package/src/operations/label.ts +3 -1
package/src/operations/summarize.ts +3 -2
package/src/zai.ts +7 -9

package/README.md CHANGED Viewed

@@ -104,7 +104,32 @@ const techCompanies = await zai.filter(companies, 'are technology companies')
 const recentPosts = await zai.filter(posts, 'were published this week')
 ```
-### 6. Text - Generate content
+### 6. Group - Organize items into categories
+```typescript
+// Group items automatically
+const grouped = await zai.group(tasks, {
+  instructions: 'Group by priority level',
+})
+// Result: { 'High Priority': [...], 'Medium Priority': [...], 'Low Priority': [...] }
+// Group with initial categories
+const categorized = await zai.group(emails, {
+  instructions: 'Group by topic',
+  initialGroups: [
+    { id: 'work', label: 'Work' },
+    { id: 'personal', label: 'Personal' },
+  ],
+})
+// Group large datasets efficiently
+const organized = await zai.group(largeArray, {
+  instructions: 'Group by date',
+  chunkLength: 8000, // Process in chunks for better performance
+})
+```
+### 7. Text - Generate content
 ```typescript
 const blogPost = await zai.text('Write about the future of AI', {
@@ -113,7 +138,7 @@ const blogPost = await zai.text('Write about the future of AI', {
 })
 ```
-### 7. Summarize - Create summaries
+### 8. Summarize - Create summaries
 ```typescript
 // Simple summary
@@ -237,6 +262,7 @@ setTimeout(() => controller.abort(), 5000)
 - `.label(content, criteria, options?)` - Apply multiple labels
 - `.rewrite(content, instruction, options?)` - Transform text
 - `.filter(items, condition, options?)` - Filter array items
+- `.group(items, options?)` - Organize items into categories
 - `.text(prompt, options?)` - Generate text
 - `.summarize(content, options?)` - Create summary

package/dist/index.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { Cognitive, Model, BotpressClientLike, GenerateContentInput, GenerateContentOutput } from '@botpress/cognitive';
+import { Cognitive, Models, Model, BotpressClientLike, GenerateContentInput, GenerateContentOutput } from '@botpress/cognitive';
 import { TextTokenizer } from '@bpinternal/thicktoken';
 type GenerationMetadata = {
@@ -41,7 +41,6 @@ declare abstract class Adapter {
     abstract saveExample<TInput, TOutput>(props: SaveExampleProps<TInput, TOutput>): Promise<void>;
 }
-type ModelId = Required<Parameters<Cognitive['generateContent']>[0]['model']>;
 type ActiveLearning = {
     enable: boolean;
     tableName: string;
@@ -50,7 +49,7 @@ type ActiveLearning = {
 type ZaiConfig = {
     client: BotpressClientLike | Cognitive;
     userId?: string;
-    modelId?: ModelId | string;
+    modelId?: Models;
     activeLearning?: ActiveLearning;
     namespace?: string;
 };
@@ -59,7 +58,7 @@ declare class Zai {
     protected client: Cognitive;
     private _originalConfig;
     private _userId;
-    protected Model: ModelId;
+    protected Model: Models;
     protected ModelDetails: Model;
     protected namespace: string;
     protected adapter: Adapter;
@@ -167,14 +166,14 @@ declare class Response<T = any, S = T> implements PromiseLike<S> {
     }>;
 }
-type Options$6 = {
+type Options$7 = {
     /** The maximum number of tokens to generate */
     length?: number;
 };
 declare module '@botpress/zai' {
     interface Zai {
         /** Generates a text of the desired length according to the prompt */
-        text(prompt: string, options?: Options$6): Response<string>;
+        text(prompt: string, options?: Options$7): Response<string>;
     }
 }
@@ -183,7 +182,7 @@ type Example$3 = {
     output: string;
     instructions?: string;
 };
-type Options$5 = {
+type Options$6 = {
     /** Examples to guide the rewriting */
     examples?: Array<Example$3>;
     /** The maximum number of tokens to generate */
@@ -192,11 +191,11 @@ type Options$5 = {
 declare module '@botpress/zai' {
     interface Zai {
         /** Rewrites a string to match the prompt */
-        rewrite(original: string, prompt: string, options?: Options$5): Response<string>;
+        rewrite(original: string, prompt: string, options?: Options$6): Response<string>;
     }
 }
-type Options$4 = {
+type Options$5 = {
     /** What should the text be summarized to? */
     prompt?: string;
     /** How to format the example text */
@@ -216,7 +215,7 @@ type Options$4 = {
 declare module '@botpress/zai' {
     interface Zai {
         /** Summarizes a text of any length to a summary of the desired length */
-        summarize(original: string, options?: Options$4): Response<string>;
+        summarize(original: string, options?: Options$5): Response<string>;
     }
 }
@@ -226,14 +225,14 @@ type Example$2 = {
     reason?: string;
     condition?: string;
 };
-type Options$3 = {
+type Options$4 = {
     /** Examples to check the condition against */
     examples?: Array<Example$2>;
 };
 declare module '@botpress/zai' {
     interface Zai {
         /** Checks whether a condition is true or not */
-        check(input: unknown, condition: string, options?: Options$3): Response<{
+        check(input: unknown, condition: string, options?: Options$4): Response<{
             /** Whether the condition is true or not */
             value: boolean;
             /** The explanation of the decision */
@@ -247,7 +246,7 @@ type Example$1 = {
     filter: boolean;
     reason?: string;
 };
-type Options$2 = {
+type Options$3 = {
     /** The maximum number of tokens per item */
     tokensPerItem?: number;
     /** Examples to filter the condition against */
@@ -256,11 +255,11 @@ type Options$2 = {
 declare module '@botpress/zai' {
     interface Zai {
         /** Filters elements of an array against a condition */
-        filter<T>(input: Array<T>, condition: string, options?: Options$2): Response<Array<T>>;
+        filter<T>(input: Array<T>, condition: string, options?: Options$3): Response<Array<T>>;
     }
 }
-type Options$1 = {
+type Options$2 = {
     /** Instructions to guide the user on how to extract the data */
     instructions?: string;
     /** The maximum number of tokens per chunk */
@@ -275,7 +274,7 @@ type OfType<O, T extends __Z = __Z<O>> = T extends __Z<O> ? T : never;
 declare module '@botpress/zai' {
     interface Zai {
         /** Extracts one or many elements from an arbitrary input */
-        extract<S extends OfType<any>>(input: unknown, schema: S, options?: Options$1): Response<S['_output']>;
+        extract<S extends OfType<any>>(input: unknown, schema: S, options?: Options$2): Response<S['_output']>;
     }
 }
@@ -294,7 +293,7 @@ type Example<T extends string> = {
         explanation?: string;
     }>>;
 };
-type Options<T extends string> = {
+type Options$1<T extends string> = {
     /** Examples to help the user make a decision */
     examples?: Array<Example<T>>;
     /** Instructions to guide the user on how to extract the data */
@@ -306,7 +305,7 @@ type Labels<T extends string> = Record<T, string>;
 declare module '@botpress/zai' {
     interface Zai {
         /** Tags the provided input with a list of predefined labels */
-        label<T extends string>(input: unknown, labels: Labels<T>, options?: Options<T>): Response<{
+        label<T extends string>(input: unknown, labels: Labels<T>, options?: Options$1<T>): Response<{
             [K in T]: {
                 explanation: string;
                 value: boolean;
@@ -318,4 +317,26 @@ declare module '@botpress/zai' {
     }
 }
+type Group<T> = {
+    id: string;
+    label: string;
+    elements: T[];
+};
+type InitialGroup = {
+    id: string;
+    label: string;
+    elements?: unknown[];
+};
+type Options = {
+    instructions?: string;
+    tokensPerElement?: number;
+    chunkLength?: number;
+    initialGroups?: Array<InitialGroup>;
+};
+declare module '@botpress/zai' {
+    interface Zai {
+        group<T>(input: Array<T>, options?: Options): Response<Array<Group<T>>, Record<string, T[]>>;
+    }
+}
 export { Zai };

package/dist/index.js CHANGED Viewed

@@ -6,4 +6,5 @@ import "./operations/check";
 import "./operations/filter";
 import "./operations/extract";
 import "./operations/label";
+import "./operations/group";
 export { Zai };

package/dist/operations/errors.js CHANGED Viewed

@@ -1,15 +1,119 @@
+import { ZodError } from "@bpinternal/zui";
 export class JsonParsingError extends Error {
   constructor(json, error) {
-    const message = `Error parsing JSON:
----JSON---
-${json}
----Error---
- ${error}`;
+    const message = JsonParsingError._formatError(json, error);
     super(message);
     this.json = json;
     this.error = error;
   }
+  static _formatError(json, error) {
+    let errorMessage = "Error parsing JSON:\n\n";
+    errorMessage += `---JSON---
+${json}
+`;
+    if (error instanceof ZodError) {
+      errorMessage += "---Validation Errors---\n\n";
+      errorMessage += JsonParsingError._formatZodError(error);
+    } else {
+      errorMessage += "---Error---\n\n";
+      errorMessage += "The JSON provided is not valid JSON.\n";
+      errorMessage += `Details: ${error.message}
+`;
+    }
+    return errorMessage;
+  }
+  static _formatZodError(zodError) {
+    const issues = zodError.issues;
+    if (issues.length === 0) {
+      return "Unknown validation error\n";
+    }
+    let message = "";
+    for (let i = 0; i < issues.length; i++) {
+      const issue = issues[i];
+      const path = issue.path.length > 0 ? issue.path.join(".") : "root";
+      message += `${i + 1}. Field: "${path}"
+`;
+      switch (issue.code) {
+        case "invalid_type":
+          message += `   Problem: Expected ${issue.expected}, but received ${issue.received}
+`;
+          message += `   Message: ${issue.message}
+`;
+          break;
+        case "invalid_string":
+          if ("validation" in issue) {
+            message += `   Problem: Invalid ${issue.validation} format
+`;
+          }
+          message += `   Message: ${issue.message}
+`;
+          break;
+        case "too_small":
+          if (issue.type === "string") {
+            if (issue.exact) {
+              message += `   Problem: String must be exactly ${issue.minimum} characters
+`;
+            } else {
+              message += `   Problem: String must be at least ${issue.minimum} characters
+`;
+            }
+          } else if (issue.type === "number") {
+            message += `   Problem: Number must be ${issue.inclusive ? "at least" : "greater than"} ${issue.minimum}
+`;
+          } else if (issue.type === "array") {
+            message += `   Problem: Array must contain ${issue.inclusive ? "at least" : "more than"} ${issue.minimum} items
+`;
+          }
+          message += `   Message: ${issue.message}
+`;
+          break;
+        case "too_big":
+          if (issue.type === "string") {
+            if (issue.exact) {
+              message += `   Problem: String must be exactly ${issue.maximum} characters
+`;
+            } else {
+              message += `   Problem: String must be at most ${issue.maximum} characters
+`;
+            }
+          } else if (issue.type === "number") {
+            message += `   Problem: Number must be ${issue.inclusive ? "at most" : "less than"} ${issue.maximum}
+`;
+          } else if (issue.type === "array") {
+            message += `   Problem: Array must contain ${issue.inclusive ? "at most" : "fewer than"} ${issue.maximum} items
+`;
+          }
+          message += `   Message: ${issue.message}
+`;
+          break;
+        case "invalid_enum_value":
+          message += `   Problem: Invalid value "${issue.received}"
+`;
+          message += `   Allowed values: ${issue.options.map((o) => `"${o}"`).join(", ")}
+`;
+          message += `   Message: ${issue.message}
+`;
+          break;
+        case "invalid_literal":
+          message += `   Problem: Expected the literal value "${issue.expected}", but received "${issue.received}"
+`;
+          message += `   Message: ${issue.message}
+`;
+          break;
+        case "invalid_union":
+          message += "   Problem: Value doesn't match any of the expected formats\n";
+          message += `   Message: ${issue.message}
+`;
+          break;
+        default:
+          message += `   Problem: ${issue.message}
+`;
+      }
+      if (i < issues.length - 1) {
+        message += "\n";
+      }
+    }
+    return message;
+  }
 }

package/dist/operations/extract.js CHANGED Viewed

@@ -1,7 +1,8 @@
-import { z } from "@bpinternal/zui";
+import { z, transforms } from "@bpinternal/zui";
 import JSON5 from "json5";
 import { jsonrepair } from "jsonrepair";
 import { chunk, isArray } from "lodash-es";
+import pLimit from "p-limit";
 import { ZaiContext } from "../context";
 import { Response } from "../response";
 import { getTokenizer } from "../tokenizer";
@@ -17,9 +18,10 @@ const Options = z.object({
 const START = "\u25A0json_start\u25A0";
 const END = "\u25A0json_end\u25A0";
 const NO_MORE = "\u25A0NO_MORE_ELEMENT\u25A0";
+const ZERO_ELEMENTS = "\u25A0ZERO_ELEMENTS\u25A0";
 const extract = async (input, _schema, _options, ctx) => {
   ctx.controller.signal.throwIfAborted();
-  let schema = _schema;
+  let schema = transforms.fromJSONSchema(transforms.toJSONSchema(_schema));
   const options = Options.parse(_options ?? {});
   const tokenizer = await getTokenizer();
   const model = await ctx.getModel();
@@ -62,19 +64,22 @@ const extract = async (input, _schema, _options, ctx) => {
   const keys = Object.keys(schema.shape);
   const inputAsString = stringify(input);
   if (tokenizer.count(inputAsString) > options.chunkLength) {
+    const limit = pLimit(10);
     const tokens = tokenizer.split(inputAsString);
     const chunks = chunk(tokens, options.chunkLength).map((x) => x.join(""));
     const all = await Promise.allSettled(
       chunks.map(
-        (chunk2) => extract(
-          chunk2,
-          originalSchema,
-          {
-            ...options,
-            strict: false
-            // We don't want to fail on strict mode for sub-chunks
-          },
-          ctx
+        (chunk2) => limit(
+          () => extract(
+            chunk2,
+            originalSchema,
+            {
+              ...options,
+              strict: false
+              // We don't want to fail on strict mode for sub-chunks
+            },
+            ctx
+          )
         )
       )
     ).then(
@@ -110,8 +115,11 @@ Merge it back into a final result.`.trim(),
     instructions.push("You may have multiple elements, or zero elements in the input.");
     instructions.push("You must extract each element separately.");
     instructions.push(`Each element must be a JSON object with exactly the format: ${START}${shape}${END}`);
+    instructions.push(`If there are no elements to extract, respond with ${ZERO_ELEMENTS}.`);
     instructions.push(`When you are done extracting all elements, type "${NO_MORE}" to finish.`);
-    instructions.push(`For example, if you have zero elements, the output should look like this: ${NO_MORE}`);
+    instructions.push(
+      `For example, if you have zero elements, the output should look like this: ${ZERO_ELEMENTS}${NO_MORE}`
+    );
     instructions.push(
       `For example, if you have two elements, the output should look like this: ${START}${abbv}${END}${START}${abbv}${END}${NO_MORE}`
     );

package/dist/operations/filter.js CHANGED Viewed

@@ -1,5 +1,6 @@
 import { z } from "@bpinternal/zui";
 import { clamp } from "lodash-es";
+import pLimit from "p-limit";
 import { ZaiContext } from "../context";
 import { Response } from "../response";
 import { getTokenizer } from "../tokenizer";
@@ -191,7 +192,8 @@ The condition is: "${condition}"
     }
     return partial;
   };
-  const filteredChunks = await Promise.all(chunks.map(filterChunk));
+  const limit = pLimit(10);
+  const filteredChunks = await Promise.all(chunks.map((chunk) => limit(() => filterChunk(chunk))));
   return filteredChunks.flat();
 };
 Zai.prototype.filter = function(input, condition, _options) {