npm - @botpress/zai - Versions diffs - 1.0.1 → 1.2.0 - Mend

@botpress/zai 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

package/README.md +1 -1
package/build.ts +9 -0
package/dist/adapters/adapter.js +2 -0
package/dist/adapters/botpress-table.js +168 -0
package/dist/adapters/memory.js +12 -0
package/dist/index.d.ts +111 -609
package/dist/index.js +9 -1873
package/dist/operations/check.js +153 -0
package/dist/operations/constants.js +2 -0
package/dist/operations/errors.js +15 -0
package/dist/operations/extract.js +232 -0
package/dist/operations/filter.js +191 -0
package/dist/operations/label.js +249 -0
package/dist/operations/rewrite.js +123 -0
package/dist/operations/summarize.js +133 -0
package/dist/operations/text.js +47 -0
package/dist/utils.js +37 -0
package/dist/zai.js +100 -0
package/e2e/data/botpress_docs.txt +26040 -0
package/e2e/data/cache.jsonl +107 -0
package/e2e/utils.ts +89 -0
package/package.json +33 -29
package/src/adapters/adapter.ts +35 -0
package/src/adapters/botpress-table.ts +210 -0
package/src/adapters/memory.ts +13 -0
package/src/index.ts +11 -0
package/src/operations/check.ts +201 -0
package/src/operations/constants.ts +2 -0
package/src/operations/errors.ts +9 -0
package/src/operations/extract.ts +309 -0
package/src/operations/filter.ts +244 -0
package/src/operations/label.ts +345 -0
package/src/operations/rewrite.ts +161 -0
package/src/operations/summarize.ts +195 -0
package/src/operations/text.ts +65 -0
package/src/utils.ts +52 -0
package/src/zai.ts +147 -0
package/tsconfig.json +3 -23
package/dist/index.cjs +0 -1903
package/dist/index.cjs.map +0 -1
package/dist/index.d.cts +0 -916
package/dist/index.js.map +0 -1
package/tsup.config.ts +0 -16
package/vitest.config.ts +0 -9
package/vitest.setup.ts +0 -24

package/e2e/utils.ts ADDED Viewed

@@ -0,0 +1,89 @@
+import { Client } from '@botpress/client'
+import { type TextTokenizer, getWasmTokenizer } from '@bpinternal/thicktoken'
+import fs from 'node:fs'
+import path from 'node:path'
+import { beforeAll } from 'vitest'
+import { Zai } from '../src'
+import { fastHash } from '../src/utils'
+// Fixture directory, resolved relative to this file.
+const DATA_PATH = path.join(__dirname, 'data')
+// JSONL file that is read once at module load (see `cache`) and appended to by getCachedClient.
+const CACHE_PATH = path.join(DATA_PATH, 'cache.jsonl')
+// Text fixture loaded into the exported BotpressDocumentation constant.
+const DOC_PATH = path.join(DATA_PATH, 'botpress_docs.txt')
+/**
+ * Builds a Botpress client configured from environment variables.
+ * CLOUD_API_ENDPOINT falls back to 'https://api.botpress.dev' when it is not set;
+ * CLOUD_BOT_ID and CLOUD_PAT are forwarded unchanged (possibly undefined).
+ */
+export const getClient = () => {
+  const { CLOUD_API_ENDPOINT, CLOUD_BOT_ID, CLOUD_PAT } = process.env
+  const client = new Client({
+    apiUrl: CLOUD_API_ENDPOINT ?? 'https://api.botpress.dev',
+    botId: CLOUD_BOT_ID,
+    token: CLOUD_PAT,
+  })
+  return client
+}
+/**
+ * Reads a JSON Lines file into a Map keyed by one property of each record.
+ * Empty lines are skipped. When several records share a key, the last one wins.
+ *
+ * @param filePath path of the JSONL file, read synchronously as UTF-8
+ * @param keyProperty property whose stringified value becomes the map key
+ */
+function readJSONL<T>(filePath: string, keyProperty: keyof T): Map<string, T> {
+  const content = fs.readFileSync(filePath, 'utf-8')
+  const entries = new Map<string, T>()
+  content.split(/\r?\n/).forEach((rawLine) => {
+    if (!rawLine) {
+      return
+    }
+    const record = JSON.parse(rawLine) as T
+    entries.set(String(record[keyProperty]), record)
+  })
+  return entries
+}
+// In-memory copy of the on-disk response cache, keyed by request hash. Loaded once at module load.
+const cache: Map<string, { key: string; value: any }> = readJSONL(CACHE_PATH, 'key')
+/**
+ * Returns a client proxy whose `callAction` is answered from the cache when possible.
+ *
+ * The cache key is a hash of the JSON-serialised call arguments. On a miss the real
+ * client is called, and the response is stored both in memory and as a new line in
+ * the cache file. Every other property is read straight from the underlying client.
+ */
+export const getCachedClient = () => {
+  const proxy = new Proxy(getClient(), {
+    get(target, prop) {
+      if (prop !== 'callAction') {
+        return Reflect.get(target, prop)
+      }
+      return async (...args: Parameters<Client['callAction']>) => {
+        const key = fastHash(JSON.stringify(args))
+        const hit = cache.get(key)
+        if (hit) {
+          return hit.value
+        }
+        const response = await target.callAction(...args)
+        const entry = { key, value: response }
+        cache.set(key, entry)
+        fs.appendFileSync(CACHE_PATH, `${JSON.stringify(entry)}\n`)
+        return response
+      }
+    },
+  })
+  // The handler has no `set` trap, so this assignment is stored on the underlying client.
+  // Cloning yields a fresh cached client (presumably so clones keep using the cache; confirm with callers).
+  ;(proxy as any).clone = () => getCachedClient()
+  return proxy
+}
+/** Creates a Zai instance backed by a freshly built cached client. */
+export const getZai = () => new Zai({ client: getCachedClient() })
+// Shared tokenizer for the tests. It holds null (hidden by the `!` assertion) until the
+// beforeAll hook below has resolved, so it must not be used at module load time.
+export let tokenizer: TextTokenizer = null!
+beforeAll(async () => {
+  tokenizer = (await getWasmTokenizer()) as TextTokenizer
+})
+// Full text of the documentation fixture, with leading/trailing whitespace removed.
+export const BotpressDocumentation = fs.readFileSync(DOC_PATH, 'utf-8').trim()
+// Fixed generation-metadata value for tests that need one (e.g. when building examples by hand).
+export const metadata = { cost: { input: 1, output: 1 }, latency: 0, model: '', tokens: { input: 1, output: 1 } }

package/package.json CHANGED Viewed

@@ -1,46 +1,50 @@
 {
   "name": "@botpress/zai",
-  "version": "1.0.1",
-  "type": "module",
-  "private": false,
   "description": "Zui AI (zai) – An LLM utility library written on top of Zui and the Botpress API",
+  "version": "1.2.0",
+  "main": "./dist/index.js",
+  "types": "./dist/index.d.ts",
   "exports": {
-    ".": {
-      "types": "./dist/index.d.ts",
-      "import": "./dist/index.js",
-      "require": "./dist/index.cjs"
-    }
+    "types": "./dist/index.d.ts",
+    "require": "./dist/index.js",
+    "import": "./dist/index.js"
   },
   "scripts": {
-    "build": "tsup",
+    "check:type": "tsc --noEmit",
+    "build": "bp add -y && pnpm run build:types && pnpm run build:neutral",
+    "build:neutral": "ts-node -T ./build.ts",
+    "build:types": "tsup",
     "watch": "tsup --watch",
-    "test": "vitest run --config vitest.config.ts",
-    "test:update": "vitest -u run --config vitest.config.ts",
-    "test:watch": "vitest --config vitest.config.ts",
-    "build-with-latest-models": "pnpm run update-types && pnpm run update-models && pnpm run build",
-    "update-models": "ts-node ./src/scripts/update-models.ts",
-    "update-types": "ts-node ./src/scripts/update-types.ts"
+    "test:e2e": "vitest run --config vitest.config.ts",
+    "test:e2e:update": "vitest -u run --config vitest.config.ts",
+    "test:e2e:watch": "vitest --config vitest.config.ts"
   },
   "keywords": [],
   "author": "",
   "license": "ISC",
   "dependencies": {
-    "json5": "^2.2.1",
-    "jsonrepair": "^3.2.0"
+    "@botpress/cognitive": "^0.1.6",
+    "json5": "^2.2.3",
+    "jsonrepair": "^3.10.0",
+    "lodash-es": "^4.17.21"
   },
   "devDependencies": {
-    "@botpress/vai": "0.0.1-beta.7",
-    "@types/lodash": "^4.17.0",
-    "dotenv": "^16.3.1",
-    "ts-node": "^10.9.2",
-    "tsup": "^8.3.5",
-    "typescript": "^5.7.2",
-    "vitest": "^2.0.5"
+    "@botpress/client": "workspace:^",
+    "@botpress/common": "workspace:*",
+    "@botpress/vai": "workspace:*",
+    "@types/lodash-es": "^4.17.12",
+    "dotenv": "^16.4.4",
+    "esbuild": "^0.16.12",
+    "glob": "^9.3.4",
+    "lodash": "^4.17.21",
+    "tsup": "^8.0.2"
   },
   "peerDependencies": {
-    "@botpress/client": "^0.36.2",
-    "@botpress/sdk": "^1.6.1",
-    "@botpress/wasm": "^1.0.0",
-    "lodash": "^4.17.21"
-  }
+    "@bpinternal/thicktoken": "^1.0.0",
+    "@bpinternal/zui": "^0.17.1"
+  },
+  "engines": {
+    "node": ">=18.0.0"
+  },
+  "packageManager": "pnpm@8.6.2"
 }

package/src/adapters/adapter.ts ADDED Viewed

@@ -0,0 +1,35 @@
+import { GenerationMetadata } from '../utils'
+/** Arguments accepted by `Adapter.saveExample`. */
+export type SaveExampleProps<TInput, TOutput> = {
+  /** Identifier of the example; adapters may use it to detect an exact match or to upsert. */
+  key: string
+  /** Name of the operation that produced the example (e.g. 'zai.check'). */
+  taskType: string
+  /** Identifier of the task the example belongs to. */
+  taskId: string
+  /** Instructions that were given for this example. */
+  instructions: string
+  input: TInput
+  output: TOutput
+  /** Optional free-text justification for the output. */
+  explanation?: string
+  /** Cost, latency, model and token usage of the generation. */
+  metadata: GenerationMetadata
+  /** Review status; what happens when it is omitted is up to the adapter. */
+  status?: 'pending' | 'approved'
+}
+/** Arguments accepted by `Adapter.getExamples`. */
+export type GetExamplesProps<TInput> = {
+  /** Name of the operation requesting examples (e.g. 'zai.check'). */
+  taskType: string
+  /** Identifier of the task whose examples are requested. */
+  taskId: string
+  /** Input for which examples are being looked up. */
+  input: TInput
+}
+/** Storage contract for examples: implementations look examples up and persist new ones. */
+export abstract class Adapter {
+  /**
+   * Returns stored examples for the given task and input.
+   * Each result carries its key, the stored input/output, an optional explanation
+   * and a numeric similarity value (its scale is defined by the implementation).
+   */
+  public abstract getExamples<TInput, TOutput>(
+    props: GetExamplesProps<TInput>
+  ): Promise<
+    Array<{
+      key: string
+      input: TInput
+      output: TOutput
+      explanation?: string
+      similarity: number
+    }>
+  >
+  /** Persists one example. */
+  public abstract saveExample<TInput, TOutput>(props: SaveExampleProps<TInput, TOutput>): Promise<void>
+}

package/src/adapters/botpress-table.ts ADDED Viewed

@@ -0,0 +1,210 @@
+import { type Client } from '@botpress/client'
+import { z } from '@bpinternal/zui'
+import { GenerationMetadata } from '../utils'
+import { Adapter, GetExamplesProps, SaveExampleProps } from './adapter'
+// Tags sent when the table is created AND verified on an already existing table
+// (see TableAdapter._assertTableExists).
+const CRITICAL_TAGS = {
+  system: 'true',
+  'schema-purpose': 'active-learning',
+  'schema-version': 'Oct-2024',
+} as const
+// Tags sent when the table is created; they are not verified on an existing table.
+const OPTIONAL_TAGS = {
+  'x-studio-title': 'Active Learning',
+  'x-studio-description': 'Table for storing active learning tasks and examples',
+  'x-studio-readonly': 'true',
+  'x-studio-icon': 'lucide://atom',
+  'x-studio-color': 'green',
+} as const
+// Value passed as the table `factor` on creation and checked on an existing table.
+const FACTOR = 30
+// Constructor arguments of TableAdapter.
+const Props = z.object({
+  // Accepted as-is: the custom validator always passes, so no runtime check is done on the client.
+  client: z.custom(() => true),
+  // 1 to 45 letters (either case), digits or underscores, followed by the literal suffix "Table".
+  // The message below describes exactly what the pattern enforces.
+  tableName: z
+    .string()
+    .regex(
+      /^[a-zA-Z0-9_]{1,45}Table$/,
+      'Table name must contain only letters, numbers and underscores, end with "Table" and be at most 50 characters long'
+    ),
+})
+// Row shape of the examples table. The exported type is the schema's *input* type,
+// so fields with a default (feedback) may be omitted when building a row.
+export type TableSchema = z.input<typeof TableSchema>
+const TableSchema = z.object({
+  taskType: z.string().describe('The type of the task (filter, extract, etc.)'),
+  taskId: z.string(),
+  key: z.string().describe('A unique key for the task (e.g. a hash of the input, taskId, taskType and instructions)'),
+  instructions: z.string(),
+  // input/output are stored as objects; TableAdapter wraps the actual values as `{ value: ... }`.
+  input: z.object({}).passthrough().describe('The input to the task'),
+  output: z.object({}).passthrough().describe('The expected output'),
+  explanation: z.string().nullable(),
+  metadata: GenerationMetadata,
+  status: z.enum(['pending', 'rejected', 'approved']),
+  // Optional reviewer feedback; null when none has been given.
+  feedback: z
+    .object({
+      rating: z.enum(['very-bad', 'bad', 'good', 'very-good']),
+      comment: z.string().nullable(),
+    })
+    .nullable()
+    .default(null),
+})
+// Columns flagged as searchable in the table schema. `satisfies` checks the names against
+// TableSchema; the cast to string[] lets `includes` accept arbitrary column names below.
+const searchableColumns = ['input'] as const satisfies Array<keyof typeof TableSchema.shape> as string[]
+// JSON schema of every column, with an `x-zui.searchable` flag set on each one
+// (true only for the columns listed in searchableColumns).
+const TableJsonSchema = Object.entries(TableSchema.shape).reduce((acc, [key, value]) => {
+  acc[key] = value.toJsonSchema()
+  acc[key]['x-zui'] ??= {}
+  acc[key]['x-zui'].searchable = searchableColumns.includes(key)
+  return acc
+}, {})
+/**
+ * Adapter that stores examples in a Botpress table and retrieves approved ones by search.
+ *
+ * The table is created (or validated, if it already exists) lazily on the first call to
+ * getExamples/saveExample. Failures of the underlying client calls are logged and swallowed,
+ * so both operations are best-effort and never reject because of a client error.
+ */
+export class TableAdapter extends Adapter {
+  private _client: Client
+  private _tableName: string
+  // 'ready'       -> the table has not been checked yet (initial state)
+  // 'initialized' -> the table was created, or already existed and passed validation
+  // 'error'       -> creation failed, or the existing table does not match what is expected
+  private _status: 'initialized' | 'ready' | 'error'
+  /**
+   * @param props client and table name; validated with `Props` (throws when the table name is invalid)
+   */
+  public constructor(props: z.input<typeof Props>) {
+    super()
+    props = Props.parse(props)
+    this._client = props.client
+    this._tableName = props.tableName
+    this._status = 'ready'
+  }
+  /**
+   * Searches the table for approved examples of the given task that are close to `input`.
+   * Returns an empty list when the lookup fails (the error is logged).
+   */
+  public async getExamples<TInput, TOutput>({ taskType, taskId, input }: GetExamplesProps<TInput>) {
+    await this._assertTableExists()
+    const { rows } = await this._client
+      .findTableRows({
+        table: this._tableName,
+        search: JSON.stringify({ value: input }).substring(0, 1023), // Search is limited to 1024 characters
+        limit: 10, // TODO
+        filter: {
+          // Proximity match of approved examples
+          taskType,
+          taskId,
+          status: 'approved',
+        } satisfies Partial<TableSchema>,
+      })
+      .catch((err) => {
+        // TODO: handle error
+        console.error(`Error fetching examples: ${err.message}`)
+        return { rows: [] }
+      })
+    // Values are stored wrapped as `{ value }` (see saveExample), so unwrap them here.
+    return rows.map((row) => ({
+      key: row.key,
+      input: row.input.value as TInput,
+      output: row.output.value as TOutput,
+      explanation: row.explanation,
+      similarity: row.similarity ?? 0,
+    }))
+  }
+  /**
+   * Upserts one example, keyed by `key`. `status` defaults to 'pending'.
+   * A failed write is logged and otherwise ignored.
+   */
+  public async saveExample<TInput, TOutput>({
+    key,
+    taskType,
+    taskId,
+    instructions,
+    input,
+    output,
+    explanation,
+    metadata,
+    status = 'pending',
+  }: SaveExampleProps<TInput, TOutput>) {
+    await this._assertTableExists()
+    await this._client
+      .upsertTableRows({
+        table: this._tableName,
+        keyColumn: 'key',
+        rows: [
+          {
+            key,
+            taskType,
+            taskId,
+            instructions,
+            input: { value: input },
+            output: { value: output },
+            explanation: explanation ?? null,
+            status,
+            metadata,
+          } satisfies TableSchema,
+        ],
+      })
+      .catch((err) => {
+        // TODO: handle error
+        // Saving stays best-effort, but the failure is reported the same way getExamples reports its own.
+        console.error(`Error saving example: ${err.message}`)
+      })
+  }
+  /**
+   * Creates the table if needed, or validates an existing one, the first time it is called.
+   * Later calls return immediately. The outcome is recorded in `_status`; this method never
+   * throws because of a client error or a mismatching table.
+   */
+  private async _assertTableExists() {
+    // Only the initial 'ready' state triggers the check; both outcomes make later calls no-ops.
+    if (this._status !== 'ready') {
+      return
+    }
+    const { table, created } = await this._client
+      .getOrCreateTable({
+        table: this._tableName,
+        factor: FACTOR,
+        frozen: true,
+        isComputeEnabled: false,
+        tags: {
+          ...CRITICAL_TAGS,
+          ...OPTIONAL_TAGS,
+        },
+        schema: TableJsonSchema,
+      })
+      .catch(() => {
+        this._status = 'error'
+        return { table: null, created: false }
+      })
+    if (!table) {
+      return
+    }
+    if (!created) {
+      // The table already existed: make sure it has the settings and columns this adapter relies on.
+      const issues: string[] = []
+      if (table.factor !== FACTOR) {
+        issues.push(`Factor is ${table.factor} instead of ${FACTOR}`)
+      }
+      if (table.frozen !== true) {
+        issues.push('Table is not frozen')
+      }
+      for (const [key, value] of Object.entries(CRITICAL_TAGS)) {
+        if (table.tags?.[key] !== value) {
+          issues.push(`Tag ${key} is ${table.tags?.[key]} instead of ${value}`)
+        }
+      }
+      for (const key of Object.keys(TableJsonSchema)) {
+        // `properties` and the column's `x-zui` object may be absent on a foreign table;
+        // treat that as a mismatch instead of throwing.
+        const column = table.schema?.properties?.[key]
+        const expected = TableJsonSchema[key] as { type: string }
+        if (!column) {
+          issues.push(`Column ${key} is missing`)
+          continue
+        }
+        if (column.type !== expected.type) {
+          issues.push(`Column ${key} has type ${column.type} instead of ${expected.type}`)
+        }
+        if (expected['x-zui'].searchable && !column['x-zui']?.searchable) {
+          issues.push(`Column ${key} is not searchable but should be`)
+        }
+      }
+      if (issues.length) {
+        // Keep the error state: returning here prevents it from being overwritten below.
+        this._status = 'error'
+        return
+      }
+    }
+    this._status = 'initialized'
+  }
+}

package/src/adapters/memory.ts ADDED Viewed

@@ -0,0 +1,13 @@
+import { Adapter } from './adapter'
+/** Adapter backed by a fixed in-memory list: it always returns that list and never stores anything. */
+export class MemoryAdapter extends Adapter {
+  public examples: any[]
+  public constructor(examples: any[]) {
+    super()
+    this.examples = examples
+  }
+  /** Returns the list given to the constructor, regardless of task or input. */
+  public async getExamples() {
+    return this.examples
+  }
+  /** No-op: examples are not persisted. */
+  public async saveExample() {}
+}

package/src/index.ts ADDED Viewed

@@ -0,0 +1,11 @@
+import { Zai } from './zai'
+import './operations/text'
+import './operations/rewrite'
+import './operations/summarize'
+import './operations/check'
+import './operations/filter'
+import './operations/extract'
+import './operations/label'
+export { Zai }

package/src/operations/check.ts ADDED Viewed

@@ -0,0 +1,201 @@
+// eslint-disable consistent-type-definitions
+import { z } from '@bpinternal/zui'
+import { fastHash, stringify, takeUntilTokens } from '../utils'
+import { Zai } from '../zai'
+import { PROMPT_INPUT_BUFFER } from './constants'
+// One caller-supplied example: an input, the expected verdict and an optional justification.
+const Example = z.object({
+  input: z.any(),
+  check: z.boolean(),
+  reason: z.string().optional(),
+})
+// Options of `check`. The exported type is the schema's input type, so `examples` may be omitted.
+export type Options = z.input<typeof Options>
+const Options = z.object({
+  examples: z.array(Example).describe('Examples to check the condition against').default([]),
+})
+declare module '@botpress/zai' {
+  interface Zai {
+    /** Checks whether a condition is true or not */
+    check(input: unknown, condition: string, options?: Options): Promise<boolean>
+  }
+}
+// Sentinel markers the model is asked to emit. TRUE/FALSE carry the verdict,
+// END terminates the response and is also used as the stop sequence.
+const TRUE = '■TRUE■'
+const FALSE = '■FALSE■'
+const END = '■END■'
+/**
+ * Asks the model whether `condition` holds for `input` and returns the verdict.
+ *
+ * Input and condition are truncated to a share of the model's input budget; the remaining
+ * budget is filled with examples (stored examples and caller-supplied ones, or two built-in
+ * defaults when there are none). When a task id is set, a stored example whose key matches
+ * this exact request short-circuits the call, and every new verdict is saved as an example.
+ *
+ * @throws Error when the response has no text content or contains neither verdict marker
+ */
+Zai.prototype.check = async function (this: Zai, input, condition, _options) {
+  const options = Options.parse(_options ?? {})
+  const tokenizer = await this.getTokenizer()
+  await this.fetchModelDetails()
+  // Tokens available for input, condition and examples (never less than 100).
+  const PROMPT_COMPONENT = Math.max(this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER, 100)
+  const taskId = this.taskId
+  const taskType = 'zai.check'
+  const PROMPT_TOKENS = {
+    INPUT: Math.floor(0.5 * PROMPT_COMPONENT),
+    CONDITION: Math.floor(0.2 * PROMPT_COMPONENT),
+  }
+  // Truncate the input to fit the model's input size
+  const inputAsString = tokenizer.truncate(stringify(input), PROMPT_TOKENS.INPUT)
+  condition = tokenizer.truncate(condition, PROMPT_TOKENS.CONDITION)
+  // All tokens remaining after the input and condition are accounted can be used for examples
+  const EXAMPLES_TOKENS = PROMPT_COMPONENT - tokenizer.count(inputAsString) - tokenizer.count(condition)
+  // Identifies this exact request; used both to find an exact stored match and to save the result.
+  const Key = fastHash(
+    JSON.stringify({
+      taskType,
+      taskId,
+      input: inputAsString,
+      condition,
+    })
+  )
+  // Stored examples are only consulted when a task id is set.
+  const examples = taskId
+    ? await this.adapter.getExamples<string, boolean>({
+        input: inputAsString,
+        taskType,
+        taskId,
+      })
+    : []
+  const exactMatch = examples.find((x) => x.key === Key)
+  if (exactMatch) {
+    return exactMatch.output
+  }
+  const defaultExamples = [
+    { input: '50 Cent', check: true, reason: '50 Cent is widely recognized as a public personality.' },
+    {
+      input: ['apple', 'banana', 'carrot', 'house'],
+      check: false,
+      reason:
+        'The list contains a house, which is not a fruit. Also, the list contains a carrot, which is a vegetable.',
+    },
+  ]
+  const userExamples = [
+    ...examples.map((e) => ({ input: e.input, check: e.output, reason: e.explanation })),
+    ...options.examples,
+  ]
+  // Shared counter: formatInput increments it for every block it formats while user examples exist.
+  // The system prompt below reads it after the examples were formatted, so statement order matters.
+  let exampleId = 1
+  const formatInput = (input: string, condition: string) => {
+    const header = userExamples.length ? `Expert Example #${exampleId++}` : `Example of condition: "${condition}"`
+    return `
+${header}
+<|start_input|>
+${input.trim()}
+<|end_input|>
+`.trim()
+  }
+  const formatOutput = (answer: boolean, justification: string) => {
+    return `
+Analysis: ${justification}
+Final Answer: ${answer ? TRUE : FALSE}
+${END}
+`.trim()
+  }
+  // Each example becomes a user message (the input) followed by an assistant message (the verdict).
+  const formatExample = (example: { input?: any; check: boolean; reason?: string }) => [
+    { type: 'text' as const, content: formatInput(stringify(example.input ?? null), condition), role: 'user' as const },
+    {
+      type: 'text' as const,
+      content: formatOutput(example.check, example.reason ?? ''),
+      role: 'assistant' as const,
+    },
+  ]
+  const allExamples = takeUntilTokens(
+    userExamples.length ? userExamples : defaultExamples,
+    EXAMPLES_TOKENS,
+    (el) => tokenizer.count(stringify(el.input)) + tokenizer.count(el.reason ?? '')
+  )
+    .map(formatExample)
+    .flat()
+  const specialInstructions = userExamples.length
+    ? `
+- You have been provided with examples from previous experts. Make sure to read them carefully before making your decision.
+- Make sure to refer to the examples provided by the experts to justify your decision (when applicable).
+- When in doubt, ground your decision on the examples provided by the experts instead of your own intuition.
+- When no example is similar to the input, make sure to provide a clear justification for your decision while inferring the decision-making process from the examples provided by the experts.
+`.trim()
+    : ''
+  const { output, meta } = await this.callModel({
+    systemPrompt: `
+Check if the following condition is true or false for the given input. Before answering, make sure to read the input and the condition carefully.
+Justify your answer, then answer with either ${TRUE} or ${FALSE} at the very end, then add ${END} to finish the response.
+IMPORTANT: Make sure to answer with either ${TRUE} or ${FALSE} at the end of your response, but NOT both.
+---
+Expert Examples (#1 to #${exampleId - 1}):
+${specialInstructions}
+`.trim(),
+    stopSequences: [END],
+    messages: [
+      ...allExamples,
+      {
+        type: 'text',
+        content: `
+Considering the below input and above examples, is the following condition true or false?
+${formatInput(inputAsString, condition)}
+In your "Analysis", please refer to the Expert Examples # to justify your decision.`.trim(),
+        role: 'user',
+      },
+    ],
+  })
+  // Fail with a clear message when there is no choice or its content is not text,
+  // instead of letting `.includes` blow up on undefined.
+  const answer = output.choices[0]?.content
+  if (typeof answer !== 'string') {
+    throw new Error('The model did not return a valid answer. The response did not contain any text content.')
+  }
+  const hasTrue = answer.includes(TRUE)
+  const hasFalse = answer.includes(FALSE)
+  if (!hasTrue && !hasFalse) {
+    throw new Error(`The model did not return a valid answer. The response was: ${answer}`)
+  }
+  let finalAnswer: boolean
+  if (hasTrue && hasFalse) {
+    // If both TRUE and FALSE are present, we need to check which one was answered last
+    finalAnswer = answer.lastIndexOf(TRUE) > answer.lastIndexOf(FALSE)
+  } else {
+    finalAnswer = hasTrue
+  }
+  if (taskId) {
+    // Strip every occurrence of the sentinel markers so none of them leaks into the stored
+    // explanation (which is later replayed to the model as an example's analysis).
+    const explanation = [TRUE, FALSE, END]
+      .reduce((text, marker) => text.split(marker).join(''), answer)
+      .replace('Final Answer:', '')
+      .trim()
+    await this.adapter.saveExample({
+      key: Key,
+      taskType,
+      taskId,
+      input: inputAsString,
+      instructions: condition,
+      metadata: {
+        cost: {
+          input: meta.cost.input,
+          output: meta.cost.output,
+        },
+        latency: meta.latency,
+        model: this.Model,
+        tokens: {
+          input: meta.tokens.input,
+          output: meta.tokens.output,
+        },
+      },
+      output: finalAnswer,
+      explanation,
+    })
+  }
+  return finalAnswer
+}

package/src/operations/constants.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ export const PROMPT_INPUT_BUFFER = 1048 // tokens subtracted from the model's max input tokens when sizing prompts (see check.ts)
2	+ export const PROMPT_OUTPUT_BUFFER = 512 // presumably the output-side counterpart; not used in the code visible here, so confirm

package/src/operations/errors.ts ADDED Viewed

@@ -0,0 +1,9 @@
+/**
+ * Error raised when a value could not be parsed as JSON.
+ * Keeps the offending value (`json`) and the underlying parser error (`error`),
+ * and embeds both in the message.
+ */
+export class JsonParsingError extends Error {
+  public constructor(
+    public json: unknown,
+    public error: Error
+  ) {
+    const message = `Error parsing JSON:\n\n---JSON---\n${JsonParsingError._describe(json)}\n\n---Error---\n\n ${error}`
+    super(message)
+    this.name = 'JsonParsingError'
+  }
+  /**
+   * Renders the offending value for the message. Strings are shown verbatim; other values are
+   * serialised so that objects do not show up as "[object Object]".
+   */
+  private static _describe(json: unknown): string {
+    if (typeof json === 'string') {
+      return json
+    }
+    try {
+      // JSON.stringify returns undefined for undefined/functions/symbols: fall back to String().
+      return JSON.stringify(json, null, 2) ?? String(json)
+    } catch {
+      // e.g. circular structures or BigInt values
+      return String(json)
+    }
+  }
+}