@botpress/zai 2.0.16 → 2.1.1

This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
@@ -1,6 +1,9 @@
 // eslint-disable consistent-type-definitions
 import { z } from '@bpinternal/zui'
 
+import { ZaiContext } from '../context'
+import { Response } from '../response'
+import { getTokenizer } from '../tokenizer'
 import { fastHash, stringify, takeUntilTokens } from '../utils'
 import { Zai } from '../zai'
 import { PROMPT_INPUT_BUFFER } from './constants'
@@ -31,29 +34,35 @@ const Options = z.object({
 declare module '@botpress/zai' {
   interface Zai {
     /** Rewrites a string according to match the prompt */
-    rewrite(original: string, prompt: string, options?: Options): Promise<string>
+    rewrite(original: string, prompt: string, options?: Options): Response<string>
   }
 }
 
 const START = '■START■'
 const END = '■END■'
 
-Zai.prototype.rewrite = async function (this: Zai, original, prompt, _options) {
+const rewrite = async (
+  original: string,
+  prompt: string,
+  _options: Options | undefined,
+  ctx: ZaiContext
+): Promise<string> => {
+  ctx.controller.signal.throwIfAborted()
   const options = Options.parse(_options ?? {}) as Options
-  const tokenizer = await this.getTokenizer()
-  await this.fetchModelDetails()
+  const tokenizer = await getTokenizer()
+  const model = await ctx.getModel()
 
-  const taskId = this.taskId
+  const taskId = ctx.taskId
   const taskType = 'zai.rewrite'
 
-  const INPUT_COMPONENT_SIZE = Math.max(100, (this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER) / 2)
+  const INPUT_COMPONENT_SIZE = Math.max(100, (model.input.maxTokens - PROMPT_INPUT_BUFFER) / 2)
   prompt = tokenizer.truncate(prompt, INPUT_COMPONENT_SIZE)
 
   const inputSize = tokenizer.count(original) + tokenizer.count(prompt)
-  const maxInputSize = this.ModelDetails.input.maxTokens - tokenizer.count(prompt) - PROMPT_INPUT_BUFFER
+  const maxInputSize = model.input.maxTokens - tokenizer.count(prompt) - PROMPT_INPUT_BUFFER
   if (inputSize > maxInputSize) {
     throw new Error(
-      `The input size is ${inputSize} tokens long, which is more than the maximum of ${maxInputSize} tokens for this model (${this.ModelDetails.name} = ${this.ModelDetails.input.maxTokens} tokens)`
+      `The input size is ${inputSize} tokens long, which is more than the maximum of ${maxInputSize} tokens for this model (${model.name} = ${model.input.maxTokens} tokens)`
     )
   }
 
@@ -98,13 +107,14 @@ ${END}
     { input: '1\n2\n3', output: '3\n2\n1', instructions: 'reverse the order' },
   ]
 
-  const tableExamples = taskId
-    ? await this.adapter.getExamples<string, string>({
-        input: original,
-        taskId,
-        taskType,
-      })
-    : []
+  const tableExamples =
+    taskId && ctx.adapter
+      ? await ctx.adapter.getExamples<string, string>({
+          input: original,
+          taskId,
+          taskType,
+        })
+      : []
 
   const exactMatch = tableExamples.find((x) => x.key === Key)
   if (exactMatch) {
@@ -116,7 +126,7 @@ ${END}
     ...options.examples,
   ]
 
-  const REMAINING_TOKENS = this.ModelDetails.input.maxTokens - tokenizer.count(prompt) - PROMPT_INPUT_BUFFER
+  const REMAINING_TOKENS = model.input.maxTokens - tokenizer.count(prompt) - PROMPT_INPUT_BUFFER
   const examples = takeUntilTokens(
     savedExamples.length ? savedExamples : defaultExamples,
    REMAINING_TOKENS,
@@ -125,7 +135,7 @@ ${END}
     .map(formatExample)
     .flat()
 
-  const { output, meta } = await this.callModel({
+  const { extracted, meta } = await ctx.generateContent({
     systemPrompt: `
Rewrite the text between the ${START} and ${END} tags to match the user prompt.
${instructions.map((x) => `• ${x}`).join('\n')}
@@ -133,9 +143,16 @@ ${instructions.map((x) => `• ${x}`).join('\n')}
     messages: [...examples, { type: 'text', content: format(original, prompt), role: 'user' }],
     maxTokens: options.length,
     stopSequences: [END],
+    transform: (text) => {
+      if (!text.trim().length) {
+        throw new Error('The model did not return a valid rewrite. The response was empty.')
+      }
+
+      return text
+    },
   })
 
-  let result = output.choices[0]?.content as string
+  let result = extracted
 
   if (result.includes(START)) {
     result = result.slice(result.indexOf(START) + START.length)
@@ -145,8 +162,8 @@ ${instructions.map((x) => `• ${x}`).join('\n')}
     result = result.slice(0, result.indexOf(END))
   }
 
-  if (taskId) {
-    await this.adapter.saveExample({
+  if (taskId && ctx.adapter && !ctx.controller.signal.aborted) {
+    await ctx.adapter.saveExample({
      key: Key,
      metadata: {
        cost: {
@@ -154,7 +171,7 @@ ${instructions.map((x) => `• ${x}`).join('\n')}
           output: meta.cost.output,
         },
         latency: meta.latency,
-        model: this.Model,
+        model: ctx.modelId,
        tokens: {
          input: meta.tokens.input,
          output: meta.tokens.output,
@@ -170,3 +187,15 @@ ${instructions.map((x) => `• ${x}`).join('\n')}
 
   return result
 }
+
+Zai.prototype.rewrite = function (this: Zai, original: string, prompt: string, _options?: Options): Response<string> {
+  const context = new ZaiContext({
+    client: this.client,
+    modelId: this.Model,
+    taskId: this.taskId,
+    taskType: 'zai.rewrite',
+    adapter: this.adapter,
+  })
+
+  return new Response<string>(context, rewrite(original, prompt, _options, context), (result) => result)
+}
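With this change, rewrite() returns a Response<string> rather than a bare Promise<string>. The Response is still awaitable, but it also exposes progress events and cancellation. A minimal consumption sketch, assuming an already-configured Zai instance named `zai` (that variable and its setup are not part of this diff):

    // Sketch only: `zai` is an assumed, configured Zai instance.
    const res = zai.rewrite('helo wrold', 'fix the spelling')

    // 'progress' events forward the Usage payloads emitted by ZaiContext.
    res.on('progress', (usage) => console.log('usage update:', usage))

    // Response implements PromiseLike<string>, so a plain `await` still works.
    const fixed = await res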
@@ -2,6 +2,10 @@
 import { z } from '@bpinternal/zui'
 
 import { chunk } from 'lodash-es'
+import { ZaiContext } from '../context'
+import { Response } from '../response'
+
+import { getTokenizer } from '../tokenizer'
 import { Zai } from '../zai'
 import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from './constants'
 
@@ -54,31 +58,31 @@ const Options = z.object({
 declare module '@botpress/zai' {
   interface Zai {
     /** Summarizes a text of any length to a summary of the desired length */
-    summarize(original: string, options?: Options): Promise<string>
+    summarize(original: string, options?: Options): Response<string>
   }
 }
 
 const START = '■START■'
 const END = '■END■'
 
-Zai.prototype.summarize = async function (this: Zai, original, _options) {
-  const options = Options.parse(_options ?? {}) as Options
-  const tokenizer = await this.getTokenizer()
-  await this.fetchModelDetails()
+const summarize = async (original: string, options: Options, ctx: ZaiContext): Promise<string> => {
+  ctx.controller.signal.throwIfAborted()
+  const tokenizer = await getTokenizer()
+  const model = await ctx.getModel()
 
-  const INPUT_COMPONENT_SIZE = Math.max(100, (this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER) / 4)
+  const INPUT_COMPONENT_SIZE = Math.max(100, (model.input.maxTokens - PROMPT_INPUT_BUFFER) / 4)
   options.prompt = tokenizer.truncate(options.prompt, INPUT_COMPONENT_SIZE)
   options.format = tokenizer.truncate(options.format, INPUT_COMPONENT_SIZE)
 
-  const maxOutputSize = this.ModelDetails.output.maxTokens - PROMPT_OUTPUT_BUFFER
+  const maxOutputSize = model.output.maxTokens - PROMPT_OUTPUT_BUFFER
   if (options.length > maxOutputSize) {
     throw new Error(
-      `The desired output length is ${maxOutputSize} tokens long, which is more than the maximum of ${this.ModelDetails.output.maxTokens} tokens for this model (${this.ModelDetails.name})`
+      `The desired output length is ${maxOutputSize} tokens long, which is more than the maximum of ${model.output.maxTokens} tokens for this model (${model.name})`
     )
   }
 
   // Ensure the sliding window is not bigger than the model input size
-  options.sliding.window = Math.min(options.sliding.window, this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER)
+  options.sliding.window = Math.min(options.sliding.window, model.input.maxTokens - PROMPT_INPUT_BUFFER)
 
   // Ensure the overlap is not bigger than the window
   // Most extreme case possible (all 3 same size)
@@ -111,9 +115,12 @@ ${newText}
   const chunkSize = Math.ceil(tokens.length / (parts * N))
 
   if (useMergeSort) {
+    // TODO: use pLimit here to not have too many chunks
     const chunks = chunk(tokens, chunkSize).map((x) => x.join(''))
-    const allSummaries = await Promise.all(chunks.map((chunk) => this.summarize(chunk, options)))
-    return this.summarize(allSummaries.join('\n\n============\n\n'), options)
+    const allSummaries = (await Promise.allSettled(chunks.map((chunk) => summarize(chunk, options, ctx))))
+      .filter((x) => x.status === 'fulfilled')
+      .map((x) => x.value)
+    return summarize(allSummaries.join('\n\n============\n\n'), options, ctx)
   }
 
   const summaries: string[] = []
@@ -176,7 +183,7 @@ ${newText}
     }
   }
 
-  const { output } = await this.callModel({
+  let { extracted: result } = await ctx.generateContent({
     systemPrompt: `
You are summarizing a text. The text is split into ${parts} parts, and you are currently working on part ${iteration}.
At every step, you will receive the current summary and a new part of the text. You need to amend the summary to include the new information (if needed).
@@ -191,9 +198,14 @@ ${options.format}
     messages: [{ type: 'text', content: format(currentSummary, slice), role: 'user' }],
     maxTokens: generationLength,
     stopSequences: [END],
-  })
+    transform: (text) => {
+      if (!text.trim().length) {
+        throw new Error('The model did not return a valid summary. The response was empty.')
+      }
 
-  let result = output?.choices[0]?.content as string
+      return text
+    },
+  })
 
   if (result.includes(START)) {
     result = result.slice(result.indexOf(START) + START.length)
@@ -210,3 +222,17 @@ ${options.format}
 
   return currentSummary.trim()
 }
+
+Zai.prototype.summarize = function (this: Zai, original, _options): Response<string> {
+  const options = Options.parse(_options ?? {}) as Options
+
+  const context = new ZaiContext({
+    client: this.client,
+    modelId: this.Model,
+    taskId: this.taskId,
+    taskType: 'summarize',
+    adapter: this.adapter,
+  })
+
+  return new Response<string, string>(context, summarize(original, options, context), (value) => value)
+}
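Each task now runs under a ZaiContext that owns an AbortController, and the throwIfAborted() call at the top of summarize() means recursive merge-sort passes stop once the context is aborted. Callers can tie this to their own AbortSignal through bindSignal(); a sketch, again assuming a configured `zai` instance (the input literal is a placeholder):

    // Sketch only: enforce a time budget on a summarize() call via AbortSignal.
    const longDocument = '...your long input text...'
    const controller = new AbortController()
    const pending = zai.summarize(longDocument).bindSignal(controller.signal)

    // Aborting the signal aborts the underlying ZaiContext controller.
    const timer = setTimeout(() => controller.abort(new Error('summarize timed out')), 30_000)

    try {
      console.log(await pending)
    } finally {
      clearTimeout(timer)
    }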
@@ -2,6 +2,9 @@
 import { z } from '@bpinternal/zui'
 
 import { clamp } from 'lodash-es'
+import { ZaiContext } from '../context'
+import { Response } from '../response'
+import { getTokenizer } from '../tokenizer'
 import { Zai } from '../zai'
 import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from './constants'
 
@@ -17,19 +20,20 @@ const Options = z.object({
 declare module '@botpress/zai' {
   interface Zai {
     /** Generates a text of the desired length according to the prompt */
-    text(prompt: string, options?: Options): Promise<string>
+    text(prompt: string, options?: Options): Response<string>
   }
 }
 
-Zai.prototype.text = async function (this: Zai, prompt, _options) {
+const text = async (prompt: string, _options: Options | undefined, ctx: ZaiContext): Promise<string> => {
+  ctx.controller.signal.throwIfAborted()
   const options = Options.parse(_options ?? {})
-  const tokenizer = await this.getTokenizer()
-  await this.fetchModelDetails()
+  const tokenizer = await getTokenizer()
+  const model = await ctx.getModel()
 
-  prompt = tokenizer.truncate(prompt, Math.max(this.ModelDetails.input.maxTokens - PROMPT_INPUT_BUFFER, 100))
+  prompt = tokenizer.truncate(prompt, Math.max(model.input.maxTokens - PROMPT_INPUT_BUFFER, 100))
 
   if (options.length) {
-    options.length = Math.min(this.ModelDetails.output.maxTokens - PROMPT_OUTPUT_BUFFER, options.length)
+    options.length = Math.min(model.output.maxTokens - PROMPT_OUTPUT_BUFFER, options.length)
   }
 
   const instructions: string[] = []
@@ -55,7 +59,7 @@ Zai.prototype.text = async function (this: Zai, prompt, _options) {
 | 300-500 tokens| A long paragraph (200-300 words) |`.trim()
   }
 
-  const { output } = await this.callModel({
+  const { extracted } = await ctx.generateContent({
     systemPrompt: `
Generate a text that fulfills the user prompt below. Answer directly to the prompt, without any acknowledgements or fluff. Also, make sure the text is standalone and complete.
${instructions.map((x) => `- ${x}`).join('\n')}
@@ -64,6 +68,26 @@ ${chart}
     temperature: 0.7,
     messages: [{ type: 'text', content: prompt, role: 'user' }],
     maxTokens: options.length,
+    transform: (text) => {
+      if (!text.trim().length) {
+        throw new Error('The model did not return a valid summary. The response was empty.')
+      }
+
+      return text
+    },
+  })
+
+  return extracted
+}
+
+Zai.prototype.text = function (this: Zai, prompt: string, _options?: Options): Response<string> {
+  const context = new ZaiContext({
+    client: this.client,
+    modelId: this.Model,
+    taskId: this.taskId,
+    taskType: 'zai.text',
+    adapter: this.adapter,
   })
-  return output?.choices?.[0]?.content! as string
+
+  return new Response<string>(context, text(prompt, _options, context), (result) => result)
 }
@@ -0,0 +1,114 @@
+import { Usage, ZaiContext } from './context'
+import { EventEmitter } from './emitter'
+
+// Event types for the Response class
+export type ResponseEvents<TComplete = any> = {
+  progress: Usage
+  complete: TComplete
+  error: unknown
+}
+
+export class Response<T = any, S = T> implements PromiseLike<S> {
+  private _promise: Promise<T>
+  private _eventEmitter: EventEmitter<ResponseEvents<T>>
+  private _context: ZaiContext
+  private _elasped: number | null = null
+  private _simplify: (value: T) => S
+
+  public constructor(context: ZaiContext, promise: Promise<T>, simplify: (value: T) => S) {
+    this._context = context
+    this._eventEmitter = new EventEmitter<ResponseEvents<T>>()
+    this._simplify = simplify
+    this._promise = promise.then(
+      (value) => {
+        this._elasped ||= this._context.elapsedTime
+        this._eventEmitter.emit('complete', value)
+        this._eventEmitter.clear()
+        this._context.clear()
+        return value
+      },
+      (reason) => {
+        this._elasped ||= this._context.elapsedTime
+        this._eventEmitter.emit('error', reason)
+        this._eventEmitter.clear()
+        this._context.clear()
+        throw reason
+      }
+    )
+
+    this._context.on('update', (usage) => {
+      this._eventEmitter.emit('progress', usage)
+    })
+  }
+
+  // Event emitter methods
+  public on<K extends keyof ResponseEvents<T>>(type: K, listener: (event: ResponseEvents<T>[K]) => void) {
+    this._eventEmitter.on(type, listener)
+    return this
+  }
+
+  public off<K extends keyof ResponseEvents<T>>(type: K, listener: (event: ResponseEvents<T>[K]) => void) {
+    this._eventEmitter.off(type, listener)
+    return this
+  }
+
+  public once<K extends keyof ResponseEvents<T>>(type: K, listener: (event: ResponseEvents<T>[K]) => void) {
+    this._eventEmitter.once(type, listener)
+    return this
+  }
+
+  public bindSignal(signal: AbortSignal): this {
+    if (signal.aborted) {
+      this.abort(signal.reason)
+    }
+
+    const signalAbort = () => {
+      this.abort(signal.reason)
+    }
+
+    signal.addEventListener('abort', () => signalAbort())
+
+    this.once('complete', () => signal.removeEventListener('abort', signalAbort))
+    this.once('error', () => signal.removeEventListener('abort', signalAbort))
+
+    return this
+  }
+
+  public abort(reason?: string | Error) {
+    this._context.controller.abort(reason)
+  }
+
+  public then<TResult1 = S, TResult2 = never>(
+    onfulfilled?: ((value: S) => TResult1 | PromiseLike<TResult1>) | null,
+    onrejected?: ((reason: any) => TResult2 | PromiseLike<TResult2>) | null
+  ): PromiseLike<TResult1 | TResult2> {
+    return this._promise.then(
+      (value: T) => {
+        const simplified = this._simplify(value)
+        return onfulfilled ? onfulfilled(simplified) : simplified
+      },
+      (reason) => {
+        if (onrejected) {
+          return onrejected(reason)
+        }
+        throw reason
+      }
+    ) as PromiseLike<TResult1 | TResult2>
+  }
+
+  public catch<TResult = never>(
+    onrejected?: ((reason: any) => TResult | PromiseLike<TResult>) | null
+  ): PromiseLike<S | TResult> {
+    return this._promise.catch(onrejected) as PromiseLike<S | TResult>
+  }
+
+  public async result(): Promise<{
+    output: T
+    usage: Usage
+    elapsed: number
+  }> {
+    const output = await this._promise
+    const usage = this._context.usage
+    return { output, usage, elapsed: this._elasped }
+  }
+}
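The two type parameters separate the task's internal value T from the simplified value S that a plain `await` resolves to (via the simplify callback passed to the constructor), while result() returns the raw T together with usage and elapsed time. A sketch of the distinction, where `ctx` and `runTask` are hypothetical placeholders rather than package exports:

    // Sketch only: ambient declarations stand in for a real task.
    declare const ctx: ZaiContext
    declare function runTask(ctx: ZaiContext): Promise<{ text: string; score: number }>

    const response = new Response<{ text: string; score: number }, string>(
      ctx,
      runTask(ctx),            // the task's rich internal value
      (value) => value.text    // simplify: what `await response` resolves to
    )

    const text = await response                         // string
    const { output, usage } = await response.result()   // output keeps the full { text, score } shape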
@@ -0,0 +1,14 @@
+import { getWasmTokenizer, TextTokenizer } from '@bpinternal/thicktoken'
+
+let tokenizer: TextTokenizer | null = null
+
+export async function getTokenizer(): Promise<TextTokenizer> {
+  if (!tokenizer) {
+    while (!getWasmTokenizer) {
+      // there's an issue with wasm, it doesn't load immediately
+      await new Promise((resolve) => setTimeout(resolve, 25))
+    }
+    tokenizer = getWasmTokenizer() as TextTokenizer
+  }
+  return tokenizer
+}
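This module replaces the old per-instance this.getTokenizer() with a lazily initialized singleton: the first caller waits for the wasm export to become available, and every later caller reuses the same TextTokenizer. The count/truncate calls below mirror how the task files above use it:

    // Sketch only: mirrors the tokenizer usage in the task files above.
    import { getTokenizer } from './tokenizer'

    const tokenizer = await getTokenizer() // top-level await (ESM)
    const prompt = 'Summarize the quarterly report in two sentences.'
    const tokens = tokenizer.count(prompt)          // token count, as used for budget checks
    const clipped = tokenizer.truncate(prompt, 100) // cap the prompt at 100 tokens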