@botpress/zai 1.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/dist/adapters/adapter.js +2 -0
  2. package/dist/adapters/botpress-table.js +168 -0
  3. package/dist/adapters/memory.js +12 -0
  4. package/dist/index.d.ts +99 -98
  5. package/dist/index.js +9 -1873
  6. package/dist/models.js +387 -0
  7. package/dist/operations/check.js +141 -0
  8. package/dist/operations/constants.js +2 -0
  9. package/dist/operations/errors.js +15 -0
  10. package/dist/operations/extract.js +212 -0
  11. package/dist/operations/filter.js +179 -0
  12. package/dist/operations/label.js +237 -0
  13. package/dist/operations/rewrite.js +111 -0
  14. package/dist/operations/summarize.js +132 -0
  15. package/dist/operations/text.js +46 -0
  16. package/dist/utils.js +43 -0
  17. package/dist/zai.js +140 -0
  18. package/package.json +21 -19
  19. package/src/adapters/adapter.ts +35 -0
  20. package/src/adapters/botpress-table.ts +210 -0
  21. package/src/adapters/memory.ts +13 -0
  22. package/src/index.ts +11 -0
  23. package/src/models.ts +394 -0
  24. package/src/operations/__tests/botpress_docs.txt +26040 -0
  25. package/src/operations/__tests/cache.jsonl +101 -0
  26. package/src/operations/__tests/index.ts +87 -0
  27. package/src/operations/check.ts +187 -0
  28. package/src/operations/constants.ts +2 -0
  29. package/src/operations/errors.ts +9 -0
  30. package/src/operations/extract.ts +291 -0
  31. package/src/operations/filter.ts +231 -0
  32. package/src/operations/label.ts +332 -0
  33. package/src/operations/rewrite.ts +148 -0
  34. package/src/operations/summarize.ts +193 -0
  35. package/src/operations/text.ts +63 -0
  36. package/src/sdk-interfaces/llm/generateContent.ts +127 -0
  37. package/src/sdk-interfaces/llm/listLanguageModels.ts +19 -0
  38. package/src/utils.ts +61 -0
  39. package/src/zai.ts +193 -0
  40. package/tsconfig.json +2 -2
  41. package/dist/index.cjs +0 -1903
  42. package/dist/index.cjs.map +0 -1
  43. package/dist/index.d.cts +0 -916
  44. package/dist/index.js.map +0 -1
  45. package/tsup.config.ts +0 -16
  46. package/vitest.config.ts +0 -9
  47. package/vitest.setup.ts +0 -24
@@ -0,0 +1,87 @@
1
+ import { Client } from '@botpress/client'
2
+ import { type TextTokenizer, getWasmTokenizer } from '@botpress/wasm'
3
+
4
+ import fs from 'node:fs'
5
+ import path from 'node:path'
6
+ import { beforeAll } from 'vitest'
7
+
8
+ import { Zai } from '../..'
9
+
10
+ import { fastHash } from '../../utils'
11
+
12
/**
 * Creates a Botpress client configured from the environment.
 *
 * Reads `CLOUD_API_ENDPOINT` (falling back to `https://api.botpress.dev` when
 * it is null or undefined), `CLOUD_BOT_ID` and `CLOUD_PAT`.
 */
export const getClient = () => {
  const { CLOUD_API_ENDPOINT, CLOUD_BOT_ID, CLOUD_PAT } = process.env

  return new Client({
    apiUrl: CLOUD_API_ENDPOINT ?? 'https://api.botpress.dev',
    botId: CLOUD_BOT_ID,
    token: CLOUD_PAT
  })
}
19
+
20
/**
 * Loads a JSON Lines file into a map keyed by one property of each record.
 *
 * Each non-blank line is parsed as a standalone JSON document. When several
 * records share the same key, the last one in the file wins.
 *
 * @param filePath - Path of the `.jsonl` file to read (read synchronously as UTF-8).
 * @param keyProperty - Property of each record whose stringified value becomes the map key.
 * @returns A map from stringified key to the parsed record.
 * @throws If the file cannot be read or a non-blank line is not valid JSON.
 */
function readJSONL<T>(filePath: string, keyProperty: keyof T): Map<string, T> {
  const map = new Map<string, T>()
  const lines = fs.readFileSync(filePath, 'utf-8').split(/\r?\n/)

  for (const line of lines) {
    // Skip empty AND whitespace-only lines: the latter are truthy, so a plain
    // `filter(Boolean)` would let them through and JSON.parse would throw on them.
    if (line.trim().length === 0) {
      continue
    }

    const obj = JSON.parse(line) as T
    map.set(String(obj[keyProperty]), obj)
  }

  return map
}
33
+
34
/**
 * Recorded responses, loaded once at module evaluation from the `cache.jsonl`
 * file that sits next to this module and indexed by each record's `key` field.
 */
const cache: Map<string, { key: string; value: any }> = readJSONL<{ key: string; value: any }>(
  path.resolve(import.meta.dirname, './cache.jsonl'),
  'key'
)
38
+
39
/**
 * Wraps a fresh client in a proxy that records and replays `callAction` calls.
 *
 * The cache key is the hash of the JSON-serialised call arguments. On a hit the
 * recorded value is returned without calling the real client; on a miss the
 * real call is made, and its response is stored in memory and appended as one
 * JSON line to `cache.jsonl`. Every other property is read straight from the
 * underlying client.
 */
export const getCachedClient = () => {
  const client = getClient()
  const cacheFile = path.resolve(import.meta.dirname, './cache.jsonl')

  const cachedCallAction = async (...args: Parameters<Client['callAction']>) => {
    const key = fastHash(JSON.stringify(args))

    const hit = cache.get(key)
    if (hit) {
      return hit.value
    }

    const response = await client.callAction(...args)
    const entry = { key, value: response }

    cache.set(key, entry)
    fs.appendFileSync(cacheFile, JSON.stringify(entry) + '\n')

    return response
  }

  return new Proxy(client, {
    get: (target, prop) => (prop === 'callAction' ? cachedCallAction : Reflect.get(target, prop))
  })
}
73
+
74
/** Builds a Zai instance backed by the caching client, with retries turned off (`maxRetries: 0`). */
export const getZai = () => new Zai({ client: getCachedClient(), retry: { maxRetries: 0 } })
78
+
79
/** Shared tokenizer; holds `null` until the `beforeAll` hook below has assigned it. */
export let tokenizer: TextTokenizer = null!

// Load the WASM tokenizer once, before the tests run.
beforeAll(async function loadTokenizer() {
  tokenizer = await getWasmTokenizer()
})
84
+
85
/**
 * Full text of `botpress_docs.txt` (the file next to this module), trimmed.
 *
 * Resolved through `import.meta.dirname`, like the cache file paths elsewhere in
 * this module, rather than `__dirname`, which is not defined in ES module scope.
 */
export const BotpressDocumentation = fs
  .readFileSync(path.resolve(import.meta.dirname, './botpress_docs.txt'), 'utf-8')
  .trim()
86
+
87
/** Placeholder metadata object: unit costs and token counts, zero latency, empty model name. */
export const metadata = {
  cost: { input: 1, output: 1 },
  latency: 0,
  model: '',
  tokens: { input: 1, output: 1 }
}
@@ -0,0 +1,187 @@
1
+ import { z } from '@bpinternal/zui'
2
+
3
+ import { fastHash, stringify, takeUntilTokens } from '../utils'
4
+ import { Zai } from '../zai'
5
+ import { PROMPT_INPUT_BUFFER } from './constants'
6
+
7
/** One worked example: an input, the expected verdict, and an optional justification. */
const Example = z.object({ input: z.any(), check: z.boolean(), reason: z.string().optional() })

/** Runtime schema for the options of `check`; `examples` defaults to an empty list. */
const Options = z.object({
  examples: z.array(Example).describe('Examples to check the condition against').default([])
})

/** Caller-facing option type (the schema's input side, so defaulted fields may be omitted). */
export type Options = z.input<typeof Options>
17
+
18
+ declare module '@botpress/zai' {
19
+ interface Zai {
20
+ /** Checks whether `condition` holds for `input`; resolves to `true` or `false` */
21
+ check(input: unknown, condition: string, options?: Options): Promise<boolean>
22
+ }
23
+ }
24
+
25
// Sentinel markers the model is asked to emit: the verdict (TRUE / FALSE) and
// the end-of-response marker, which is also used as the stop sequence.
const TRUE = `■TRUE■`
const FALSE = `■FALSE■`
const END = `■END■`
28
+
29
/**
 * Implementation of `zai.check`: asks the model whether `condition` holds for `input`.
 *
 * Flow:
 *  1. The input and the condition are stringified and truncated to fixed shares
 *     (50% / 20%) of the prompt budget; what is left is the budget for examples.
 *  2. When a task id is set, examples are fetched from the adapter. If one of
 *     them has the same key (hash of task, input and condition), its stored
 *     output is returned without calling the model.
 *  3. Otherwise the model is prompted with the examples (adapter + caller
 *     provided, or two built-in defaults when there are none) and must end its
 *     answer with the TRUE or FALSE marker.
 *  4. When a task id is set, the verdict and its explanation are saved through
 *     the adapter.
 *
 * @param input - Arbitrary value to evaluate; it is stringified before use.
 * @param condition - Natural-language condition to evaluate against the input.
 * @param _options - Optional settings, validated with the `Options` schema.
 * @returns The boolean verdict.
 * @throws Error when the model returns no text content, or a text that contains
 *   neither the TRUE nor the FALSE marker.
 */
Zai.prototype.check = async function (this: Zai, input, condition, _options) {
  const options = Options.parse(_options ?? {})
  const tokenizer = await this.getTokenizer()
  const PROMPT_COMPONENT = Math.max(this.Model.input.maxTokens - PROMPT_INPUT_BUFFER, 100)

  const taskId = this.taskId
  const taskType = 'zai.check'

  const PROMPT_TOKENS = {
    INPUT: Math.floor(0.5 * PROMPT_COMPONENT),
    CONDITION: Math.floor(0.2 * PROMPT_COMPONENT)
  }

  // Truncate the input and the condition to fit the model's input size
  const inputAsString = tokenizer.truncate(stringify(input), PROMPT_TOKENS.INPUT)
  condition = tokenizer.truncate(condition, PROMPT_TOKENS.CONDITION)

  // All tokens remaining after the input and condition are accounted for can be used for examples
  const EXAMPLES_TOKENS = PROMPT_COMPONENT - tokenizer.count(inputAsString) - tokenizer.count(condition)

  const Key = fastHash(
    JSON.stringify({
      taskType,
      taskId,
      input: inputAsString,
      condition
    })
  )

  const examples = taskId
    ? await this.adapter.getExamples<string, boolean>({
        input: inputAsString,
        taskType,
        taskId
      })
    : []

  // Same task, same (truncated) input and same condition: reuse the stored verdict.
  const exactMatch = examples.find((x) => x.key === Key)
  if (exactMatch) {
    return exactMatch.output
  }

  const defaultExamples = [
    { input: '50 Cent', check: true, reason: '50 Cent is widely recognized as a public personality.' },
    {
      input: ['apple', 'banana', 'carrot', 'house'],
      check: false,
      reason: 'The list contains a house, which is not a fruit. Also, the list contains a carrot, which is a vegetable.'
    }
  ]

  const userExamples = [
    ...examples.map((e) => ({ input: e.input, check: e.output, reason: e.explanation })),
    ...options.examples
  ]

  // Incremented by formatInput each time an "Expert Example" header is produced.
  let exampleId = 1

  const formatInput = (input: string, condition: string) => {
    const header = userExamples.length ? `Expert Example #${exampleId++}` : `Example of condition: "${condition}"`

    return `
${header}
<|start_input|>
${input.trim()}
<|end_input|>
`.trim()
  }

  const formatOutput = (answer: boolean, justification: string) => {
    return `
Analysis: ${justification}
Final Answer: ${answer ? TRUE : FALSE}
${END}
`.trim()
  }

  const formatExample = (example: { input?: any; check: boolean; reason?: string }) => [
    { type: 'text' as const, content: formatInput(stringify(example.input ?? null), condition), role: 'user' as const },
    {
      type: 'text' as const,
      content: formatOutput(example.check, example.reason ?? ''),
      role: 'assistant' as const
    }
  ]

  const allExamples = takeUntilTokens(
    userExamples.length ? userExamples : defaultExamples,
    EXAMPLES_TOKENS,
    (el) => tokenizer.count(stringify(el.input)) + tokenizer.count(el.reason ?? '')
  )
    .map(formatExample)
    .flat()

  const specialInstructions = userExamples.length
    ? `
- You have been provided with examples from previous experts. Make sure to read them carefully before making your decision.
- Make sure to refer to the examples provided by the experts to justify your decision (when applicable).
- When in doubt, ground your decision on the examples provided by the experts instead of your own intuition.
- When no example is similar to the input, make sure to provide a clear justification for your decision while inferring the decision-making process from the examples provided by the experts.
`.trim()
    : ''

  // Built BEFORE the final question so that `exampleId - 1` is the number of
  // expert examples formatted so far (formatting the question increments it again).
  // NOTE(review): with no user examples `exampleId` is still 1, so the header
  // reads "#1 to #0". The text is left untouched here because changing it
  // changes the request sent to the model.
  const systemPrompt = `
Check if the following condition is true or false for the given input. Before answering, make sure to read the input and the condition carefully.
Justify your answer, then answer with either ${TRUE} or ${FALSE} at the very end, then add ${END} to finish the response.
IMPORTANT: Make sure to answer with either ${TRUE} or ${FALSE} at the end of your response, but NOT both.
---
Expert Examples (#1 to #${exampleId - 1}):
${specialInstructions}
`.trim()

  const question = `
Considering the below input and above examples, is the following condition true or false?
${formatInput(inputAsString, condition)}
In your "Analysis", please refer to the Expert Examples # to justify your decision.`.trim()

  const output = await this.callModel({
    systemPrompt,
    stopSequences: [END],
    messages: [
      ...allExamples,
      {
        type: 'text',
        content: question,
        role: 'user'
      }
    ]
  })

  // Fail with an explicit message instead of a TypeError on `.includes` when
  // the model returned no choice or a non-text content.
  const answer: unknown = output.choices[0]?.content
  if (typeof answer !== 'string') {
    throw new Error('The model did not return a valid answer. The response did not contain any text content.')
  }

  const hasTrue = answer.includes(TRUE)
  const hasFalse = answer.includes(FALSE)

  if (!hasTrue && !hasFalse) {
    throw new Error(`The model did not return a valid answer. The response was: ${answer}`)
  }

  let finalAnswer: boolean

  if (hasTrue && hasFalse) {
    // If both TRUE and FALSE are present, we need to check which one was answered last
    finalAnswer = answer.lastIndexOf(TRUE) > answer.lastIndexOf(FALSE)
  } else {
    finalAnswer = hasTrue
  }

  if (taskId) {
    // Strip every occurrence of the control markers (not only the first one) so
    // that none of them leaks into the stored explanation.
    const explanation = [TRUE, FALSE, END, 'Final Answer:']
      .reduce((text, marker) => text.split(marker).join(''), answer)
      .trim()

    await this.adapter.saveExample({
      key: Key,
      taskType,
      taskId,
      input: inputAsString,
      instructions: condition,
      metadata: output.metadata,
      output: finalAnswer,
      explanation
    })
  }

  return finalAnswer
}
@@ -0,0 +1,2 @@
1
/**
 * Number of tokens held back from a model's maximum input size when operations
 * compute how much room is left for the prompt content.
 * NOTE(review): 1048 is neither 1024 nor 2048 — confirm the value is intentional.
 */
export const PROMPT_INPUT_BUFFER = 1_048

/** Output-side counterpart of the buffer above — presumably reserved for the model's response; confirm against its users. */
export const PROMPT_OUTPUT_BUFFER = 512
@@ -0,0 +1,9 @@
1
/**
 * Renders the offending payload for the error message. Strings are used as-is;
 * other values are pretty-printed as JSON so that objects do not collapse to
 * "[object Object]". Values JSON cannot represent fall back to `String(value)`.
 */
function describeJsonPayload(value: unknown): string {
  if (typeof value === 'string') {
    return value
  }

  try {
    // JSON.stringify returns undefined for undefined / functions / symbols.
    return JSON.stringify(value, null, 2) ?? String(value)
  } catch {
    // e.g. circular structures or BigInt values
    return String(value)
  }
}

/**
 * Raised when a piece of model output cannot be turned into valid data.
 *
 * The original payload and the underlying error stay available on the
 * instance as `json` and `error`; both are also embedded in the message.
 */
export class JsonParsingError extends Error {
  constructor(
    public json: unknown,
    public error: Error
  ) {
    const message = `Error parsing JSON:\n\n---JSON---\n${describeJsonPayload(json)}\n\n---Error---\n\n ${error}`
    super(message)
    // Without this the error is reported under the generic name "Error".
    this.name = 'JsonParsingError'
  }
}
@@ -0,0 +1,291 @@
1
+ import { z } from '@bpinternal/zui'
2
+
3
+ import JSON5 from 'json5'
4
+ import { jsonrepair } from 'jsonrepair'
5
+
6
+ import { chunk, isArray } from 'lodash-es'
7
+ import { fastHash, stringify, takeUntilTokens } from '../utils'
8
+ import { Zai } from '../zai'
9
+ import { PROMPT_INPUT_BUFFER } from './constants'
10
+ import { JsonParsingError } from './errors'
11
+
12
/**
 * Runtime schema for the options of `extract`.
 * - `instructions`: optional free-text guidance.
 * - `chunkLength`: maximum number of tokens per chunk, between 100 and 100 000, 16 000 by default.
 */
const Options = z.object({
  instructions: z.string().optional().describe('Instructions to guide the user on how to extract the data'),
  chunkLength: z.number().min(100).max(100_000).optional().describe('The maximum number of tokens per chunk').default(16_000)
})

/** Caller-facing option type (the schema's input side, so every field may be omitted). */
export type Options = z.input<typeof Options>
23
+
24
+ declare module '@botpress/zai' {
25
+ interface Zai {
26
+ /** Extracts data matching `schema` from an arbitrary input: one object for an object schema, a list of objects for an array-of-objects schema */
27
+ extract<S extends z.AnyZodObject>(input: unknown, schema: S, options?: Options): Promise<z.infer<S>>
28
+ extract<S extends z.AnyZodObject>(
29
+ input: unknown,
30
+ schema: z.ZodArray<S>,
31
+ options?: Options
32
+ ): Promise<Array<z.infer<S>>>
33
+ }
34
+ }
35
+
36
// Sentinel markers used in the extraction prompt: START / END delimit one JSON
// element, NO_MORE signals that no further element follows.
const START = `■json_start■`
const END = `■json_end■`
const NO_MORE = `■NO_MORE_ELEMENT■`
39
+
40
/**
 * Implementation of `zai.extract`: pulls structured data matching `schema` out of `input`.
 *
 * Flow:
 *  1. The schema must be an object schema or an array of object schemas; the
 *     array form switches the function to "many elements" mode.
 *  2. If the stringified input is longer than the chunk length: in "many
 *     elements" mode the input is split into chunks that are extracted
 *     separately (with the caller's options) and concatenated; otherwise the
 *     input is truncated.
 *  3. When a task id is set, examples are fetched from the adapter; an example
 *     with the same key short-circuits the model call.
 *  4. The model is prompted to emit each element between the START and END
 *     markers; every element is repaired, parsed and validated with the schema.
 *  5. When a task id is set, the result is saved through the adapter under that
 *     same task id.
 *
 * NOTE(review): the cache key covers the task, the input and the instructions
 * but not the schema, so two schemas used with the same task/input/instructions
 * share one key. Left unchanged because altering the key would orphan
 * previously saved examples.
 *
 * @param input - Arbitrary value to extract from; it is stringified before use.
 * @param schema - Object schema (single result) or array-of-objects schema (list of results).
 * @param _options - Optional settings, validated with the `Options` schema.
 * @returns The extracted object, or the list of extracted objects in array mode.
 * @throws Error when the schema has an unsupported shape or the model returns no text content.
 * @throws JsonParsingError when an emitted element cannot be parsed or fails schema validation.
 */
Zai.prototype.extract = async function (this: Zai, input, schema, _options) {
  const options = Options.parse(_options ?? {})
  const tokenizer = await this.getTokenizer()

  const taskId = this.taskId
  const taskType = 'zai.extract'

  const PROMPT_COMPONENT = Math.max(this.Model.input.maxTokens - PROMPT_INPUT_BUFFER, 100)

  let isArrayOfObjects = false
  const originalSchema = schema

  if (schema instanceof z.ZodObject) {
    // Single-object mode: the schema is used as-is
  } else if (schema instanceof z.ZodArray) {
    if (schema._def.type instanceof z.ZodObject) {
      // Array mode: work with the element schema from here on
      isArrayOfObjects = true
      schema = schema._def.type
    } else {
      throw new Error('Schema must be a ZodObject or a ZodArray<ZodObject>')
    }
  } else {
    throw new Error('Schema must be either a ZuiObject or a ZuiArray<ZuiObject>')
  }

  const schemaTypescript = schema.toTypescript({ declaration: false })
  const schemaLength = tokenizer.count(schemaTypescript)

  // The chunk must fit in the model input together with the schema and the reserved buffer
  const chunkLength = Math.min(options.chunkLength, this.Model.input.maxTokens - PROMPT_INPUT_BUFFER - schemaLength)

  const keys = Object.keys(schema.shape)

  let inputAsString = stringify(input)

  if (tokenizer.count(inputAsString) > chunkLength) {
    // If we want to extract an array of objects, we will run this function recursively
    if (isArrayOfObjects) {
      const tokens = tokenizer.split(inputAsString)
      const chunks = chunk(tokens, chunkLength).map((x) => x.join(''))
      // Forward the caller's options so that every chunk is extracted with the
      // same instructions and chunk length as the top-level call.
      const all = await Promise.all(
        chunks.map((chunk) => this.extract(chunk, originalSchema as z.AnyZodObject, _options))
      )

      return all.flat()
    } else {
      // Truncate the input to fit the model's input size
      inputAsString = tokenizer.truncate(stringify(input), chunkLength)
    }
  }

  const instructions: string[] = []

  if (options.instructions) {
    instructions.push(options.instructions)
  }

  const shape = `{ ${keys.map((key) => `"${key}": ...`).join(', ')} }`
  const abbv = '{ ... }'

  if (isArrayOfObjects) {
    instructions.push('You may have multiple elements, or zero elements in the input.')
    instructions.push('You must extract each element separately.')
    instructions.push(`Each element must be a JSON object with exactly the format: ${START}${shape}${END}`)
    instructions.push(`When you are done extracting all elements, type "${NO_MORE}" to finish.`)
    instructions.push(`For example, if you have zero elements, the output should look like this: ${NO_MORE}`)
    instructions.push(
      `For example, if you have two elements, the output should look like this: ${START}${abbv}${END}${START}${abbv}${END}${NO_MORE}`
    )
  } else {
    instructions.push('You may have exactly one element in the input.')
    instructions.push(`The element must be a JSON object with exactly the format: ${START}${shape}${END}`)
  }

  // All tokens remaining after the input and instructions are accounted for can be used for examples
  const EXAMPLES_TOKENS = PROMPT_COMPONENT - tokenizer.count(inputAsString) - tokenizer.count(instructions.join('\n'))

  const Key = fastHash(
    JSON.stringify({
      taskType,
      taskId,
      input: inputAsString,
      instructions: options.instructions
    })
  )

  const examples = taskId
    ? await this.adapter.getExamples<string, unknown>({
        input: inputAsString,
        taskType,
        taskId
      })
    : []

  // Same task, same input and same instructions: reuse the stored result.
  const exactMatch = examples.find((x) => x.key === Key)
  if (exactMatch) {
    return exactMatch.output
  }

  const defaultExample = isArrayOfObjects
    ? {
        input: `The story goes as follow.
Once upon a time, there was a person named Alice who was 30 years old.
Then, there was a person named Bob who was 25 years old.
The end.`,
        schema: 'Array<{ name: string, age: number }>',
        instructions: 'Extract all people',
        extracted: [
          {
            name: 'Alice',
            age: 30
          },
          {
            name: 'Bob',
            age: 25
          }
        ]
      }
    : {
        input: `The story goes as follow.
Once upon a time, there was a person named Alice who was 30 years old.
The end.`,
        schema: '{ name: string, age: number }',
        instructions: 'Extract the person',
        extracted: { name: 'Alice', age: 30 }
      }

  const userExamples = examples.map((e) => ({
    input: e.input,
    extracted: e.output,
    schema: schemaTypescript,
    instructions: options.instructions
  }))

  // Incremented by formatInput each time an "Expert Example" header is produced.
  let exampleId = 1

  const formatInput = (input: string, schema: string, instructions?: string) => {
    const header = userExamples.length
      ? `Expert Example #${exampleId++}`
      : "Here's an example to help you understand the format:"

    return `
${header}

<|start_schema|>
${schema}
<|end_schema|>

<|start_instructions|>
${instructions ?? 'No specific instructions, just follow the schema above.'}
<|end_instructions|>

<|start_input|>
${input.trim()}
<|end_input|>
`.trim()
  }

  const formatOutput = (extracted: any) => {
    extracted = isArray(extracted) ? extracted : [extracted]

    return (
      extracted
        .map((x: unknown) =>
          `
${START}
${JSON.stringify(x, null, 2)}
${END}`.trim()
        )
        .join('\n') + NO_MORE
    )
  }

  const formatExample = (example: { input?: any; schema: string; instructions?: string; extracted: any }) => [
    {
      type: 'text' as const,
      content: formatInput(stringify(example.input ?? null), example.schema, example.instructions),
      role: 'user' as const
    },
    {
      type: 'text' as const,
      content: formatOutput(example.extracted),
      role: 'assistant' as const
    }
  ]

  const allExamples = takeUntilTokens(
    userExamples.length ? userExamples : [defaultExample],
    EXAMPLES_TOKENS,
    (el) => tokenizer.count(stringify(el.input)) + tokenizer.count(stringify(el.extracted))
  )
    .map(formatExample)
    .flat()

  const output = await this.callModel({
    systemPrompt: `
Extract the following information from the input:
${schemaTypescript}
====

${instructions.map((x) => `• ${x}`).join('\n')}
`.trim(),
    stopSequences: [isArrayOfObjects ? NO_MORE : END],
    messages: [
      ...allExamples,
      {
        role: 'user',
        type: 'text',
        content: formatInput(inputAsString, schemaTypescript, options.instructions ?? '')
      }
    ]
  })

  // Fail with an explicit message instead of a TypeError on `.split` when the
  // model returned no choice or a non-text content.
  const answer: unknown = output.choices[0]?.content
  if (typeof answer !== 'string') {
    throw new Error('The model did not return a valid answer. The response did not contain any text content.')
  }

  const elements = answer
    .split(START)
    .filter((x) => x.trim().length > 0)
    .map((x) => {
      try {
        // END is a stop sequence in single-object mode, so it may be missing
        // from the answer. `slice(0, -1)` would then drop the last character
        // (typically the closing brace); keep the whole segment instead.
        const endIndex = x.indexOf(END)
        const json = (endIndex === -1 ? x : x.slice(0, endIndex)).trim()
        const repairedJson = jsonrepair(json)
        const parsedJson = JSON5.parse(repairedJson)

        return schema.parse(parsedJson)
      } catch (error) {
        throw new JsonParsingError(x, error instanceof Error ? error : new Error('Unknown error'))
      }
    })

  let final: any

  if (isArrayOfObjects) {
    final = elements
  } else if (elements.length === 0) {
    // Nothing was emitted: fall back to whatever the schema accepts for an empty object
    final = schema.parse({})
  } else {
    final = elements[0]
  }

  if (taskId) {
    await this.adapter.saveExample({
      key: Key,
      // Save under the same task id that getExamples is queried with above,
      // otherwise the saved example can never be found again.
      taskId,
      taskType,
      instructions: options.instructions ?? 'No specific instructions',
      input: inputAsString,
      output: final,
      metadata: output.metadata
    })
  }

  return final
}