@botpress/zai 1.0.0-beta.8 → 1.0.1-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/dist/csj/adapters/adapter.js +2 -0
  2. package/dist/csj/adapters/botpress-table.js +173 -0
  3. package/dist/csj/adapters/memory.js +12 -0
  4. package/dist/csj/index.js +9 -0
  5. package/dist/csj/models.js +387 -0
  6. package/dist/csj/operations/check.js +143 -0
  7. package/dist/csj/operations/constants.js +2 -0
  8. package/dist/csj/operations/errors.js +15 -0
  9. package/dist/csj/operations/extract.js +214 -0
  10. package/dist/csj/operations/filter.js +182 -0
  11. package/dist/csj/operations/label.js +242 -0
  12. package/dist/csj/operations/rewrite.js +113 -0
  13. package/dist/csj/operations/summarize.js +134 -0
  14. package/dist/csj/operations/text.js +48 -0
  15. package/dist/csj/utils.js +44 -0
  16. package/dist/csj/zai.js +142 -0
  17. package/dist/esm/adapters/adapter.js +5 -0
  18. package/dist/esm/adapters/botpress-table.js +194 -0
  19. package/dist/esm/adapters/memory.js +15 -0
  20. package/dist/esm/index.js +11 -0
  21. package/dist/esm/models.js +390 -0
  22. package/dist/esm/operations/check.js +149 -0
  23. package/dist/esm/operations/constants.js +6 -0
  24. package/dist/esm/operations/errors.js +18 -0
  25. package/dist/esm/operations/extract.js +217 -0
  26. package/dist/esm/operations/filter.js +189 -0
  27. package/dist/esm/operations/label.js +246 -0
  28. package/dist/esm/operations/rewrite.js +113 -0
  29. package/dist/esm/operations/summarize.js +134 -0
  30. package/dist/esm/operations/text.js +48 -0
  31. package/dist/esm/utils.js +51 -0
  32. package/dist/esm/zai.js +161 -0
  33. package/package.json +17 -13
  34. package/scripts/update-models.mts +76 -0
  35. package/scripts/update-types.mts +59 -0
  36. package/src/adapters/adapter.ts +35 -0
  37. package/src/adapters/botpress-table.ts +214 -0
  38. package/src/adapters/memory.ts +13 -0
  39. package/src/index.ts +11 -0
  40. package/src/models.ts +394 -0
  41. package/src/operations/__tests/botpress_docs.txt +26040 -0
  42. package/src/operations/__tests/cache.jsonl +101 -0
  43. package/src/operations/__tests/index.ts +86 -0
  44. package/src/operations/check.ts +188 -0
  45. package/src/operations/constants.ts +2 -0
  46. package/src/operations/errors.ts +9 -0
  47. package/src/operations/extract.ts +292 -0
  48. package/src/operations/filter.ts +232 -0
  49. package/src/operations/label.ts +333 -0
  50. package/src/operations/rewrite.ts +149 -0
  51. package/src/operations/summarize.ts +194 -0
  52. package/src/operations/text.ts +64 -0
  53. package/src/sdk-interfaces/llm/generateContent.ts +127 -0
  54. package/src/sdk-interfaces/llm/listLanguageModels.ts +19 -0
  55. package/src/utils.ts +62 -0
  56. package/src/zai.ts +193 -0
  57. package/dist/index.cjs +0 -1903
  58. package/dist/index.cjs.map +0 -1
  59. package/dist/index.d.cts +0 -916
  60. package/dist/index.d.ts +0 -916
  61. package/dist/index.js +0 -1873
  62. package/dist/index.js.map +0 -1
@@ -0,0 +1,232 @@
1
+ import sdk from '@botpress/sdk'
2
+ const { z } = sdk
3
+
4
+ import _ from 'lodash'
5
+ import { fastHash, stringify, takeUntilTokens } from '../utils'
6
+ import { Zai } from '../zai'
7
+ import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from './constants'
8
+
/**
 * Schema of one few-shot example for `filter`:
 * - `input`: the item that was evaluated (any value)
 * - `filter`: whether the item satisfies the condition
 * - `reason`: optional justification shown to the model
 */
const Example = z.object({
  input: z.any(),
  filter: z.boolean(),
  reason: z.string().optional()
})
type Example = sdk.z.input<typeof Example>

/** Token budget used to size each item (1 to 100,000, defaults to 250). */
const tokensPerItemSchema = z
  .number()
  .min(1)
  .max(100_000)
  .optional()
  .describe('The maximum number of tokens per item')
  .default(250)

/** Caller-supplied few-shot examples (defaults to an empty list). */
const examplesSchema = z.array(Example).describe('Examples to filter the condition against').default([])

/** Options accepted by `Zai.prototype.filter`. */
const Options = z.object({
  tokensPerItem: tokensPerItemSchema,
  examples: examplesSchema
})
export type Options = sdk.z.input<typeof Options>

declare module '@botpress/zai' {
  interface Zai {
    /** Filters elements of an array against a condition */
    filter<T>(input: Array<T>, condition: string, options?: Options): Promise<Array<T>>
  }
}

/** Marker that ends a model answer; also passed to the model as a stop sequence. */
const END = '■END■'
36
+
/**
 * Filters the elements of `input`, keeping those the model judges to satisfy `condition`.
 *
 * The input is split into chunks (at most 50 items each, bounded by a token budget derived
 * from the model's input size). Every chunk is sent to the model in parallel and the kept
 * items are concatenated in chunk order.
 *
 * @param input - Elements to evaluate.
 * @param condition - Natural-language condition; truncated to its token budget.
 * @param _options - Optional `tokensPerItem` and `examples`, validated with the `Options` schema.
 * @returns The elements for which the model answered `true`.
 */
Zai.prototype.filter = async function (this: Zai, input, condition, _options) {
  const options = Options.parse(_options ?? {})
  const tokenizer = await this.getTokenizer()

  const taskId = this.taskId
  const taskType = 'zai.filter'

  const MAX_ITEMS_PER_CHUNK = 50
  const TOKENS_TOTAL_MAX = this.Model.input.maxTokens - PROMPT_INPUT_BUFFER - PROMPT_OUTPUT_BUFFER
  const TOKENS_EXAMPLES_MAX = Math.floor(Math.max(250, TOKENS_TOTAL_MAX * 0.5))
  // lodash `clamp(number, lower, upper)`: yields max(250, min(25% of the budget, condition size)).
  // Floored so every token budget below stays an integer.
  const TOKENS_CONDITION_MAX = Math.floor(_.clamp(TOKENS_TOTAL_MAX * 0.25, 250, tokenizer.count(condition)))
  const TOKENS_INPUT_ARRAY_MAX = TOKENS_TOTAL_MAX - TOKENS_EXAMPLES_MAX - TOKENS_CONDITION_MAX

  condition = tokenizer.truncate(condition, TOKENS_CONDITION_MAX)

  const chunks: Array<typeof input> = []
  let currentChunk: typeof input = []
  let currentChunkTokens = 0

  for (const element of input) {
    // NOTE(review): the truncated string is only used to size the chunk; `formatInput` below
    // stringifies the full element, so a very large item can exceed the budget — confirm intent.
    const elementAsString = tokenizer.truncate(stringify(element, false), options.tokensPerItem)
    const elementTokens = tokenizer.count(elementAsString)

    const chunkIsFull =
      currentChunkTokens + elementTokens > TOKENS_INPUT_ARRAY_MAX || currentChunk.length >= MAX_ITEMS_PER_CHUNK

    // Never flush an empty chunk (happens when a single item alone exceeds the budget)
    if (chunkIsFull && currentChunk.length > 0) {
      chunks.push(currentChunk)
      currentChunk = []
      currentChunkTokens = 0
    }

    currentChunk.push(element)
    currentChunkTokens += elementTokens
  }

  if (currentChunk.length > 0) {
    chunks.push(currentChunk)
  }

  // Expected answer format: ■1:true■2:true■3:true

  /** Renders the user message: the condition followed by the indexed list of items. */
  const formatInput = (input: Example[], condition: string) => {
    return `
Condition to check:
${condition}

Items (from ■0 to ■${input.length - 1})
==============================
${input.map((x, idx) => `■${idx} = ${stringify(x.input ?? null, false)}`).join('\n')}
`.trim()
  }

  /** Renders the assistant message for a set of examples: the verdicts, END, then the reasons. */
  const formatExamples = (examples: Example[]) => {
    return `
${examples.map((x, idx) => `■${idx}:${!!x.filter ? 'true' : 'false'}`).join('')}
${END}
====
Here's the reasoning behind each example:
${examples.map((x, idx) => `■${idx}:${!!x.filter ? 'true' : 'false'}:${x.reason ?? 'No reason provided'}`).join('\n')}
`.trim()
  }

  // Fallback few-shot conversation used when neither the adapter nor the caller provides examples
  const genericExamples: Example[] = [
    {
      input: 'apple',
      filter: true,
      reason: 'Apples are fruits'
    },
    {
      input: 'Apple Inc.',
      filter: false,
      reason: 'Apple Inc. is a company, not a fruit'
    },
    {
      input: 'banana',
      filter: true,
      reason: 'Bananas are fruits'
    },
    {
      input: 'potato',
      filter: false,
      reason: 'Potatoes are vegetables'
    }
  ]

  const genericExamplesMessages = [
    {
      type: 'text' as const,
      content: formatInput(genericExamples, 'is a fruit'),
      role: 'user' as const
    },
    {
      type: 'text' as const,
      content: formatExamples(genericExamples),
      role: 'assistant' as const
    }
  ]

  /** Asks the model which items of one chunk satisfy the condition and returns the kept items. */
  const filterChunk = async (chunk: typeof input) => {
    const storedExamples = taskId
      ? await this.adapter.getExamples<string, unknown>({
          // The Table API can't search for a huge input string
          input: JSON.stringify(chunk).slice(0, 1000),
          taskType,
          taskId
        })
      : []

    // NOTE(review): examples are saved below with the whole chunk as `input` and the filtered
    // array as `output`, but are read back here as one boolean verdict per example — confirm
    // the adapter contract.
    const examples = storedExamples.map(
      (y) => ({ filter: y.output as boolean, input: y.input, reason: y.explanation } satisfies Example)
    )

    const allExamples = takeUntilTokens([...examples, ...options.examples], TOKENS_EXAMPLES_MAX, (el) =>
      tokenizer.count(stringify(el.input))
    )

    // The fallback must depend on whether any example survived. The previous check looked at
    // the length of the two-message array, which is always 2, so the generic examples were never used.
    const fewShotMessages = allExamples.length
      ? [
          {
            type: 'text' as const,
            content: formatInput(allExamples, condition),
            role: 'user' as const
          },
          {
            type: 'text' as const,
            content: formatExamples(allExamples),
            role: 'assistant' as const
          }
        ]
      : genericExamplesMessages

    const output = await this.callModel({
      systemPrompt: `
You are given a list of items. Your task is to filter out the items that meet the condition below.
You need to return the full list of items with the format:
■x:true■y:false■z:true (where x, y, z are the indices of the items in the list)
You need to start with "■0" and go up to the last index "■${chunk.length - 1}".
If an item meets the condition, you should return ":true", otherwise ":false".

IMPORTANT: Make sure to read the condition and the examples carefully before making your decision.
The condition is: "${condition}"
`.trim(),
      stopSequences: [END],
      messages: [
        ...fewShotMessages,
        {
          type: 'text',
          content: formatInput(
            chunk.map((x) => ({ input: x } as Example)),
            condition
          ),
          role: 'user'
        }
      ]
    })

    const answer = output.choices[0]?.content as string

    // Parse "■<idx>:<true|false>" pairs; anything other than "true" counts as false
    const indices = answer
      .trim()
      .split('■')
      .filter((x) => x.length > 0)
      .map((x) => {
        const [idx, filter] = x.split(':')
        return { idx: parseInt(idx?.trim() ?? ''), filter: filter?.toLowerCase().trim() === 'true' }
      })

    // Items the model did not mention are dropped
    const partial = chunk.filter((_, idx) => {
      return indices.find((x) => x.idx === idx)?.filter ?? false
    })

    if (taskId) {
      const key = fastHash(
        stringify({
          taskId,
          taskType,
          input: JSON.stringify(chunk),
          condition
        })
      )

      await this.adapter.saveExample({
        key,
        taskType,
        taskId,
        input: JSON.stringify(chunk),
        output: partial,
        instructions: condition,
        metadata: output.metadata
      })
    }

    return partial
  }

  const filteredChunks = await Promise.all(chunks.map(filterChunk))

  return filteredChunks.flat()
}
@@ -0,0 +1,333 @@
1
+ import sdk from '@botpress/sdk'
2
+ const { z } = sdk
3
+
4
+ import _ from 'lodash'
5
+ import { fastHash, stringify, takeUntilTokens } from '../utils'
6
+ import { Zai } from '../zai'
7
+ import { PROMPT_INPUT_BUFFER } from './constants'
8
+
/** One of the five confidence levels the model can answer with for a label. */
type Label = keyof typeof LABELS
const LABELS = {
  ABSOLUTELY_NOT: 'ABSOLUTELY_NOT',
  PROBABLY_NOT: 'PROBABLY_NOT',
  AMBIGUOUS: 'AMBIGUOUS',
  PROBABLY_YES: 'PROBABLY_YES',
  ABSOLUTELY_YES: 'ABSOLUTELY_YES'
} as const

/** Human-readable list of the allowed answers ("A | B | ..."), used only inside prompts. */
const ALL_LABELS = Object.values(LABELS).join(' | ')

/** A labelled example: the input plus, for each label key, the expected answer and optional explanation. */
type Example<T extends string> = {
  input: unknown
  labels: Partial<Record<T, { label: Label; explanation?: string }>>
}

/** Options accepted by `Zai.prototype.label`; `examples` is re-typed against the caller's label keys. */
export type Options<T extends string> = Omit<sdk.z.input<typeof Options>, 'examples'> & {
  examples?: Array<Partial<Example<T>>>
}

const Options = z.object({
  examples: z
    .array(
      z.object({
        input: z.any(),
        labels: z.record(
          z.object({
            // `z.enum` expects the tuple of allowed values. It was previously given the joined
            // prompt string ("A | B | ..."), which is not a list of values at all.
            label: z.enum(Object.values(LABELS) as [Label, ...Label[]]),
            explanation: z.string().optional()
          })
        )
      })
    )
    .default([])
    .describe('Examples to help the user make a decision'),
  instructions: z.string().optional().describe('Instructions to guide the user on how to extract the data'),
  chunkLength: z
    .number()
    .min(100)
    .max(100_000)
    .optional()
    .describe('The maximum number of tokens per chunk')
    .default(16_000)
})
47
+
/** Maps each label key to the question the model has to answer for it. */
type Labels<T extends string> = Record<T, string>

/**
 * Validates the label map: keys must be 1 to 250 characters long and contain only
 * alphanumeric characters and underscores.
 *
 * The former duplicate-key check was removed: `Object.keys` never returns the same key twice,
 * so it could not fire. The callback also no longer returns a value, since this callback
 * reports problems only through `ctx.addIssue`.
 */
const Labels = z.record(z.string().min(1).max(250), z.string()).superRefine((labels, ctx) => {
  for (const key of Object.keys(labels)) {
    if (key.length < 1 || key.length > 250) {
      ctx.addIssue({ message: `The label key "${key}" must be between 1 and 250 characters long`, code: 'custom' })
    }

    if (/[^a-zA-Z0-9_]/.test(key)) {
      ctx.addIssue({
        message: `The label key "${key}" must only contain alphanumeric characters and underscores`,
        code: 'custom'
      })
    }
  }
})
72
+
declare module '@botpress/zai' {
  interface Zai {
    /**
     * Tags the provided input with a list of predefined labels.
     * Resolves to one boolean per label key.
     */
    label<T extends string>(input: unknown, labels: Labels<T>, options?: Options<T>): Promise<Record<T, boolean>>
  }
}
85
+
/**
 * Maps the free-form label text returned by the model onto one of the known `LABELS`.
 *
 * Matching is case-insensitive and keyword based: "NOT" wins over everything else, then
 * "AMBIGUOUS", then "YES". The presence of "ABSOLUTELY" selects the stronger variant.
 * Anything unrecognised is treated as AMBIGUOUS.
 *
 * @param label - Raw label text extracted from the model answer.
 * @returns The matching label.
 */
const parseLabel = (label: string): Label => {
  const normalized = label.trim().toUpperCase().replace(/\s+/g, '_').replace(/_{2,}/g, '_')
  const isAbsolute = normalized.includes('ABSOLUTELY')

  if (normalized.includes('NOT')) {
    return isAbsolute ? LABELS.ABSOLUTELY_NOT : LABELS.PROBABLY_NOT
  }

  if (normalized.includes('AMBIGUOUS')) {
    return LABELS.AMBIGUOUS
  }

  if (normalized.includes('YES')) {
    // The "ABSOLUTELY" test has to be part of the "YES" branch. It used to sit in an
    // `else if` after the plain "YES" test and was therefore unreachable.
    return isAbsolute ? LABELS.ABSOLUTELY_YES : LABELS.PROBABLY_YES
  }

  return LABELS.AMBIGUOUS
}
102
+
/**
 * Tags `input` with the given labels: for every label key the model answers the associated
 * question with one of the five `LABELS`, and the result is reduced to a boolean
 * (`true` for PROBABLY_YES / ABSOLUTELY_YES).
 *
 * Inputs larger than the per-chunk token budget are split, each chunk is labelled
 * separately, and a label is `true` if it is `true` for at least one chunk.
 *
 * @param input - Value to tag (stringified before being sent to the model).
 * @param _labels - Map of label key to the question to answer, validated with `Labels`.
 * @param _options - Optional examples, instructions and chunk length, validated with `Options`.
 * @returns One boolean per label key.
 */
Zai.prototype.label = async function <T extends string>(this: Zai, input, _labels, _options) {
  type LabelDetails = { explanation: string; label: Label }
  type LabelMapping = { [K in T]: LabelDetails }
  type LabelAnswer = { [K in T]: boolean }

  const options = Options.parse(_options ?? {})
  const labels = Labels.parse(_labels)
  const tokenizer = await this.getTokenizer()

  const taskId = this.taskId
  const taskType = 'zai.label'

  const TOTAL_MAX_TOKENS = _.clamp(options.chunkLength, 1000, this.Model.input.maxTokens - PROMPT_INPUT_BUFFER)
  const CHUNK_EXAMPLES_MAX_TOKENS = _.clamp(Math.floor(TOTAL_MAX_TOKENS * 0.5), 250, 10_000)
  const CHUNK_INPUT_MAX_TOKENS = _.clamp(
    TOTAL_MAX_TOKENS - CHUNK_EXAMPLES_MAX_TOKENS,
    TOTAL_MAX_TOKENS * 0.5,
    TOTAL_MAX_TOKENS
  )

  const inputAsString = stringify(input)

  if (tokenizer.count(inputAsString) > CHUNK_INPUT_MAX_TOKENS) {
    const tokens = tokenizer.split(inputAsString)
    const chunks = _.chunk(tokens, CHUNK_INPUT_MAX_TOKENS).map((x) => x.join(''))
    // Forward the caller's options so every chunk is labelled with the same instructions and
    // examples as a short input would be (they used to be dropped here).
    const allLabels = await Promise.all(chunks.map((chunk) => this.label(chunk, _labels, _options)))

    // Merge all the labels together (those who are true will remain true)
    const merged: Record<string, boolean> = {}
    for (const chunkLabels of allLabels) {
      for (const key of Object.keys(chunkLabels)) {
        merged[key] = merged[key] === true || chunkLabels[key as T] === true
      }
    }
    return merged as LabelAnswer
  }

  const END = '■END■'

  const Key = fastHash(
    JSON.stringify({
      taskType,
      taskId,
      input: inputAsString,
      instructions: options.instructions ?? ''
    })
  )

  /** Reduces the detailed answer to one boolean per requested label (missing labels are false). */
  const convertToAnswer = (mapping: LabelMapping): LabelAnswer => {
    const result: Record<string, boolean> = {}
    for (const key of Object.keys(labels)) {
      const verdict = mapping[key as T]?.label
      result[key] = verdict === 'ABSOLUTELY_YES' || verdict === 'PROBABLY_YES'
    }
    return result as LabelAnswer
  }

  const examples = taskId
    ? await this.adapter.getExamples<string, LabelMapping>({
        input: inputAsString,
        taskType,
        taskId
      })
    : []

  // Caller-provided examples are appended to the stored ones
  for (const example of options.examples) {
    examples.push({
      key: fastHash(JSON.stringify(example)),
      input: example.input,
      similarity: 1,
      explanation: '',
      // The validated shape has an optional explanation per label; it is read defensively below
      output: example.labels as unknown as LabelMapping
    })
  }

  // An example with the same key answers the request without calling the model
  const exactMatch = examples.find((x) => x.key === Key)
  if (exactMatch) {
    return convertToAnswer(exactMatch.output)
  }

  const allExamples = takeUntilTokens(
    examples,
    CHUNK_EXAMPLES_MAX_TOKENS,
    (el) =>
      tokenizer.count(stringify(el.input)) +
      tokenizer.count(stringify(el.output)) +
      tokenizer.count(el.explanation ?? '') +
      100
  )
    .map((example, idx) => [
      {
        type: 'text' as const,
        role: 'user' as const,
        content: `
Expert Example #${idx + 1}

<|start_input|>
${stringify(example.input)}
<|end_input|>`.trim()
      },
      {
        type: 'text' as const,
        role: 'assistant' as const,
        content: `
Expert Example #${idx + 1}
============
${Object.keys(example.output)
  .map((key) =>
    `
■${key}:【${example.output[key as T]?.explanation ?? ''}】:${example.output[key as T]?.label}■
`.trim()
  )
  .join('\n')}
${END}
`.trim()
      }
    ])
    .flat()

  const format = Object.keys(labels)
    .map((key) => {
      return `
■${key}:【explanation (where "explanation" is answering the question "${labels[key]}")】:x■ (where x is ${ALL_LABELS})
`.trim()
    })
    .join('\n\n')

  const output = await this.callModel({
    stopSequences: [END],
    systemPrompt: `
You need to tag the input with the following labels based on the question asked:
${LABELS.ABSOLUTELY_NOT}: You are absolutely sure that the answer is "NO" to the question.
${LABELS.PROBABLY_NOT}: You are leaning towards "NO" to the question.
${LABELS.AMBIGUOUS}: You are unsure about the answer to the question.
${LABELS.PROBABLY_YES}: You are leaning towards "YES" to the question.
${LABELS.ABSOLUTELY_YES}: You are absolutely sure that the answer is "YES" to the question.

You need to return a mapping of the labels, an explanation and the answer for each label following the format below:
\`\`\`
${format}
${END}
\`\`\`

${options.instructions ?? ''}

===
You should consider the Expert Examples below to help you make your decision.
In your "Analysis", please refer to the Expert Examples # to justify your decision.
`.trim(),
    messages: [
      ...allExamples,
      {
        type: 'text',
        role: 'user',
        content: `
Input to tag:
<|start_input|>
${inputAsString}
<|end_input|>

Answer with this following format:
\`\`\`
${format}
${END}
\`\`\`

Format cheatsheet:
\`\`\`
■label:【explanation】:x■
\`\`\`

Where \`x\` is one of the following: ${ALL_LABELS}

Remember: In your \`explanation\`, please refer to the Expert Examples # (and quote them) that are relevant to ground your decision-making process.
The Expert Examples are there to help you make your decision. They have been provided by experts in the field and their answers (and reasoning) are considered the ground truth and should be used as a reference to make your decision when applicable.
For example, you can say: "According to Expert Example #1, ..."`.trim()
      }
    ]
  })

  const answer = output.choices[0]?.content as string

  // Extract "■key:【explanation】:LABEL■" for every requested label; unmatched labels are AMBIGUOUS
  const final: Record<string, LabelDetails> = {}
  for (const key of Object.keys(labels)) {
    const match = answer.match(new RegExp(`■${key}:【(.+)】:(\\w{2,})■`, 'i'))
    final[key] = match
      ? { explanation: (match[1] ?? '').trim(), label: parseLabel(match[2] ?? '') }
      : { explanation: '', label: LABELS.AMBIGUOUS }
  }

  if (taskId) {
    await this.adapter.saveExample({
      key: Key,
      taskType,
      taskId,
      instructions: options.instructions ?? '',
      metadata: output.metadata,
      input: inputAsString,
      output: final
    })
  }

  return convertToAnswer(final as LabelMapping)
}
@@ -0,0 +1,149 @@
1
+ import sdk from '@botpress/sdk'
2
+ const { z } = sdk
3
+
4
+ import { fastHash, stringify, takeUntilTokens } from '../utils'
5
+ import { Zai } from '../zai'
6
+ import { PROMPT_INPUT_BUFFER } from './constants'
7
+
/** Schema of a caller-supplied rewrite example: the original text and its expected rewrite. */
const Example = z.object({
  input: z.string(),
  output: z.string()
})
/** An example, optionally carrying its own instructions instead of the current prompt. */
type Example = sdk.z.input<typeof Example> & { instructions?: string }

/** Few-shot examples (defaults to an empty list). */
const examplesSchema = z.array(Example).default([])

/** Optional output length limit, between 10 and 16,000 tokens. */
const lengthSchema = z.number().min(10).max(16_000).optional().describe('The maximum number of tokens to generate')

/** Options accepted by `Zai.prototype.rewrite`. */
const Options = z.object({
  examples: examplesSchema,
  length: lengthSchema
})
export type Options = sdk.z.input<typeof Options>

declare module '@botpress/zai' {
  interface Zai {
    /** Rewrites a string so that it matches the prompt */
    rewrite(original: string, prompt: string, options?: Options): Promise<string>
  }
}

/** Markers delimiting the text to rewrite (and the rewritten text) in the conversation. */
const START = '■START■'
const END = '■END■'
29
+
/**
 * Rewrites `original` according to `prompt` using the model.
 *
 * Stored examples (when a task id is set) and caller-provided examples are sent as few-shot
 * conversations; two built-in examples are used when there are none. A stored example with
 * the same key is returned directly without calling the model.
 *
 * @param original - Text to rewrite.
 * @param prompt - Rewrite instruction; truncated to half of the usable model input.
 * @param _options - Optional `examples` and `length`, validated with the `Options` schema.
 * @returns The rewritten text, with the START/END markers stripped.
 * @throws Error when the original text plus the prompt do not fit in the model input.
 */
Zai.prototype.rewrite = async function (this: Zai, original, prompt, _options) {
  const options = Options.parse(_options ?? {})
  const tokenizer = await this.getTokenizer()

  const taskId = this.taskId
  const taskType = 'zai.rewrite'

  // Floored so the truncation limit is always a whole number of tokens
  const INPUT_COMPONENT_SIZE = Math.floor(Math.max(100, (this.Model.input.maxTokens - PROMPT_INPUT_BUFFER) / 2))
  prompt = tokenizer.truncate(prompt, INPUT_COMPONENT_SIZE)

  const promptSize = tokenizer.count(prompt)
  const originalSize = tokenizer.count(original)

  const inputSize = originalSize + promptSize
  // NOTE(review): the prompt is counted on both sides of this comparison, which makes the
  // guard stricter than the model limit alone requires — left unchanged, confirm intent.
  const maxInputSize = this.Model.input.maxTokens - promptSize - PROMPT_INPUT_BUFFER
  if (inputSize > maxInputSize) {
    throw new Error(
      `The input size is ${inputSize} tokens long, which is more than the maximum of ${maxInputSize} tokens for this model (${this.Model.name} = ${this.Model.input.maxTokens} tokens)`
    )
  }

  const instructions: string[] = []

  if (options.length && originalSize > options.length) {
    instructions.push(`The original text is ${originalSize} tokens long – it should be less than ${options.length}`)
    instructions.push(
      `The text must be standalone and complete in less than ${options.length} tokens, so it has to be shortened to fit the length as well`
    )
  }

  /** Renders a user message: the prompt followed by the text wrapped in START/END markers. */
  const format = (before: string, prompt: string) => {
    return `
Prompt: ${prompt}

${START}
${before}
${END}
`.trim()
  }

  const Key = fastHash(
    stringify({
      taskId,
      taskType,
      input: original,
      prompt
    })
  )

  /** Turns an example into a user/assistant message pair. */
  const formatExample = ({ input, output, instructions }: Example) => {
    return [
      { type: 'text' as const, role: 'user' as const, content: format(input, instructions || prompt) },
      { type: 'text' as const, role: 'assistant' as const, content: `${START}${output}${END}` }
    ]
  }

  const defaultExamples: Example[] = [
    { input: 'Hello, how are you?', output: 'Bonjour, comment ça va?', instructions: 'translate to French' },
    { input: '1\n2\n3', output: '3\n2\n1', instructions: 'reverse the order' }
  ]

  const tableExamples = taskId
    ? await this.adapter.getExamples<string, string>({
        input: original,
        taskId,
        taskType
      })
    : []

  // A stored example with the same key answers the request without calling the model
  const exactMatch = tableExamples.find((x) => x.key === Key)
  if (exactMatch) {
    return exactMatch.output
  }

  const savedExamples: Example[] = [
    ...tableExamples.map((x) => ({ input: x.input as string, output: x.output as string })),
    ...options.examples
  ]

  // Budget left for few-shot examples once the final message (original + prompt) is accounted
  // for. The original text used to be left out, so examples could push the request past the
  // model limit. The guard above guarantees this value is not negative.
  const REMAINING_TOKENS = this.Model.input.maxTokens - inputSize - PROMPT_INPUT_BUFFER
  const examples = takeUntilTokens(
    savedExamples.length ? savedExamples : defaultExamples,
    REMAINING_TOKENS,
    (el) => tokenizer.count(stringify(el.input)) + tokenizer.count(stringify(el.output))
  )
    .map(formatExample)
    .flat()

  const output = await this.callModel({
    systemPrompt: `
Rewrite the text between the ${START} and ${END} tags to match the user prompt.
${instructions.map((x) => `• ${x}`).join('\n')}
`.trim(),
    messages: [...examples, { type: 'text', content: format(original, prompt), role: 'user' }],
    maxTokens: options.length,
    stopSequences: [END]
  })

  let result = output.choices[0]?.content as string

  // Keep only what lies between the START and END markers, when they are present
  if (result.includes(START)) {
    result = result.slice(result.indexOf(START) + START.length)
  }

  if (result.includes(END)) {
    result = result.slice(0, result.indexOf(END))
  }

  if (taskId) {
    await this.adapter.saveExample({
      key: Key,
      metadata: output.metadata,
      instructions: prompt,
      input: original,
      output: result,
      taskType,
      taskId
    })
  }

  return result
}