@botpress/zai 2.4.2 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,656 @@
1
+ // eslint-disable consistent-type-definitions
2
+ import { z } from '@bpinternal/zui'
3
+ import pLimit from 'p-limit'
4
+
5
+ import { ZaiContext } from '../context'
6
+ import { Micropatch } from '../micropatch'
7
+ import { Response } from '../response'
8
+ import { getTokenizer } from '../tokenizer'
9
+ import { fastHash, stringify } from '../utils'
10
+ import { Zai } from '../zai'
11
+ import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from './constants'
12
+
/**
 * A single file handed to (and returned by) the patch operation.
 */
export type File = {
  /** Path identifying the file, e.g. 'src/components/Button.tsx'. Patches are matched back to files by this value. */
  path: string
  /** Bare file name, e.g. 'Button.tsx' */
  name: string
  /** Full text content of the file */
  content: string
  /** Patch operations applied to this file; set on returned files only */
  patch?: string
}
26
+
/**
 * Runtime schema for the input fields of a file (`path`, `name`, `content`).
 * The output-only `patch` field is intentionally not part of it.
 */
const _File = z.object({ path: z.string(), name: z.string(), content: z.string() })
32
+
/**
 * Optional settings accepted by the patch operation.
 */
export type Options = {
  /**
   * Token budget for a single prompt.
   *
   * - A file larger than this budget is split into line-range chunks.
   * - Files whose combined size exceeds this budget are processed in several batches.
   * - When omitted, the budget is derived from the model's maximum input size.
   */
  maxTokensPerChunk?: number
}
42
+
/**
 * Runtime schema used to validate the options passed to the patch operation.
 *
 * `maxTokensPerChunk` must be strictly positive: it is used as a divisor-derived step
 * when splitting files into line ranges, so a zero or negative budget cannot produce chunks.
 */
const Options = z.object({
  maxTokensPerChunk: z.number().positive().optional(),
})
46
+
declare module '@botpress/zai' {
  interface Zai {
    /**
     * Edits one or more files according to natural language instructions.
     *
     * The model never rewrites whole files. Each file is shown to it with numbered lines and it
     * answers with Micropatch operations (insert / replace / delete, addressed by original line
     * number), which are then applied to the original content. One output file is returned for
     * every input file, in the same order, whether or not it was modified.
     *
     * On each returned file, `patch` is:
     * - the Micropatch operations that were applied, when the file was modified
     * - an empty string, when no operations were produced for that file
     * - a message starting with `ERROR:`, when the operations could not be applied
     *   (in that case `content` is the unmodified input content)
     *
     * @param files - Files to patch, each with `path`, `name` and `content`; an empty array yields an empty result
     * @param instructions - Natural language description of the changes to make
     * @param options - Optional settings, see {@link Options}
     * @returns A Response resolving to the patched files
     *
     * @example Replacing text
     * ```typescript
     * const patched = await zai.patch(
     *   [{ path: 'src/hello.ts', name: 'hello.ts', content: 'console.log("Hello World")' }],
     *   'change the message to say "Hi World"'
     * )
     * // patched[0].content -> console.log("Hi World")
     * // patched[0].patch   -> ◼︎=1|console.log("Hi World")
     * ```
     *
     * @example Adding documentation
     * ```typescript
     * const patched = await zai.patch(
     *   [{
     *     path: 'src/utils.ts',
     *     name: 'utils.ts',
     *     content: 'export function add(a: number, b: number) {\n  return a + b\n}'
     *   }],
     *   'add JSDoc comments to all exported functions'
     * )
     * ```
     *
     * @example Several files in one call
     * ```typescript
     * const patched = await zai.patch(
     *   [
     *     { path: 'package.json', name: 'package.json', content: '...' },
     *     { path: 'config.json', name: 'config.json', content: '...' }
     *   ],
     *   'update version to 2.0.0 in all config files'
     * )
     * ```
     *
     * @example Refactoring
     * ```typescript
     * const patched = await zai.patch(
     *   [{ path: 'src/api.ts', name: 'api.ts', content: 'function fetchUser() { ... }' }],
     *   'convert fetchUser to an async function and add error handling'
     * )
     * ```
     *
     * @example Removing code
     * ```typescript
     * const patched = await zai.patch(
     *   [{
     *     path: 'src/legacy.ts',
     *     name: 'legacy.ts',
     *     content: 'const debug = true\nconsole.log("Debug mode")\nfunction main() {...}'
     *   }],
     *   'remove all debug-related code'
     * )
     * ```
     *
     * @example Looking at what was applied
     * ```typescript
     * const patched = await zai.patch(files, 'add error handling')
     *
     * for (const file of patched) {
     *   if (file.patch) {
     *     console.log(`Patches for ${file.path}:`)
     *     console.log(file.patch)
     *   }
     * }
     * ```
     */
    patch(files: Array<File>, instructions: string, options?: Options): Response<Array<File>>
  }
}
148
+
/**
 * A contiguous range of lines taken from a file; may also cover the whole file.
 */
type FileChunk = {
  /** Path of the file this chunk belongs to */
  path: string
  /** Name of the file this chunk belongs to */
  name: string
  /** Text of the lines covered by this chunk */
  content: string
  /** 1-based line number, in the original file, of the chunk's first line */
  startLine: number
  /** 1-based line number, in the original file, of the chunk's last line */
  endLine: number
  /** Number of lines in the complete file */
  totalLines: number
  /** True when the chunk is only a part of a larger file */
  isPartial: boolean
}
161
+
/**
 * A group of files / file chunks sent to the model in a single prompt.
 */
type ProcessingBatch = {
  /** Chunks included in this batch */
  items: Array<FileChunk>
  /** Sum of the token counts of `items` */
  tokenCount: number
}
169
+
/**
 * Core implementation of `zai.patch`.
 *
 * Every file is rendered with numbered lines, the model is asked for Micropatch operations per file,
 * and those operations are applied to the original content with `Micropatch.applyText`.
 *
 * Two execution paths exist:
 * - Everything fits in `maxTokensPerChunk`: a single prompt is used. On this path previously saved
 *   examples are consulted (an exact key match is returned as-is) and the result is saved as a new example.
 * - Otherwise: files are split by line ranges, grouped into batches, processed with a concurrency
 *   of 10, and the operations of all batches are merged per file before being applied.
 *
 * @param files - Files to patch; an empty array short-circuits to `[]`
 * @param instructions - Natural language description of the change; truncated to 20% of the usable token budget
 * @param _options - Raw options, validated through the `Options` schema
 * @param ctx - Execution context providing the model, the abort signal, usage counters and the optional adapter
 * @returns One file per input file, in input order. `patch` holds the applied operations, `''` when there was
 * nothing to apply, or an `ERROR: ...` message when applying failed (the content is then left untouched)
 * @throws When the context's abort signal is already aborted, or when `_options` fails validation
 */
const patch = async (
  files: Array<File>,
  instructions: string,
  _options: Options | undefined,
  ctx: ZaiContext
): Promise<Array<File>> => {
  ctx.controller.signal.throwIfAborted()

  if (files.length === 0) {
    return []
  }

  // Captured up front so the saved example reports an elapsed duration rather than a timestamp
  const startedAt = Date.now()

  const options = Options.parse(_options ?? {}) as Options
  const tokenizer = await getTokenizer()
  const model = await ctx.getModel()

  const taskId = ctx.taskId
  const taskType = 'zai.patch'

  // Token budget: 20% of the usable input is reserved for the instructions, the rest for file content
  const TOKENS_TOTAL_MAX = model.input.maxTokens - PROMPT_INPUT_BUFFER - PROMPT_OUTPUT_BUFFER
  const TOKENS_INSTRUCTIONS_MAX = Math.floor(TOKENS_TOTAL_MAX * 0.2)
  const TOKENS_FILES_MAX = TOKENS_TOTAL_MAX - TOKENS_INSTRUCTIONS_MAX

  const truncatedInstructions = tokenizer.truncate(instructions, TOKENS_INSTRUCTIONS_MAX)
  const maxTokensPerChunk = options.maxTokensPerChunk ?? TOKENS_FILES_MAX

  const fileTokenCounts = files.map((file) => ({
    file,
    tokens: tokenizer.count(file.content),
    lines: file.content.split(/\r?\n/).length,
  }))

  const totalInputTokens = fileTokenCounts.reduce((sum, f) => sum + f.tokens, 0)

  /**
   * Append patch operations for a file to a path -> operations map.
   * Empty operations are ignored so they never leave a dangling newline that a
   * multi-line operation could absorb.
   */
  const appendPatchOps = (target: Map<string, string>, filePath: string, patchOps: string): void => {
    if (patchOps.trim().length === 0) {
      return
    }

    const existing = target.get(filePath)
    target.set(filePath, existing ? `${existing}\n${patchOps}` : patchOps)
  }

  /**
   * Split a file into chunks by line ranges, sized from the file's average tokens per line
   */
  const splitFileIntoChunks = (file: File, totalLines: number, fileTokens: number): Array<FileChunk> => {
    const lines = file.content.split(/\r?\n/)
    const tokensPerLine = fileTokens / totalLines

    // A file without tokens trivially fits in one chunk. Otherwise always advance by at least one
    // line: a step of 0 (very long lines, or a tiny budget) would make the loop below never end.
    const linesPerChunk =
      tokensPerLine > 0 ? Math.max(1, Math.floor(maxTokensPerChunk / tokensPerLine)) : totalLines

    if (linesPerChunk >= totalLines) {
      return [
        {
          path: file.path,
          name: file.name,
          content: file.content,
          startLine: 1,
          endLine: totalLines,
          totalLines,
          isPartial: false,
        },
      ]
    }

    const chunks: Array<FileChunk> = []
    for (let start = 0; start < totalLines; start += linesPerChunk) {
      const end = Math.min(start + linesPerChunk, totalLines)

      chunks.push({
        path: file.path,
        name: file.name,
        content: lines.slice(start, end).join('\n'),
        startLine: start + 1,
        endLine: end,
        totalLines,
        isPartial: true,
      })
    }

    return chunks
  }

  /**
   * Greedily group chunks into batches that stay within the token limit.
   * A chunk that is larger than the limit on its own still gets a batch of its own.
   */
  const createBatches = (chunks: Array<FileChunk>): Array<ProcessingBatch> => {
    const batches: Array<ProcessingBatch> = []
    let currentBatch: ProcessingBatch = { items: [], tokenCount: 0 }

    for (const chunk of chunks) {
      const chunkTokens = tokenizer.count(chunk.content)

      if (currentBatch.tokenCount + chunkTokens > maxTokensPerChunk && currentBatch.items.length > 0) {
        batches.push(currentBatch)
        currentBatch = { items: [], tokenCount: 0 }
      }

      currentBatch.items.push(chunk)
      currentBatch.tokenCount += chunkTokens
    }

    if (currentBatch.items.length > 0) {
      batches.push(currentBatch)
    }

    return batches
  }

  /**
   * Format file chunks using XML tags with numbered lines.
   * Line numbers are global to the file, so operations from different chunks can be merged.
   */
  const formatChunksForInput = (chunks: Array<FileChunk>): string => {
    return chunks
      .map((chunk) => {
        const numberedView = chunk.content
          .split(/\r?\n/)
          .map((line, idx) => `${String(chunk.startLine + idx).padStart(3, '0')}|${line}`)
          .join('\n')

        const partialNote = chunk.isPartial
          ? ` (PARTIAL: lines ${chunk.startLine}-${chunk.endLine} of ${chunk.totalLines} total lines)`
          : ''

        return `<FILE path="${chunk.path}" name="${chunk.name}"${partialNote}>
${numberedView}
</FILE>`
      })
      .join('\n\n')
  }

  /**
   * Parse the model output into patch operations per file path.
   * Several <FILE> blocks may target the same path (e.g. one per chunk of the same file in a batch);
   * their operations are concatenated instead of the last block replacing the earlier ones.
   */
  const parsePatchOutput = (output: string): Map<string, string> => {
    const patchMap = new Map<string, string>()
    const fileBlockRegex = /<FILE[^>]*path="([^"]+)"[^>]*>([\s\S]*?)<\/FILE>/g

    for (const match of output.matchAll(fileBlockRegex)) {
      appendPatchOps(patchMap, match[1], match[2].trim())
    }

    return patchMap
  }

  /**
   * Send one batch to the model and return the patch operations it produced, per file path
   */
  const processBatch = async (batch: ProcessingBatch): Promise<Map<string, string>> => {
    const chunksInput = formatChunksForInput(batch.items)

    const { extracted } = await ctx.generateContent({
      systemPrompt: getMicropatchSystemPrompt(),
      messages: [
        {
          type: 'text',
          role: 'user',
          content: `
Instructions: ${truncatedInstructions}

${chunksInput}

Generate patches for each file that needs modification:
`.trim(),
        },
      ],
      stopSequences: [],
      transform: (text) => {
        return text.trim()
      },
    })

    return parsePatchOutput(extracted)
  }

  /**
   * Apply the patch operations to the original files.
   * A failure on one file is logged and reported in that file's `patch` field; other files are unaffected.
   */
  const applyPatches = (patchesByPath: Map<string, string>): Array<File> =>
    files.map((file) => {
      const patchOps = patchesByPath.get(file.path)

      if (!patchOps || patchOps.trim().length === 0) {
        return { ...file, patch: '' }
      }

      try {
        const patchedContent = Micropatch.applyText(file.content, patchOps)
        return { ...file, content: patchedContent, patch: patchOps }
      } catch (error) {
        console.error(`Failed to apply patch to ${file.path}:`, error)
        return {
          ...file,
          patch: `ERROR: ${error instanceof Error ? error.message : String(error)}`,
        }
      }
    })

  const needsChunking =
    totalInputTokens > maxTokensPerChunk || fileTokenCounts.some((f) => f.tokens > maxTokensPerChunk)

  if (!needsChunking) {
    // Simple case: all files fit in one prompt
    const exampleKey = fastHash(
      stringify({
        taskId,
        taskType,
        files: files.map((f) => ({ path: f.path, content: f.content })),
        instructions: truncatedInstructions,
      })
    )

    const tableExamples =
      taskId && ctx.adapter
        ? await ctx.adapter.getExamples<Array<File>, Array<File>>({
            input: files,
            taskId,
            taskType,
          })
        : []

    // An identical request was already answered: reuse its output
    const exactMatch = tableExamples.find((x) => x.key === exampleKey)
    if (exactMatch) {
      return exactMatch.output as Array<File>
    }

    const wholeFileChunks: Array<FileChunk> = fileTokenCounts.map(({ file, lines }) => ({
      path: file.path,
      name: file.name,
      content: file.content,
      startLine: 1,
      endLine: lines,
      totalLines: lines,
      isPartial: false,
    }))

    const patchMap = await processBatch({ items: wholeFileChunks, tokenCount: totalInputTokens })
    const patchedFiles = applyPatches(patchMap)

    // Save example for active learning
    if (taskId && ctx.adapter && !ctx.controller.signal.aborted) {
      await ctx.adapter.saveExample({
        key: exampleKey,
        taskType,
        taskId,
        input: files,
        output: patchedFiles,
        instructions: truncatedInstructions,
        metadata: {
          cost: {
            input: ctx.usage.cost.input,
            output: ctx.usage.cost.output,
          },
          latency: Date.now() - startedAt,
          model: ctx.modelId,
          tokens: {
            input: ctx.usage.tokens.input,
            output: ctx.usage.tokens.output,
          },
        },
      })
    }

    return patchedFiles
  }

  // Complex case: needs chunking
  // Step 1: Split files that are too large
  const allChunks = fileTokenCounts.flatMap(({ file, tokens, lines }) => splitFileIntoChunks(file, lines, tokens))

  // Step 2: Create batches that fit within token limits
  const batches = createBatches(allChunks)

  // Step 3: Process batches in parallel
  const limit = pLimit(10)
  const batchResults = await Promise.all(batches.map((batch) => limit(() => processBatch(batch))))

  // Step 4: Merge results - combine patches from all batches per file
  const mergedPatches = new Map<string, string>()
  for (const patchMap of batchResults) {
    for (const [filePath, patchOps] of patchMap.entries()) {
      appendPatchOps(mergedPatches, filePath, patchOps)
    }
  }

  // Step 5: Apply merged patches to original files
  return applyPatches(mergedPatches)
}
503
+
/**
 * Builds the system prompt teaching the model the Micropatch protocol: the numbered-line
 * `<FILE>` input format, the expected `<FILE>` output format, and the available operations.
 *
 * @returns The prompt text, without leading or trailing whitespace
 */
function getMicropatchSystemPrompt(): string {
  const prompt = `
You are a code patching assistant. Your task is to generate precise line-based patches using the Micropatch protocol.

## Input Format

You will receive files in this XML format:

\`\`\`
<FILE path="src/hello.ts" name="hello.ts">
001|const x = 1
002|const y = 2
003|console.log(x + y)
</FILE>

<FILE path="src/utils.ts" name="utils.ts">
001|export function add(a, b) {
002| return a + b
003|}
</FILE>
\`\`\`

Each file has:
- **path**: Full file path
- **name**: File name
- **Numbered lines**: Format is \`NNN|content\` where NNN is the ORIGINAL line number (1-based)

## Output Format

Generate patches for EACH file that needs modification using this EXACT XML format:

\`\`\`
<FILE path="src/hello.ts">
◼︎=1|const a = 1
◼︎=2|const b = 2
◼︎=3|console.log(a + b)
</FILE>

<FILE path="src/utils.ts">
◼︎<1|/**
* Adds two numbers
*/
</FILE>
\`\`\`

**CRITICAL RULES**:
1. Each \`<FILE>\` tag MUST include the exact \`path\` attribute from the input
2. Put patch operations for EACH file inside its own \`<FILE>...</FILE>\` block
3. If a file doesn't need changes, omit its \`<FILE>\` block entirely
4. DO NOT mix patches from different files
5. DO NOT include line numbers or any text outside the patch operations

## Micropatch Protocol

The Micropatch protocol uses line numbers to reference ORIGINAL lines (before any edits).

### Operations

Each operation starts with the marker \`◼︎\` at the beginning of a line:

1. **Insert BEFORE line**: \`◼︎<NNN|text\`
- Inserts \`text\` as a new line BEFORE original line NNN
- Example: \`◼︎<5|console.log('debug')\`

2. **Insert AFTER line**: \`◼︎>NNN|text\`
- Inserts \`text\` as a new line AFTER original line NNN
- Example: \`◼︎>10|}\`

3. **Replace single line**: \`◼︎=NNN|new text\`
- Replaces original line NNN with \`new text\`
- Can span multiple lines (continue until next ◼︎ or end)
- Example:
\`\`\`
◼︎=7|function newName() {
return 42
}
\`\`\`

4. **Replace range**: \`◼︎=NNN-MMM|replacement\`
- Replaces lines NNN through MMM with replacement text
- Example: \`◼︎=5-8|const combined = a + b + c + d\`

5. **Delete single line**: \`◼︎-NNN\`
- Deletes original line NNN
- Example: \`◼︎-12\`

6. **Delete range**: \`◼︎-NNN-MMM\`
- Deletes lines NNN through MMM inclusive
- Example: \`◼︎-5-10\`

### Escaping

- To include a literal \`◼︎\` in your text, use \`\\◼︎\`
- No other escape sequences are recognized

### Important Rules

1. **Use ORIGINAL line numbers**: Always reference the line numbers shown in the input (001, 002, etc.)
2. **One operation per line**: Each operation must start on a new line with \`◼︎\`
3. **No explanations**: Output ONLY patch operations inside \`<FILE>\` tags
4. **Precise operations**: Use the minimal set of operations to achieve the goal
5. **Verify line numbers**: Double-check that line numbers match the input

## Example

**Input:**
\`\`\`
<FILE path="src/math.ts" name="math.ts">
001|const x = 1
002|const y = 2
003|console.log(x + y)
004|
005|export { x, y }
</FILE>
\`\`\`

**Task:** Change variable names from x,y to a,b

**Output:**
\`\`\`
<FILE path="src/math.ts">
◼︎=1|const a = 1
◼︎=2|const b = 2
◼︎=3|console.log(a + b)
◼︎=5|export { a, b }
</FILE>
\`\`\`

## Your Task

Generate ONLY the \`<FILE>\` blocks with patch operations. Do not include explanations, comments, or any other text.
`

  return prompt.trim()
}
640
+
/**
 * Runtime implementation of `Zai#patch`: creates a dedicated context for the
 * 'zai.patch' task and wraps the running operation in a `Response`.
 */
Zai.prototype.patch = function (
  this: Zai,
  files: Array<File>,
  instructions: string,
  _options?: Options
): Response<Array<File>> {
  const ctx = new ZaiContext({
    client: this.client,
    modelId: this.Model,
    taskId: this.taskId,
    taskType: 'zai.patch',
    adapter: this.adapter,
  })

  // The operation starts immediately; the Response exposes its result unchanged
  const pending = patch(files, instructions, _options, ctx)

  return new Response<Array<File>>(ctx, pending, (patched) => patched)
}