@botpress/zai 2.4.2 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,398 @@
1
+ import { z } from "@bpinternal/zui";
2
+ import pLimit from "p-limit";
3
+ import { ZaiContext } from "../context";
4
+ import { Micropatch } from "../micropatch";
5
+ import { Response } from "../response";
6
+ import { getTokenizer } from "../tokenizer";
7
+ import { fastHash, stringify } from "../utils";
8
+ import { Zai } from "../zai";
9
+ import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from "./constants";
10
/**
 * Schema describing a file handled by `patch`: its path, display name and
 * full text content. It is not referenced at runtime in the visible part of
 * this module (presumably kept for type inference — TODO confirm).
 */
const _File = z.object({
  path: z.string(),
  name: z.string(),
  content: z.string()
});

/**
 * Schema for the optional settings accepted by `patch`.
 *
 * `maxTokensPerChunk` caps the number of tokens sent per request. It must be
 * strictly positive: zero or negative budgets make the line-based chunk size
 * computed in `patch` non-positive, so they are rejected here at validation
 * time instead.
 */
const Options = z.object({
  maxTokensPerChunk: z.number().positive().optional()
});
18
/**
 * Applies natural-language `instructions` to a set of files by asking the model
 * for Micropatch operations and applying them to each file's content.
 *
 * When everything fits in one request, the files are sent together and the
 * result is looked up in / saved to `ctx.adapter` (only when a task id is set).
 * Otherwise each file is split into line-based chunks, chunks are grouped into
 * batches, batches are processed with a concurrency of 10, and the operations
 * returned for the same file path are concatenated before being applied.
 *
 * @param {Array<{path: string, name: string, content: string}>} files - Files to patch.
 * @param {string} instructions - Requested change; truncated to 20% of the usable token budget.
 * @param {{maxTokensPerChunk?: number}|undefined} _options - Optional per-request token budget.
 * @param {object} ctx - Execution context; this function reads `controller`, `getModel`,
 *   `generateContent`, `taskId`, `adapter`, `usage` and `modelId` from it.
 * @returns {Promise<Array<object>>} One entry per input file, in input order: the file's
 *   fields plus `patch` ("" when untouched, the applied operations, or an `ERROR: …`
 *   message when applying failed, in which case `content` is left unchanged). On an
 *   exact cache hit the stored example output is returned as-is.
 * @throws Whatever `ctx.controller.signal.throwIfAborted()` throws when already aborted.
 */
const patch = async (files, instructions, _options, ctx) => {
  ctx.controller.signal.throwIfAborted();
  if (files.length === 0) {
    return [];
  }
  // Captured up front so the saved example records elapsed time, not a wall-clock timestamp.
  const startedAt = Date.now();
  const options = Options.parse(_options ?? {});
  const tokenizer = await getTokenizer();
  const model = await ctx.getModel();
  const taskId = ctx.taskId;
  const taskType = "zai.patch";
  const LINE_BREAK = /\r?\n/;

  // Token budget: 20% reserved for the instructions, the rest for file content.
  const TOKENS_TOTAL_MAX = model.input.maxTokens - PROMPT_INPUT_BUFFER - PROMPT_OUTPUT_BUFFER;
  const TOKENS_INSTRUCTIONS_MAX = Math.floor(TOKENS_TOTAL_MAX * 0.2);
  const TOKENS_FILES_MAX = TOKENS_TOTAL_MAX - TOKENS_INSTRUCTIONS_MAX;
  const truncatedInstructions = tokenizer.truncate(instructions, TOKENS_INSTRUCTIONS_MAX);
  const maxTokensPerChunk = options.maxTokensPerChunk ?? TOKENS_FILES_MAX;

  const fileTokenCounts = files.map((file) => ({
    file,
    tokens: tokenizer.count(file.content),
    lines: file.content.split(LINE_BREAK).length
  }));
  const totalInputTokens = fileTokenCounts.reduce((sum, f) => sum + f.tokens, 0);

  // Splits one file into consecutive line ranges sized from its average tokens per line.
  const splitFileIntoChunks = (file, totalLines, fileTokens) => {
    const lines = file.content.split(LINE_BREAK);
    const tokensPerLine = fileTokens / totalLines;
    // At least one line per chunk: a zero step would never advance the loop below
    // (this happens when a single line is larger than the chunk budget). Such a
    // chunk may exceed the budget, which cannot be avoided with line-based splitting.
    const linesPerChunk = fileTokens > 0 ? Math.max(1, Math.floor(maxTokensPerChunk / tokensPerLine)) : totalLines;
    if (linesPerChunk >= totalLines) {
      return [
        {
          path: file.path,
          name: file.name,
          content: file.content,
          startLine: 1,
          endLine: totalLines,
          totalLines,
          isPartial: false
        }
      ];
    }
    const chunks = [];
    for (let start = 0; start < totalLines; start += linesPerChunk) {
      const end = Math.min(start + linesPerChunk, totalLines);
      chunks.push({
        path: file.path,
        name: file.name,
        content: lines.slice(start, end).join("\n"),
        startLine: start + 1,
        endLine: end,
        totalLines,
        isPartial: true
      });
    }
    return chunks;
  };

  // Greedily packs chunks into batches that stay within the token budget;
  // a chunk that alone exceeds the budget still gets a batch of its own.
  const createBatches = (chunks) => {
    const batches2 = [];
    let currentBatch = { items: [], tokenCount: 0 };
    for (const chunk of chunks) {
      const chunkTokens = tokenizer.count(chunk.content);
      if (currentBatch.tokenCount + chunkTokens > maxTokensPerChunk && currentBatch.items.length > 0) {
        batches2.push(currentBatch);
        currentBatch = { items: [], tokenCount: 0 };
      }
      currentBatch.items.push(chunk);
      currentBatch.tokenCount += chunkTokens;
    }
    if (currentBatch.items.length > 0) {
      batches2.push(currentBatch);
    }
    return batches2;
  };

  // Renders chunks as <FILE> blocks whose lines are prefixed with their original line number.
  const formatChunksForInput = (chunks) => {
    return chunks.map((chunk) => {
      const numberedView = chunk.content.split(LINE_BREAK).map((line, idx) => {
        const lineNum = chunk.startLine + idx;
        return `${String(lineNum).padStart(3, "0")}|${line}`;
      }).join("\n");
      const partialNote = chunk.isPartial ? ` (PARTIAL: lines ${chunk.startLine}-${chunk.endLine} of ${chunk.totalLines} total lines)` : "";
      return `<FILE path="${chunk.path}" name="${chunk.name}"${partialNote}>
${numberedView}
</FILE>`;
    }).join("\n\n");
  };

  // Adds operations for a path, appending to what is already recorded instead of replacing it.
  const appendPatchOps = (patchMap, filePath, patchOps) => {
    const previous = patchMap.get(filePath);
    if (!previous) {
      patchMap.set(filePath, patchOps);
    } else if (patchOps.length > 0) {
      patchMap.set(filePath, `${previous}\n${patchOps}`);
    }
  };

  // Extracts the operations of every <FILE path="…">…</FILE> block in the model output.
  // The same path can appear more than once (e.g. several chunks of one file in a
  // batch), so repeated blocks are concatenated rather than overwritten.
  const parsePatchOutput = (output) => {
    const patchMap = /* @__PURE__ */ new Map();
    const fileBlockRegex = /<FILE[^>]*path="([^"]+)"[^>]*>([\s\S]*?)<\/FILE>/g;
    for (const [, filePath, body] of output.matchAll(fileBlockRegex)) {
      appendPatchOps(patchMap, filePath, body.trim());
    }
    return patchMap;
  };

  // Sends one batch to the model and returns a Map of file path -> patch operations.
  const processBatch = async (batch) => {
    const chunksInput = formatChunksForInput(batch.items);
    const { extracted } = await ctx.generateContent({
      systemPrompt: getMicropatchSystemPrompt(),
      messages: [
        {
          type: "text",
          role: "user",
          content: `
Instructions: ${truncatedInstructions}

${chunksInput}

Generate patches for each file that needs modification:
`.trim()
        }
      ],
      stopSequences: [],
      transform: (text) => {
        return text.trim();
      }
    });
    return parsePatchOutput(extracted);
  };

  // Applies the operations recorded for each input file. A failure is logged and
  // reported in `patch` while the file's original content is kept.
  const applyPatches = (patchMap) => {
    return files.map((file) => {
      const patchOps = patchMap.get(file.path);
      if (!patchOps || patchOps.trim().length === 0) {
        return {
          ...file,
          patch: ""
        };
      }
      try {
        const patchedContent = Micropatch.applyText(file.content, patchOps);
        return {
          ...file,
          content: patchedContent,
          patch: patchOps
        };
      } catch (error) {
        console.error(`Failed to apply patch to ${file.path}:`, error);
        return {
          ...file,
          patch: `ERROR: ${error instanceof Error ? error.message : String(error)}`
        };
      }
    });
  };

  const needsChunking = totalInputTokens > maxTokensPerChunk || fileTokenCounts.some((f) => f.tokens > maxTokensPerChunk);

  if (!needsChunking) {
    const Key = fastHash(
      stringify({
        taskId,
        taskType,
        files: files.map((f) => ({ path: f.path, content: f.content })),
        instructions: truncatedInstructions
      })
    );
    const tableExamples = taskId && ctx.adapter ? await ctx.adapter.getExamples({
      input: files,
      taskId,
      taskType
    }) : [];
    const exactMatch = tableExamples.find((x) => x.key === Key);
    if (exactMatch) {
      return exactMatch.output;
    }
    const allChunks2 = fileTokenCounts.map(({ file, lines }) => ({
      path: file.path,
      name: file.name,
      content: file.content,
      startLine: 1,
      endLine: lines,
      totalLines: lines,
      isPartial: false
    }));
    const patchMap = await processBatch({ items: allChunks2, tokenCount: totalInputTokens });
    const patchedFiles2 = applyPatches(patchMap);
    if (taskId && ctx.adapter && !ctx.controller.signal.aborted) {
      await ctx.adapter.saveExample({
        key: Key,
        taskType,
        taskId,
        input: files,
        output: patchedFiles2,
        instructions: truncatedInstructions,
        metadata: {
          cost: {
            input: ctx.usage.cost.input,
            output: ctx.usage.cost.output
          },
          latency: Date.now() - startedAt,
          model: ctx.modelId,
          tokens: {
            input: ctx.usage.tokens.input,
            output: ctx.usage.tokens.output
          }
        }
      });
    }
    return patchedFiles2;
  }

  const allChunks = fileTokenCounts.flatMap(({ file, tokens, lines }) => splitFileIntoChunks(file, lines, tokens));
  const batches = createBatches(allChunks);
  const limit = pLimit(10);
  const batchResults = await Promise.all(batches.map((batch) => limit(() => processBatch(batch))));

  // Operations reference original line numbers, so results from different
  // batches for the same file can simply be concatenated.
  const mergedPatches = /* @__PURE__ */ new Map();
  for (const patchMap of batchResults) {
    for (const [filePath, patchOps] of patchMap.entries()) {
      appendPatchOps(mergedPatches, filePath, patchOps);
    }
  }
  return applyPatches(mergedPatches);
};
256
/**
 * Builds the system prompt that teaches the model the Micropatch protocol:
 * the numbered `<FILE>` input format, the `<FILE>` output format, the six
 * patch operations, the escaping rule and a worked example.
 *
 * @returns {string} The prompt text with surrounding whitespace trimmed.
 */
function getMicropatchSystemPrompt() {
  // Operation marker: U+25FC followed by the U+FE0E variation selector.
  const MARK = "\u25FC\uFE0E";
  const prompt = `
You are a code patching assistant. Your task is to generate precise line-based patches using the Micropatch protocol.

## Input Format

You will receive files in this XML format:

\`\`\`
<FILE path="src/hello.ts" name="hello.ts">
001|const x = 1
002|const y = 2
003|console.log(x + y)
</FILE>

<FILE path="src/utils.ts" name="utils.ts">
001|export function add(a, b) {
002| return a + b
003|}
</FILE>
\`\`\`

Each file has:
- **path**: Full file path
- **name**: File name
- **Numbered lines**: Format is \`NNN|content\` where NNN is the ORIGINAL line number (1-based)

## Output Format

Generate patches for EACH file that needs modification using this EXACT XML format:

\`\`\`
<FILE path="src/hello.ts">
${MARK}=1|const a = 1
${MARK}=2|const b = 2
${MARK}=3|console.log(a + b)
</FILE>

<FILE path="src/utils.ts">
${MARK}<1|/**
* Adds two numbers
*/
</FILE>
\`\`\`

**CRITICAL RULES**:
1. Each \`<FILE>\` tag MUST include the exact \`path\` attribute from the input
2. Put patch operations for EACH file inside its own \`<FILE>...</FILE>\` block
3. If a file doesn't need changes, omit its \`<FILE>\` block entirely
4. DO NOT mix patches from different files
5. DO NOT include line numbers or any text outside the patch operations

## Micropatch Protocol

The Micropatch protocol uses line numbers to reference ORIGINAL lines (before any edits).

### Operations

Each operation starts with the marker \`${MARK}\` at the beginning of a line:

1. **Insert BEFORE line**: \`${MARK}<NNN|text\`
- Inserts \`text\` as a new line BEFORE original line NNN
- Example: \`${MARK}<5|console.log('debug')\`

2. **Insert AFTER line**: \`${MARK}>NNN|text\`
- Inserts \`text\` as a new line AFTER original line NNN
- Example: \`${MARK}>10|}\`

3. **Replace single line**: \`${MARK}=NNN|new text\`
- Replaces original line NNN with \`new text\`
- Can span multiple lines (continue until next ${MARK} or end)
- Example:
\`\`\`
${MARK}=7|function newName() {
return 42
}
\`\`\`

4. **Replace range**: \`${MARK}=NNN-MMM|replacement\`
- Replaces lines NNN through MMM with replacement text
- Example: \`${MARK}=5-8|const combined = a + b + c + d\`

5. **Delete single line**: \`${MARK}-NNN\`
- Deletes original line NNN
- Example: \`${MARK}-12\`

6. **Delete range**: \`${MARK}-NNN-MMM\`
- Deletes lines NNN through MMM inclusive
- Example: \`${MARK}-5-10\`

### Escaping

- To include a literal \`${MARK}\` in your text, use \`\\${MARK}\`
- No other escape sequences are recognized

### Important Rules

1. **Use ORIGINAL line numbers**: Always reference the line numbers shown in the input (001, 002, etc.)
2. **One operation per line**: Each operation must start on a new line with \`${MARK}\`
3. **No explanations**: Output ONLY patch operations inside \`<FILE>\` tags
4. **Precise operations**: Use the minimal set of operations to achieve the goal
5. **Verify line numbers**: Double-check that line numbers match the input

## Example

**Input:**
\`\`\`
<FILE path="src/math.ts" name="math.ts">
001|const x = 1
002|const y = 2
003|console.log(x + y)
004|
005|export { x, y }
</FILE>
\`\`\`

**Task:** Change variable names from x,y to a,b

**Output:**
\`\`\`
<FILE path="src/math.ts">
${MARK}=1|const a = 1
${MARK}=2|const b = 2
${MARK}=3|console.log(a + b)
${MARK}=5|export { a, b }
</FILE>
\`\`\`

## Your Task

Generate ONLY the \`<FILE>\` blocks with patch operations. Do not include explanations, comments, or any other text.
`;
  return prompt.trim();
}
389
/**
 * Public entry point: `zai.patch(files, instructions, options)`.
 *
 * Creates a fresh `ZaiContext` for the "zai.patch" task from this instance's
 * client, model, task id and adapter, starts the patch operation with it, and
 * wraps the pending result in a `Response` whose mapper returns the result
 * unchanged.
 *
 * @param {Array<{path: string, name: string, content: string}>} files - Files to patch.
 * @param {string} instructions - Description of the change to apply.
 * @param {{maxTokensPerChunk?: number}} [_options] - Optional settings forwarded to `patch`.
 * @returns {Response} Response wrapping the context and the pending patch result.
 */
Zai.prototype.patch = function(files, instructions, _options) {
  const { client, Model: modelId, taskId, adapter } = this;
  const context = new ZaiContext({
    client,
    modelId,
    taskId,
    taskType: "zai.patch",
    adapter
  });
  const pending = patch(files, instructions, _options, context);
  const passThrough = (result) => result;
  return new Response(context, pending, passThrough);
};