@botpress/zai 2.4.2 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +146 -22
- package/dist/index.js +1 -0
- package/dist/micropatch.js +273 -0
- package/dist/operations/patch.js +398 -0
- package/e2e/data/cache.jsonl +105 -0
- package/package.json +2 -2
- package/src/index.ts +1 -0
- package/src/micropatch.ts +364 -0
- package/src/operations/patch.ts +656 -0
@@ -0,0 +1,398 @@
+import { z } from "@bpinternal/zui";
+import pLimit from "p-limit";
+import { ZaiContext } from "../context";
+import { Micropatch } from "../micropatch";
+import { Response } from "../response";
+import { getTokenizer } from "../tokenizer";
+import { fastHash, stringify } from "../utils";
+import { Zai } from "../zai";
+import { PROMPT_INPUT_BUFFER, PROMPT_OUTPUT_BUFFER } from "./constants";
+const _File = z.object({
+  path: z.string(),
+  name: z.string(),
+  content: z.string()
+});
+const Options = z.object({
+  maxTokensPerChunk: z.number().optional()
+});
+const patch = async (files, instructions, _options, ctx) => {
+  ctx.controller.signal.throwIfAborted();
+  if (files.length === 0) {
+    return [];
+  }
+  const options = Options.parse(_options ?? {});
+  const tokenizer = await getTokenizer();
+  const model = await ctx.getModel();
+  const taskId = ctx.taskId;
+  const taskType = "zai.patch";
+  const TOKENS_TOTAL_MAX = model.input.maxTokens - PROMPT_INPUT_BUFFER - PROMPT_OUTPUT_BUFFER;
+  const TOKENS_INSTRUCTIONS_MAX = Math.floor(TOKENS_TOTAL_MAX * 0.2);
+  const TOKENS_FILES_MAX = TOKENS_TOTAL_MAX - TOKENS_INSTRUCTIONS_MAX;
+  const truncatedInstructions = tokenizer.truncate(instructions, TOKENS_INSTRUCTIONS_MAX);
+  const maxTokensPerChunk = options.maxTokensPerChunk ?? TOKENS_FILES_MAX;
+  const fileTokenCounts = files.map((file) => ({
+    file,
+    tokens: tokenizer.count(file.content),
+    lines: file.content.split(/\r?\n/).length
+  }));
+  const totalInputTokens = fileTokenCounts.reduce((sum, f) => sum + f.tokens, 0);
+  const splitFileIntoChunks = (file, totalLines, fileTokens) => {
+    const lines = file.content.split(/\r?\n/);
+    const tokensPerLine = fileTokens / totalLines;
+    const linesPerChunk = Math.floor(maxTokensPerChunk / tokensPerLine);
+    if (linesPerChunk >= totalLines) {
+      return [
+        {
+          path: file.path,
+          name: file.name,
+          content: file.content,
+          startLine: 1,
+          endLine: totalLines,
+          totalLines,
+          isPartial: false
+        }
+      ];
+    }
+    const chunks = [];
+    for (let start = 0; start < totalLines; start += linesPerChunk) {
+      const end = Math.min(start + linesPerChunk, totalLines);
+      const chunkLines = lines.slice(start, end);
+      const chunkContent = chunkLines.join("\n");
+      chunks.push({
+        path: file.path,
+        name: file.name,
+        content: chunkContent,
+        startLine: start + 1,
+        endLine: end,
+        totalLines,
+        isPartial: true
+      });
+    }
+    return chunks;
+  };
+  const createBatches = (chunks) => {
+    const batches2 = [];
+    let currentBatch = { items: [], tokenCount: 0 };
+    for (const chunk of chunks) {
+      const chunkTokens = tokenizer.count(chunk.content);
+      if (currentBatch.tokenCount + chunkTokens > maxTokensPerChunk && currentBatch.items.length > 0) {
+        batches2.push(currentBatch);
+        currentBatch = { items: [], tokenCount: 0 };
+      }
+      currentBatch.items.push(chunk);
+      currentBatch.tokenCount += chunkTokens;
+    }
+    if (currentBatch.items.length > 0) {
+      batches2.push(currentBatch);
+    }
+    return batches2;
+  };
+  const formatChunksForInput = (chunks) => {
+    return chunks.map((chunk) => {
+      const lines = chunk.content.split(/\r?\n/);
+      const numberedView = lines.map((line, idx) => {
+        const lineNum = chunk.startLine + idx;
+        return `${String(lineNum).padStart(3, "0")}|${line}`;
+      }).join("\n");
+      const partialNote = chunk.isPartial ? ` (PARTIAL: lines ${chunk.startLine}-${chunk.endLine} of ${chunk.totalLines} total lines)` : "";
+      return `<FILE path="${chunk.path}" name="${chunk.name}"${partialNote}>
+${numberedView}
+</FILE>`;
+    }).join("\n\n");
+  };
+  const parsePatchOutput = (output) => {
+    const patchMap = /* @__PURE__ */ new Map();
+    const fileBlockRegex = /<FILE[^>]*path="([^"]+)"[^>]*>([\s\S]*?)<\/FILE>/g;
+    let match;
+    while ((match = fileBlockRegex.exec(output)) !== null) {
+      const filePath = match[1];
+      const patchOps = match[2].trim();
+      patchMap.set(filePath, patchOps);
+    }
+    return patchMap;
+  };
+  const processBatch = async (batch) => {
+    const chunksInput = formatChunksForInput(batch.items);
+    const { extracted } = await ctx.generateContent({
+      systemPrompt: getMicropatchSystemPrompt(),
+      messages: [
+        {
+          type: "text",
+          role: "user",
+          content: `
+Instructions: ${truncatedInstructions}
+
+${chunksInput}
+
+Generate patches for each file that needs modification:
+`.trim()
+        }
+      ],
+      stopSequences: [],
+      transform: (text) => {
+        return text.trim();
+      }
+    });
+    return parsePatchOutput(extracted);
+  };
+  const needsChunking = totalInputTokens > maxTokensPerChunk || fileTokenCounts.some((f) => f.tokens > maxTokensPerChunk);
+  if (!needsChunking) {
+    const Key = fastHash(
+      stringify({
+        taskId,
+        taskType,
+        files: files.map((f) => ({ path: f.path, content: f.content })),
+        instructions: truncatedInstructions
+      })
+    );
+    const tableExamples = taskId && ctx.adapter ? await ctx.adapter.getExamples({
+      input: files,
+      taskId,
+      taskType
+    }) : [];
+    const exactMatch = tableExamples.find((x) => x.key === Key);
+    if (exactMatch) {
+      return exactMatch.output;
+    }
+    const allChunks2 = fileTokenCounts.map(({ file }) => ({
+      path: file.path,
+      name: file.name,
+      content: file.content,
+      startLine: 1,
+      endLine: file.content.split(/\r?\n/).length,
+      totalLines: file.content.split(/\r?\n/).length,
+      isPartial: false
+    }));
+    const patchMap = await processBatch({ items: allChunks2, tokenCount: totalInputTokens });
+    const patchedFiles2 = files.map((file) => {
+      const patchOps = patchMap.get(file.path);
+      if (!patchOps || patchOps.trim().length === 0) {
+        return {
+          ...file,
+          patch: ""
+        };
+      }
+      try {
+        const patchedContent = Micropatch.applyText(file.content, patchOps);
+        return {
+          ...file,
+          content: patchedContent,
+          patch: patchOps
+        };
+      } catch (error) {
+        console.error(`Failed to apply patch to ${file.path}:`, error);
+        return {
+          ...file,
+          patch: `ERROR: ${error instanceof Error ? error.message : String(error)}`
+        };
+      }
+    });
+    if (taskId && ctx.adapter && !ctx.controller.signal.aborted) {
+      await ctx.adapter.saveExample({
+        key: Key,
+        taskType,
+        taskId,
+        input: files,
+        output: patchedFiles2,
+        instructions: truncatedInstructions,
+        metadata: {
+          cost: {
+            input: ctx.usage.cost.input,
+            output: ctx.usage.cost.output
+          },
+          latency: Date.now(),
+          model: ctx.modelId,
+          tokens: {
+            input: ctx.usage.tokens.input,
+            output: ctx.usage.tokens.output
+          }
+        }
+      });
+    }
+    return patchedFiles2;
+  }
+  const allChunks = [];
+  for (const { file, tokens, lines } of fileTokenCounts) {
+    const chunks = splitFileIntoChunks(file, lines, tokens);
+    allChunks.push(...chunks);
+  }
+  const batches = createBatches(allChunks);
+  const limit = pLimit(10);
+  const batchResults = await Promise.all(batches.map((batch) => limit(() => processBatch(batch))));
+  const mergedPatches = /* @__PURE__ */ new Map();
+  for (const patchMap of batchResults) {
+    for (const [filePath, patchOps] of patchMap.entries()) {
+      const existing = mergedPatches.get(filePath) || "";
+      const combined = existing ? `${existing}
+${patchOps}` : patchOps;
+      mergedPatches.set(filePath, combined);
+    }
+  }
+  const patchedFiles = files.map((file) => {
+    const patchOps = mergedPatches.get(file.path);
+    if (!patchOps || patchOps.trim().length === 0) {
+      return {
+        ...file,
+        patch: ""
+      };
+    }
+    try {
+      const patchedContent = Micropatch.applyText(file.content, patchOps);
+      return {
+        ...file,
+        content: patchedContent,
+        patch: patchOps
+      };
+    } catch (error) {
+      console.error(`Failed to apply patch to ${file.path}:`, error);
+      return {
+        ...file,
+        patch: `ERROR: ${error instanceof Error ? error.message : String(error)}`
+      };
+    }
+  });
+  return patchedFiles;
+};
+function getMicropatchSystemPrompt() {
+  return `
+You are a code patching assistant. Your task is to generate precise line-based patches using the Micropatch protocol.
+
+## Input Format
+
+You will receive files in this XML format:
+
+\`\`\`
+<FILE path="src/hello.ts" name="hello.ts">
+001|const x = 1
+002|const y = 2
+003|console.log(x + y)
+</FILE>
+
+<FILE path="src/utils.ts" name="utils.ts">
+001|export function add(a, b) {
+002|  return a + b
+003|}
+</FILE>
+\`\`\`
+
+Each file has:
+- **path**: Full file path
+- **name**: File name
+- **Numbered lines**: Format is \`NNN|content\` where NNN is the ORIGINAL line number (1-based)
+
+## Output Format
+
+Generate patches for EACH file that needs modification using this EXACT XML format:
+
+\`\`\`
+<FILE path="src/hello.ts">
+\u25FC\uFE0E=1|const a = 1
+\u25FC\uFE0E=2|const b = 2
+\u25FC\uFE0E=3|console.log(a + b)
+</FILE>
+
+<FILE path="src/utils.ts">
+\u25FC\uFE0E<1|/**
+ * Adds two numbers
+ */
+</FILE>
+\`\`\`
+
+**CRITICAL RULES**:
+1. Each \`<FILE>\` tag MUST include the exact \`path\` attribute from the input
+2. Put patch operations for EACH file inside its own \`<FILE>...</FILE>\` block
+3. If a file doesn't need changes, omit its \`<FILE>\` block entirely
+4. DO NOT mix patches from different files
+5. DO NOT include line numbers or any text outside the patch operations
+
+## Micropatch Protocol
+
+The Micropatch protocol uses line numbers to reference ORIGINAL lines (before any edits).
+
+### Operations
+
+Each operation starts with the marker \`\u25FC\uFE0E\` at the beginning of a line:
+
+1. **Insert BEFORE line**: \`\u25FC\uFE0E<NNN|text\`
+   - Inserts \`text\` as a new line BEFORE original line NNN
+   - Example: \`\u25FC\uFE0E<5|console.log('debug')\`
+
+2. **Insert AFTER line**: \`\u25FC\uFE0E>NNN|text\`
+   - Inserts \`text\` as a new line AFTER original line NNN
+   - Example: \`\u25FC\uFE0E>10|}\`
+
+3. **Replace single line**: \`\u25FC\uFE0E=NNN|new text\`
+   - Replaces original line NNN with \`new text\`
+   - Can span multiple lines (continue until next \u25FC\uFE0E or end)
+   - Example:
+   \`\`\`
+   \u25FC\uFE0E=7|function newName() {
+     return 42
+   }
+   \`\`\`
+
+4. **Replace range**: \`\u25FC\uFE0E=NNN-MMM|replacement\`
+   - Replaces lines NNN through MMM with replacement text
+   - Example: \`\u25FC\uFE0E=5-8|const combined = a + b + c + d\`
+
+5. **Delete single line**: \`\u25FC\uFE0E-NNN\`
+   - Deletes original line NNN
+   - Example: \`\u25FC\uFE0E-12\`
+
+6. **Delete range**: \`\u25FC\uFE0E-NNN-MMM\`
+   - Deletes lines NNN through MMM inclusive
+   - Example: \`\u25FC\uFE0E-5-10\`
+
+### Escaping
+
+- To include a literal \`\u25FC\uFE0E\` in your text, use \`\\\u25FC\uFE0E\`
+- No other escape sequences are recognized
+
+### Important Rules
+
+1. **Use ORIGINAL line numbers**: Always reference the line numbers shown in the input (001, 002, etc.)
+2. **One operation per line**: Each operation must start on a new line with \`\u25FC\uFE0E\`
+3. **No explanations**: Output ONLY patch operations inside \`<FILE>\` tags
+4. **Precise operations**: Use the minimal set of operations to achieve the goal
+5. **Verify line numbers**: Double-check that line numbers match the input
+
+## Example
+
+**Input:**
+\`\`\`
+<FILE path="src/math.ts" name="math.ts">
+001|const x = 1
+002|const y = 2
+003|console.log(x + y)
+004|
+005|export { x, y }
+</FILE>
+\`\`\`
+
+**Task:** Change variable names from x,y to a,b
+
+**Output:**
+\`\`\`
+<FILE path="src/math.ts">
+\u25FC\uFE0E=1|const a = 1
+\u25FC\uFE0E=2|const b = 2
+\u25FC\uFE0E=3|console.log(a + b)
+\u25FC\uFE0E=5|export { a, b }
+</FILE>
+\`\`\`
+
+## Your Task
+
+Generate ONLY the \`<FILE>\` blocks with patch operations. Do not include explanations, comments, or any other text.
+`.trim();
+}
+Zai.prototype.patch = function(files, instructions, _options) {
+  const context = new ZaiContext({
+    client: this.client,
+    modelId: this.Model,
+    taskId: this.taskId,
+    taskType: "zai.patch",
+    adapter: this.adapter
+  });
+  return new Response(context, patch(files, instructions, _options, context), (result) => result);
+};
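For orientation, here is a minimal TypeScript sketch of how the new `patch` operation might be called. Only the call shape is taken from the diff above (`patch(files, instructions, options)` with files of `{ path, name, content }` and an optional `maxTokensPerChunk`, returning files augmented with a `patch` string); the import path, client setup, and awaitability of the `Response` wrapper are assumptions, not statements about the package's documented API.

```ts
// Hedged sketch of the patch() operation added in 2.5.x. Only the argument and
// return shapes come from the diff above; everything marked "assumed" is not.
import { Zai } from '@botpress/zai' // assumed export/import path

declare const zai: Zai // assume an already-configured instance (constructor options not shown in this diff)

async function renameVariables() {
  // Assuming the Response wrapper returned by zai.patch() is awaitable like other zai operations.
  const patched = await zai.patch(
    [
      {
        path: 'src/math.ts',
        name: 'math.ts',
        content: 'const x = 1\nconst y = 2\nconsole.log(x + y)\n',
      },
    ],
    'Rename the variables x and y to a and b',
    { maxTokensPerChunk: 4000 } // optional; per the diff, defaults to the model's remaining input budget
  )

  // Per the diff, each returned file keeps its original fields, with `content`
  // rewritten when a Micropatch applied cleanly and the raw operations echoed in
  // `patch` ("" if the file was untouched, "ERROR: ..." if applying failed).
  for (const file of patched) {
    console.log(file.path, file.patch)
  }
}
```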