repoburg 1.3.11 → 1.3.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/backend/dist/packages/tokenpatch/index.d.ts +5 -1
- package/backend/dist/packages/tokenpatch/index.js +42 -24
- package/backend/dist/packages/tokenpatch/index.js.map +1 -1
- package/backend/dist/packages/tokenpatch/patcher.js +62 -20
- package/backend/dist/packages/tokenpatch/patcher.js.map +1 -1
- package/backend/dist/packages/tokenpatch/strategies/tiktoken-tokenizer.d.ts +6 -0
- package/backend/dist/packages/tokenpatch/strategies/tiktoken-tokenizer.js +28 -0
- package/backend/dist/packages/tokenpatch/strategies/tiktoken-tokenizer.js.map +1 -0
- package/backend/dist/packages/tokenpatch/strategies/tree-sitter-tokenizer.d.ts +9 -0
- package/backend/dist/packages/tokenpatch/strategies/tree-sitter-tokenizer.js +36 -0
- package/backend/dist/packages/tokenpatch/strategies/tree-sitter-tokenizer.js.map +1 -0
- package/backend/dist/packages/tokenpatch/tokenizer.interface.d.ts +4 -0
- package/backend/dist/packages/tokenpatch/tokenizer.interface.js +3 -0
- package/backend/dist/packages/tokenpatch/tokenizer.interface.js.map +1 -0
- package/backend/dist/packages/tokenpatch/tokens.d.ts +0 -2
- package/backend/dist/packages/tokenpatch/tokens.js +4 -23
- package/backend/dist/packages/tokenpatch/tokens.js.map +1 -1
- package/backend/dist/packages/tokenpatch/types.d.ts +2 -2
- package/backend/dist/src/llm-orchestration/action-handlers/patch.handler.js +130 -51
- package/backend/dist/src/llm-orchestration/action-handlers/patch.handler.js.map +1 -1
- package/backend/dist/src/llm-orchestration/parser/parsing.constants.d.ts +2 -0
- package/backend/dist/src/llm-orchestration/parser/parsing.constants.js +3 -1
- package/backend/dist/src/llm-orchestration/parser/parsing.constants.js.map +1 -1
- package/backend/dist/src/seeding/data/system-prompts/experimental_eta_master-agent.d.ts +1 -1
- package/backend/dist/src/seeding/data/system-prompts/experimental_eta_master-agent.js +44 -55
- package/backend/dist/src/seeding/data/system-prompts/experimental_eta_master-agent.js.map +1 -1
- package/backend/dist/tsconfig.build.tsbuildinfo +1 -1
- package/backend/packages/tokenpatch/index.spec.ts +55 -37
- package/backend/packages/tokenpatch/index.ts +61 -31
- package/backend/packages/tokenpatch/patcher.ts +119 -30
- package/backend/packages/tokenpatch/strategies/tiktoken-tokenizer.ts +35 -0
- package/backend/packages/tokenpatch/strategies/tree-sitter-tokenizer.ts +37 -0
- package/backend/packages/tokenpatch/tokenizer.interface.ts +5 -0
- package/backend/packages/tokenpatch/tokens.ts +10 -28
- package/backend/packages/tokenpatch/types.ts +4 -4
- package/package.json +2 -1
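
The substantive change in this release is a pluggable tokenizer behind applySnippetPatch: the third argument now accepts either a tree-sitter grammar path (the old form) or an ApplyPatchOptions object, and a new js-tiktoken BPE strategy is the default whenever no grammar path is given. A minimal sketch of the two call shapes, assuming the exports shown in the diffs below (the import path is illustrative, not part of this diff):

// Sketch only; the real import path depends on how the internal package is consumed.
import { applySnippetPatch } from './packages/tokenpatch';

const source = 'const x = 1;\nconst y = 2;\n';
const patch = 'const y = 3;';

// Old form, still supported: third argument is a grammar path string.
const viaTreeSitter = await applySnippetPatch(
  source,
  patch,
  './grammar/tree-sitter-typescript.wasm',
);

// New form: an options object; useTiktoken (or omitting grammarPath)
// selects the BPE strategy.
const viaTiktoken = await applySnippetPatch(source, patch, { useTiktoken: true });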
package/backend/packages/tokenpatch/index.spec.ts (+55 -37):

@@ -1,15 +1,39 @@
 import { applySnippetPatch } from './index';
 import * as path from 'path';
+import {
+  SPECIAL_PATCH_BEGIN_FILE_MARKER,
+  SPECIAL_PATCH_END_FILE_MARKER,
+} from '../../src/llm-orchestration/parser/parsing.constants';
 
 const TS_WASM_PATH = path.join(
   __dirname,
   './grammar/tree-sitter-typescript.wasm',
 );
 
-const TSX_WASM_PATH = path.join(__dirname, './grammar/tree-sitter-tsx.wasm');
-
 // Helper to normalize whitespace for robust comparison
-const normalize = (str: string) =>
+const normalize = (str: string) =>
+  str
+    .replace(/}/g, '} ')
+    .replace(/\s+/g, ' ')
+    .trim();
+
+const runTest = async (
+  sourceCode: string,
+  patchCode: string,
+  expectedResult: string,
+) => {
+  const resultTiktoken = await applySnippetPatch(sourceCode, patchCode, {
+    useTiktoken: true,
+  });
+  expect(normalize(resultTiktoken)).toEqual(normalize(expectedResult));
+
+  const resultTreeSitter = await applySnippetPatch(
+    sourceCode,
+    patchCode,
+    TS_WASM_PATH,
+  );
+  expect(normalize(resultTreeSitter)).toEqual(normalize(expectedResult));
+};
 
 describe('applySnippetPatch', () => {
   it('should replace a method body in a class by automatically finding anchor size', async () => {

@@ -35,12 +59,11 @@ class Greeter {
     // A new implementation
     return "Hello, TypeScript!";
   }
-
+  fc(){ return 42; }
 
 }
 `;
-
-    expect(normalize(result)).toEqual(normalize(expectedResult));
+    await runTest(sourceCode, patchCode, expectedResult);
   });
 
   it('should replace a data structure definition', async () => {

@@ -63,8 +86,7 @@ interface MyData {
 }
 console.log("hello");
 `;
-
-    expect(normalize(result)).toEqual(normalize(expectedResult));
+    await runTest(sourceCode, patchCode, expectedResult);
   });
 
   it('should replace a full function definition', async () => {

@@ -95,11 +117,10 @@ function calculate() {
 
 export { calculate };
 `;
-
-    expect(normalize(result)).toEqual(normalize(expectedResult));
+    await runTest(sourceCode, patchCode, expectedResult);
   });
 
-  it(
+  it(`should replace a beginning of the file ${SPECIAL_PATCH_BEGIN_FILE_MARKER}`, async () => {
     const sourceCode = `
 import { ModuleA } from './moduleA';
 import { ModuleB } from './moduleB';

@@ -107,7 +128,7 @@ import { ModuleB } from './moduleB';
 console.log('starting up');
 `;
     const patchCode = `
-//
+// ${SPECIAL_PATCH_BEGIN_FILE_MARKER}
 import groupBy from 'lodash';
 import { ModuleA } from './moduleA';
 `;

@@ -118,11 +139,10 @@ import { ModuleB } from './moduleB';
 
 console.log('starting up');
 `;
-
-    expect(normalize(result)).toEqual(normalize(expectedResult));
+    await runTest(sourceCode, patchCode, expectedResult);
   });
 
-  it(
+  it(`should handle ${SPECIAL_PATCH_BEGIN_FILE_MARKER} marker with variations`, async () => {
     const sourceCode = `
 import { ModuleA } from './moduleA';
 import { ModuleB } from './moduleB';

@@ -130,7 +150,7 @@ import { ModuleB } from './moduleB';
 console.log('starting up');
 `;
     const patchCode = `
-
+//${SPECIAL_PATCH_BEGIN_FILE_MARKER} extra text
 import groupBy from 'lodash';
 import { ModuleA } from './moduleA';
 `;

@@ -141,11 +161,10 @@ import { ModuleB } from './moduleB';
 
 console.log('starting up');
 `;
-
-    expect(normalize(result)).toEqual(normalize(expectedResult));
+    await runTest(sourceCode, patchCode, expectedResult);
   });
 
-  it(
+  it(`should replace a end of the file ${SPECIAL_PATCH_END_FILE_MARKER}`, async () => {
     const sourceCode = `
 import fs from 'fs';
 

@@ -158,7 +177,7 @@ export { calculate };
     const patchCode = `
 export { calculate };
 export { sum };
-//
+// ${SPECIAL_PATCH_END_FILE_MARKER}
 `;
     const expectedResult = `
 import fs from 'fs';

@@ -170,11 +189,10 @@ function calculate() {
 export { calculate };
 export { sum };
 `;
-
-    expect(normalize(result)).toEqual(normalize(expectedResult));
+    await runTest(sourceCode, patchCode, expectedResult);
   });
 
-  it(
+  it(`should handle ${SPECIAL_PATCH_END_FILE_MARKER} marker with variations`, async () => {
     const sourceCode = `
 import fs from 'fs';
 

@@ -187,7 +205,7 @@ export { calculate };
     const patchCode = `
 export { calculate };
 export { sum };
-
+//${SPECIAL_PATCH_END_FILE_MARKER} some extra text
 `;
     const expectedResult = `
 import fs from 'fs';

@@ -199,11 +217,11 @@ function calculate() {
 export { calculate };
 export { sum };
 `;
-
-    expect(normalize(result)).toEqual(normalize(expectedResult));
+    await runTest(sourceCode, patchCode, expectedResult);
   });
   it('should succeed with dynamic anchor sizing when initial anchors are ambiguous', async () => {
     const sourceCode = `
+import dotenv from 'dotenv';
 const config = {
   port: 8080,
   host: 'localhost',

@@ -219,6 +237,7 @@ const config2 = {
 };
 `;
     const patchCode = `
+import dotenv from 'dotenv';
 const config = {
   port: 9000,
   host: 'localhost',

@@ -228,6 +247,7 @@ const config = {
 function connect() {
 `;
     const expectedResult = `
+import dotenv from 'dotenv';
 const config = {
   port: 9000,
   host: 'localhost',

@@ -243,8 +263,7 @@ const config2 = {
   host: 'remote',
 };
 `;
-
-    expect(normalize(result)).toEqual(normalize(expectedResult));
+    await runTest(sourceCode, patchCode, expectedResult);
   });
 
   it('should handle partial type definitions by trimming ambiguous tokens', async () => {

@@ -275,6 +294,8 @@ const config2 = {
 */
 combinedStatusNew2?: everest_appserver_primitive_Text | null;
 /**
+* Contains Billable Expense line
+*/
 `;
     const expectedResult = `
 /**

@@ -296,8 +317,7 @@ const config2 = {
 */
 containsPrepaidItem?: everest_appserver_primitive_TrueFalse | null;
 `;
-
-    expect(normalize(result)).toEqual(normalize(expectedResult));
+    await runTest(sourceCode, patchCode, expectedResult);
   });
 
   it('should patch TSX correctly by trimming tokens from invalid partial snippets', async () => {

@@ -408,7 +428,10 @@ export function TransactionsPage() {
           <DataTable
             columns={billColumns}
             data={bills}
-            onRowClick={(row) =>
+            onRowClick={(row) => {
+              console.log(\`[UI] Selected matchable bill ID\`)
+              selectMatchable(row.id)
+            }}
             selectedId={selectedMatchableId}
           />
         </div>

@@ -569,11 +592,6 @@ export function TransactionsPage() {
 )
 }
 `;
-
-      sourceCode,
-      patchCode,
-      TSX_WASM_PATH,
-    );
-    expect(normalize(result)).toEqual(normalize(expectedResult));
+    await runTest(sourceCode, patchCode, expectedResult);
   });
 });
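
Every spec scenario above now funnels through runTest, which applies the same patch with both strategies (Tiktoken via the options object, tree-sitter via the legacy grammar-path argument) and compares whitespace-normalized output. Begin- and end-of-file patches are signalled by a // comment carrying a marker constant; the literal marker strings live in parsing.constants and are not shown in this diff, so the payload below uses a hypothetical value:

// Hypothetical marker value; assume SPECIAL_PATCH_BEGIN_FILE_MARKER === 'PATCH_BEGIN_FILE'.
const patchCode = `
// PATCH_BEGIN_FILE
import groupBy from 'lodash';
import { ModuleA } from './moduleA';
`;
// applySnippetPatch strips the marker line with a regex, then anchors the
// remaining tokens against the start of the source file (see index.ts below).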
package/backend/packages/tokenpatch/index.ts (+61 -31):

@@ -1,23 +1,48 @@
 import { initializeParser } from './parser';
-import { collectTokens } from './tokens';
 import {
   handleBeginOfFilePatch,
   handleEndOfFilePatch,
   handleStandardPatch,
 } from './patcher';
+import {
+  SPECIAL_PATCH_BEGIN_FILE_MARKER,
+  SPECIAL_PATCH_END_FILE_MARKER,
+} from '../../src/llm-orchestration/parser/parsing.constants';
+import { TokenizerStrategy } from './tokenizer.interface';
+import { TreeSitterTokenizer } from './strategies/tree-sitter-tokenizer';
+import { TiktokenTokenizer } from './strategies/tiktoken-tokenizer';
+
+export interface ApplyPatchOptions {
+  grammarPath?: string;
+  useTiktoken?: boolean;
+}
 
 export async function applySnippetPatch(
   sourceCode: string,
   patchCode: string,
-
+  optionsOrGrammarPath: string | ApplyPatchOptions,
 ): Promise<string> {
-
+  let strategy: TokenizerStrategy;
+  let options: ApplyPatchOptions;
+
+  // Backward compatibility for when the 3rd argument was just grammarPath string
+  if (typeof optionsOrGrammarPath === 'string') {
+    options = { grammarPath: optionsOrGrammarPath };
+  } else {
+    options = optionsOrGrammarPath;
+  }
 
-
-
-
+  if (options.useTiktoken) {
+    strategy = new TiktokenTokenizer();
+  } else if (options.grammarPath) {
+    const parser = await initializeParser(options.grammarPath);
+    strategy = new TreeSitterTokenizer(parser);
+  } else {
+    // Default to Tiktoken if no grammar path is provided
+    strategy = new TiktokenTokenizer();
   }
-
+
+  const sourceTokens = strategy.tokenize(sourceCode);
 
   let patchResult: {
     replaceStart: number;

@@ -27,42 +52,47 @@ export async function applySnippetPatch(
   };
   let processedPatchCode = patchCode;
 
-  const beginOfFileRegex =
-
+  const beginOfFileRegex = new RegExp(
+    `//\\s*${SPECIAL_PATCH_BEGIN_FILE_MARKER}.*`,
+  );
+  const endOfFileRegex = new RegExp(`//\\s*${SPECIAL_PATCH_END_FILE_MARKER}.*`);
 
   const hasBeginOfFile = beginOfFileRegex.test(patchCode);
   const hasEndOfFile = endOfFileRegex.test(patchCode);
 
   if (hasBeginOfFile) {
     processedPatchCode = patchCode.replace(beginOfFileRegex, '');
-
-    if
-
-
-
-      (t) => t.text !== '',
-    );
+    // We need to tokenize the processed patch code
+    // We filter out empty text tokens if any strategy produces them, though likely not needed for tiktoken
+    const patchTokens = strategy
+      .tokenize(processedPatchCode)
+      .filter((t) => t.text !== '' && t.text !== '\n');
     patchResult = handleBeginOfFilePatch(sourceTokens, patchTokens);
   } else if (hasEndOfFile) {
     processedPatchCode = patchCode.replace(endOfFileRegex, '');
-    const
-
-
-    }
-    const patchTokens = collectTokens(patchTree, processedPatchCode).filter(
-      (t) => t.text !== '',
-    );
+    const patchTokens = strategy
+      .tokenize(processedPatchCode)
+      .filter((t) => t.text !== '' && t.text !== '\n');
     patchResult = handleEndOfFilePatch(sourceTokens, patchTokens, sourceCode);
   } else {
     processedPatchCode = patchCode.trim();
-    const
-
-
+    const patchTokens = strategy
+      .tokenize(processedPatchCode)
+      .filter((t) => t.text !== '');
+
+    // Heuristic: If using Tiktoken, we discard the first and last 3 tokens to improve matching resilience.
+    // This prevents issues where the LLM hallucinating extra delimiters or context at the edges of the snippet.
+    const isTiktoken = options.useTiktoken || !options.grammarPath;
+    if (
+      isTiktoken &&
+      patchTokens.length >= 6 &&
+      patchTokens.length < sourceTokens.length
+    ) {
+      const innerTokens = patchTokens.slice(2, -2);
+      patchResult = handleStandardPatch(sourceTokens, innerTokens);
+    } else {
+      patchResult = handleStandardPatch(sourceTokens, patchTokens);
     }
-    const patchTokens = collectTokens(patchTree, processedPatchCode).filter(
-      (t) => t.text !== '',
-    );
-    patchResult = handleStandardPatch(sourceTokens, patchTokens);
   }
 
   // NOTE: replaceStart/End are byte offsets.

@@ -77,4 +107,4 @@ export async function applySnippetPatch(
   );
 
   return prefix + finalPatchContent + suffix;
-}
+}
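
One subtlety in the rewritten standard-patch branch: on the Tiktoken path, a patch with at least six tokens that is shorter than the source has its edges trimmed before anchor matching. The in-code comment speaks of the first and last three tokens, but slice(2, -2) actually drops two from each end. A toy illustration (not library API):

// The trimmed edges absorb stray delimiters an LLM may emit around a snippet;
// anchor matching then runs on innerTokens only.
const patchTokens = ['``', 'const', ' y', ' =', ' 3', ';', '``'];
const innerTokens = patchTokens.slice(2, -2); // [' y', ' =', ' 3']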
package/backend/packages/tokenpatch/patcher.ts (+119 -30):

@@ -1,5 +1,9 @@
 import type { Token } from './types';
 import { findAllSequences, formatAnchor } from './tokens';
+import {
+  SPECIAL_PATCH_BEGIN_FILE_MARKER,
+  SPECIAL_PATCH_END_FILE_MARKER,
+} from '../../src/llm-orchestration/parser/parsing.constants';
 
 interface PatchResult {
   replaceStart: number;

@@ -13,6 +17,23 @@ interface SimplePatchResult {
   replaceEnd: number;
 }
 
+// Helper to prioritize "Ambiguous" errors over "Not found" errors
+const updateLastError = (
+  currentError: Error | null,
+  newError: Error,
+): Error => {
+  if (!currentError) return newError;
+  // If we already have an Ambiguous error, keep it unless the new one is also Ambiguous
+  // (Assuming Ambiguous is more useful/specific than "Not found")
+  const currentIsAmbiguous = currentError.message.includes('Ambiguous');
+  const newIsAmbiguous = newError.message.includes('Ambiguous');
+
+  if (currentIsAmbiguous && !newIsAmbiguous) {
+    return currentError;
+  }
+  return newError;
+};
+
 // Internal helper for the original matching logic
 function _findBeginOfFilePatchLocation(
   sourceTokens: Token[],

@@ -36,7 +57,10 @@ function _findBeginOfFilePatchLocation(
   if (indices.length > 1) {
     const formattedAnchor = formatAnchor(suffixAnchor);
     const locations = indices
-      .map(
+      .map(
+        (i) =>
+          `line ${sourceTokens[i].startPosition?.row ? sourceTokens[i].startPosition.row + 1 : '?'}`,
+      )
       .join(', ');
     lastError = `Ambiguous suffix anchor. The sequence "${formattedAnchor}" was found at ${indices.length} locations: ${locations}.`;
   }

@@ -60,7 +84,9 @@ export function handleBeginOfFilePatch(
   originalPatchTokens: Token[],
 ): PatchResult {
   if (originalPatchTokens.length === 0) {
-    throw new Error(
+    throw new Error(
+      `Patch is empty after removing ${SPECIAL_PATCH_BEGIN_FILE_MARKER} marker.`,
+    );
   }
 
   let patchAttempt = [...originalPatchTokens];

@@ -77,13 +103,13 @@ export function handleBeginOfFilePatch(
       const patchInsertEnd = patchAttempt[patchAttempt.length - 1].endIndex;
       return { replaceStart, replaceEnd, patchInsertStart, patchInsertEnd };
     } catch (e) {
-      lastError = e as Error;
+      lastError = updateLastError(lastError, e as Error);
       patchAttempt = patchAttempt.slice(1); // Trim one token from the beginning
     }
   }
 
   throw new Error(
-    `Failed to apply
+    `Failed to apply ${SPECIAL_PATCH_BEGIN_FILE_MARKER} patch. Could not find a unique anchor, even after trimming tokens. Last known error: ${lastError?.message}`,
   );
 }
 

@@ -110,7 +136,10 @@ function _findEndOfFilePatchLocation(
   if (indices.length > 1) {
     const formattedAnchor = formatAnchor(prefixAnchor);
     const locations = indices
-      .map(
+      .map(
+        (i) =>
+          `line ${sourceTokens[i].startPosition?.row ? sourceTokens[i].startPosition.row + 1 : '?'}`,
+      )
       .join(', ');
     lastError = `Ambiguous prefix anchor. The sequence "${formattedAnchor}" was found at ${indices.length} locations: ${locations}.`;
   }

@@ -134,7 +163,9 @@ export function handleEndOfFilePatch(
   sourceCode: string,
 ): PatchResult {
   if (originalPatchTokens.length === 0) {
-    throw new Error(
+    throw new Error(
+      `Patch is empty after removing ${SPECIAL_PATCH_END_FILE_MARKER} marker.`,
+    );
   }
 
   let patchAttempt = [...originalPatchTokens];

@@ -152,22 +183,26 @@ export function handleEndOfFilePatch(
       const patchInsertEnd = patchAttempt[patchAttempt.length - 1].endIndex;
       return { replaceStart, replaceEnd, patchInsertStart, patchInsertEnd };
     } catch (e) {
-      lastError = e as Error;
+      lastError = updateLastError(lastError, e as Error);
       patchAttempt = patchAttempt.slice(0, -1); // Trim one token from the end
     }
   }
 
   throw new Error(
-    `Failed to apply
+    `Failed to apply ${SPECIAL_PATCH_END_FILE_MARKER} patch. Could not find a unique anchor, even after trimming tokens. Last known error: ${lastError?.message}`,
   );
 }
 
-
-
+interface PrefixResult {
+  prefixAnchor: Token[];
+  prefixIndex: number;
+  replaceStart: number;
+}
+
+function _findPrefixLocation(
   sourceTokens: Token[],
   patchTokens: Token[],
-):
-  // 1. Find smallest unique prefix
+): PrefixResult {
   let prefixAnchor: Token[] | null = null;
   let prefixIndex: number | null = null;
   let bestPrefixError: string | null = null;

@@ -185,7 +220,10 @@ function _findStandardPatchLocation(
     if (prefixIndices.length > 1) {
       const formatted = formatAnchor(currentPrefix);
       const locations = prefixIndices
-        .map(
+        .map(
+          (i) =>
+            `line ${sourceTokens[i].startPosition?.row ? sourceTokens[i].startPosition.row + 1 : '?'}`,
+        )
         .join(', ');
       bestPrefixError = `Ambiguous prefix anchor. The sequence "${formatted}" was found at ${prefixIndices.length} locations: ${locations}.`;
     }

@@ -199,12 +237,24 @@ function _findStandardPatchLocation(
   }
 
   if (!prefixAnchor || prefixIndex === null) {
-    throw new Error(
-      bestPrefixError || 'Could not find a unique prefix anchor.',
-    );
+    throw new Error(bestPrefixError || 'Could not find a unique prefix anchor.');
   }
 
-
+  const replaceStart = sourceTokens[prefixIndex].startIndex;
+
+  return { prefixAnchor, prefixIndex, replaceStart };
+}
+
+interface SuffixResult {
+  replaceEnd: number;
+}
+
+function _findSuffixLocation(
+  sourceTokens: Token[],
+  patchTokens: Token[],
+  prefixAnchor: Token[],
+  prefixIndex: number,
+): SuffixResult {
   let suffixAnchor: Token[] | null = null;
   let suffixIndex: number | null = null;
   let bestSuffixError: string | null = null;

@@ -231,7 +281,9 @@ function _findStandardPatchLocation(
         .map(
           (i) =>
             `line ${
-              sourceTokens[searchStartIndex + i].startPosition
+              sourceTokens[searchStartIndex + i].startPosition?.row
+                ? sourceTokens[searchStartIndex + i].startPosition.row + 1
+                : '?'
             }`,
         )
         .join(', ');

@@ -243,7 +295,9 @@ function _findStandardPatchLocation(
     throw new Error(bestSuffixError);
   }
   const prefixLocation = `line ${
-    sourceTokens[prefixIndex].startPosition
+    sourceTokens[prefixIndex].startPosition?.row
+      ? sourceTokens[prefixIndex].startPosition.row + 1
+      : '?'
   }`;
   const formattedPrefix = formatAnchor(prefixAnchor);
   const smallestSuffix = formatAnchor(

@@ -256,12 +310,10 @@ function _findStandardPatchLocation(
     );
   }
 
-  // 3. Apply patch
-  const replaceStart = sourceTokens[prefixIndex].startIndex;
   const replaceEnd =
     sourceTokens[suffixIndex + suffixAnchor.length - 1].endIndex;
 
-  return {
+  return { replaceEnd };
 }
 
 export function handleStandardPatch(

@@ -274,27 +326,64 @@ export function handleStandardPatch(
     );
   }
 
-  let
+  let startTrim = 0;
+  const endTrim = 0;
   let lastError: Error | null = null;
 
-
+  let prefixInfo: PrefixResult | null = null;
+
+  // Loop 1: Find Prefix (trim from start)
+  let patchAttempt = [...originalPatchTokens];
   while (patchAttempt.length >= 2) {
     try {
-
+      prefixInfo = _findPrefixLocation(sourceTokens, patchAttempt);
+      // Success finding prefix
+      startTrim = originalPatchTokens.length - patchAttempt.length;
+      break;
+    } catch (e) {
+      lastError = updateLastError(lastError, e as Error);
+      // Trim one token from the start
+      patchAttempt = patchAttempt.slice(1);
+    }
+  }
+
+  if (!prefixInfo) {
+    throw new Error(
+      `Failed to apply patch. Could not find a unique prefix anchor, even after trimming tokens. Last known error: ${lastError?.message}`,
+    );
+  }
+
+  // Loop 2: Find Suffix (trim from end)
+  // Reset patchAttempt to start from the found prefix startTrim, but allow trimming end
+  patchAttempt = originalPatchTokens.slice(startTrim);
+  lastError = null; // Reset last error for suffix search phase
+
+  while (patchAttempt.length >= prefixInfo.prefixAnchor.length + 1) {
+    // Need at least prefix + 1 token? Or just prefix + suffix?
+    try {
+      const { replaceEnd } = _findSuffixLocation(
        sourceTokens,
        patchAttempt,
+        prefixInfo.prefixAnchor,
+        prefixInfo.prefixIndex,
      );
+      // Success finding suffix
      const patchInsertStart = patchAttempt[0].startIndex;
      const patchInsertEnd = patchAttempt[patchAttempt.length - 1].endIndex;
-      return {
+      return {
+        replaceStart: prefixInfo.replaceStart,
+        replaceEnd,
+        patchInsertStart,
+        patchInsertEnd,
+      };
    } catch (e) {
-      lastError = e as Error;
-      // Trim one token from the
-      patchAttempt = patchAttempt.slice(
+      lastError = updateLastError(lastError, e as Error);
+      // Trim one token from the end
+      patchAttempt = patchAttempt.slice(0, -1);
    }
  }
 
  throw new Error(
-    `Failed to apply patch. Could not find a unique anchor
+    `Failed to apply patch. Could not find a unique suffix anchor, even after trimming tokens. Last known error: ${lastError?.message}`,
  );
}
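
handleStandardPatch is now two trim loops: the first trims tokens from the start until _findPrefixLocation resolves a unique prefix anchor, and the second keeps that prefix while trimming from the end until _findSuffixLocation resolves the suffix. Failures in both loops funnel through updateLastError, so an "Ambiguous" diagnosis is not overwritten by a later "not found". A sketch of that priority rule:

// Sketch of the rule updateLastError implements (see the helper above).
let lastError: Error | null = null;
lastError = updateLastError(lastError, new Error('Ambiguous prefix anchor. ...'));
lastError = updateLastError(lastError, new Error('Could not find a unique prefix anchor.'));
// lastError still carries the 'Ambiguous' message, so the final
// "Failed to apply patch" error surfaces the more specific diagnosis.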
package/backend/packages/tokenpatch/strategies/tiktoken-tokenizer.ts (new file, +35 -0):

@@ -0,0 +1,35 @@
+import { getEncoding } from 'js-tiktoken';
+import { Token } from '../types';
+import { TokenizerStrategy } from '../tokenizer.interface';
+
+export class TiktokenTokenizer implements TokenizerStrategy {
+  // Use cl100k_base (GPT-4) as the standard encoding
+  private enc = getEncoding('cl100k_base');
+
+  tokenize(content: string): Token[] {
+    const tokens: Token[] = [];
+    const encoded = this.enc.encode(content);
+
+    let currentIndex = 0;
+
+    // Iterate through token IDs, decode them individually to get text and length.
+    // This allows us to reconstruct the offsets (startIndex/endIndex).
+    for (const tokenId of encoded) {
+      // decoding a single token is the only way to get its exact text representation
+      // to map back to the source string indices.
+      const text = this.enc.decode([tokenId]);
+      const length = text.length;
+
+      tokens.push({
+        text,
+        type: 'bpe',
+        startIndex: currentIndex,
+        endIndex: currentIndex + length,
+        // startPosition is not calculated for Tiktoken strategy as it's computationally expensive
+        // and not strictly required for the patching algorithm which relies on text matching.
+      });
+      currentIndex += length;
+    }
+    return tokens;
+  }
+}
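
The strategy reconstructs character offsets by decoding each BPE token id on its own; for typical source text the decoded pieces concatenate back to the input, so the offsets partition the string exactly (a token boundary splitting a multi-byte code point would break this assumption). A quick check, assuming the class above is importable:

const tokens = new TiktokenTokenizer().tokenize('const x = 1;');
console.assert(tokens.map((t) => t.text).join('') === 'const x = 1;');
console.assert(
  tokens.every((t, i) => i === 0 || t.startIndex === tokens[i - 1].endIndex),
);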