repoburg 1.3.11 → 1.3.13

This diff shows the changes between package versions that have been publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages' contents as they appear in their respective public registries.
Files changed (36)
  1. package/backend/dist/packages/tokenpatch/index.d.ts +5 -1
  2. package/backend/dist/packages/tokenpatch/index.js +42 -24
  3. package/backend/dist/packages/tokenpatch/index.js.map +1 -1
  4. package/backend/dist/packages/tokenpatch/patcher.js +62 -20
  5. package/backend/dist/packages/tokenpatch/patcher.js.map +1 -1
  6. package/backend/dist/packages/tokenpatch/strategies/tiktoken-tokenizer.d.ts +6 -0
  7. package/backend/dist/packages/tokenpatch/strategies/tiktoken-tokenizer.js +28 -0
  8. package/backend/dist/packages/tokenpatch/strategies/tiktoken-tokenizer.js.map +1 -0
  9. package/backend/dist/packages/tokenpatch/strategies/tree-sitter-tokenizer.d.ts +9 -0
  10. package/backend/dist/packages/tokenpatch/strategies/tree-sitter-tokenizer.js +36 -0
  11. package/backend/dist/packages/tokenpatch/strategies/tree-sitter-tokenizer.js.map +1 -0
  12. package/backend/dist/packages/tokenpatch/tokenizer.interface.d.ts +4 -0
  13. package/backend/dist/packages/tokenpatch/tokenizer.interface.js +3 -0
  14. package/backend/dist/packages/tokenpatch/tokenizer.interface.js.map +1 -0
  15. package/backend/dist/packages/tokenpatch/tokens.d.ts +0 -2
  16. package/backend/dist/packages/tokenpatch/tokens.js +4 -23
  17. package/backend/dist/packages/tokenpatch/tokens.js.map +1 -1
  18. package/backend/dist/packages/tokenpatch/types.d.ts +2 -2
  19. package/backend/dist/src/llm-orchestration/action-handlers/patch.handler.js +130 -51
  20. package/backend/dist/src/llm-orchestration/action-handlers/patch.handler.js.map +1 -1
  21. package/backend/dist/src/llm-orchestration/parser/parsing.constants.d.ts +2 -0
  22. package/backend/dist/src/llm-orchestration/parser/parsing.constants.js +3 -1
  23. package/backend/dist/src/llm-orchestration/parser/parsing.constants.js.map +1 -1
  24. package/backend/dist/src/seeding/data/system-prompts/experimental_eta_master-agent.d.ts +1 -1
  25. package/backend/dist/src/seeding/data/system-prompts/experimental_eta_master-agent.js +44 -55
  26. package/backend/dist/src/seeding/data/system-prompts/experimental_eta_master-agent.js.map +1 -1
  27. package/backend/dist/tsconfig.build.tsbuildinfo +1 -1
  28. package/backend/packages/tokenpatch/index.spec.ts +55 -37
  29. package/backend/packages/tokenpatch/index.ts +61 -31
  30. package/backend/packages/tokenpatch/patcher.ts +119 -30
  31. package/backend/packages/tokenpatch/strategies/tiktoken-tokenizer.ts +35 -0
  32. package/backend/packages/tokenpatch/strategies/tree-sitter-tokenizer.ts +37 -0
  33. package/backend/packages/tokenpatch/tokenizer.interface.ts +5 -0
  34. package/backend/packages/tokenpatch/tokens.ts +10 -28
  35. package/backend/packages/tokenpatch/types.ts +4 -4
  36. package/package.json +2 -1
package/backend/packages/tokenpatch/index.spec.ts
@@ -1,15 +1,39 @@
  import { applySnippetPatch } from './index';
  import * as path from 'path';
+ import {
+   SPECIAL_PATCH_BEGIN_FILE_MARKER,
+   SPECIAL_PATCH_END_FILE_MARKER,
+ } from '../../src/llm-orchestration/parser/parsing.constants';

  const TS_WASM_PATH = path.join(
    __dirname,
    './grammar/tree-sitter-typescript.wasm',
  );

- const TSX_WASM_PATH = path.join(__dirname, './grammar/tree-sitter-tsx.wasm');
-
  // Helper to normalize whitespace for robust comparison
- const normalize = (str: string) => str.replace(/\s+/g, ' ').trim();
+ const normalize = (str: string) =>
+   str
+     .replace(/}/g, '} ')
+     .replace(/\s+/g, ' ')
+     .trim();
+
+ const runTest = async (
+   sourceCode: string,
+   patchCode: string,
+   expectedResult: string,
+ ) => {
+   const resultTiktoken = await applySnippetPatch(sourceCode, patchCode, {
+     useTiktoken: true,
+   });
+   expect(normalize(resultTiktoken)).toEqual(normalize(expectedResult));
+
+   const resultTreeSitter = await applySnippetPatch(
+     sourceCode,
+     patchCode,
+     TS_WASM_PATH,
+   );
+   expect(normalize(resultTreeSitter)).toEqual(normalize(expectedResult));
+ };

  describe('applySnippetPatch', () => {
    it('should replace a method body in a class by automatically finding anchor size', async () => {
@@ -35,12 +59,11 @@ class Greeter {
      // A new implementation
      return "Hello, TypeScript!";
    }
-   fc(){ return 42; }
+   fc(){ return 42; }

  }
  `;
-     const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
-     expect(normalize(result)).toEqual(normalize(expectedResult));
+     await runTest(sourceCode, patchCode, expectedResult);
    });

    it('should replace a data structure definition', async () => {
@@ -63,8 +86,7 @@ interface MyData {
  }
  console.log("hello");
  `;
-     const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
-     expect(normalize(result)).toEqual(normalize(expectedResult));
+     await runTest(sourceCode, patchCode, expectedResult);
    });

    it('should replace a full function definition', async () => {
@@ -95,11 +117,10 @@ function calculate() {

  export { calculate };
  `;
-     const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
-     expect(normalize(result)).toEqual(normalize(expectedResult));
+     await runTest(sourceCode, patchCode, expectedResult);
    });

-   it('should replace a beginning of the file @begin-of-file', async () => {
+   it(`should replace a beginning of the file ${SPECIAL_PATCH_BEGIN_FILE_MARKER}`, async () => {
      const sourceCode = `
  import { ModuleA } from './moduleA';
  import { ModuleB } from './moduleB';
@@ -107,7 +128,7 @@ import { ModuleB } from './moduleB';
  console.log('starting up');
  `;
      const patchCode = `
- // @begin-of-file
+ // ${SPECIAL_PATCH_BEGIN_FILE_MARKER}
  import groupBy from 'lodash';
  import { ModuleA } from './moduleA';
  `;
@@ -118,11 +139,10 @@ import { ModuleB } from './moduleB';

  console.log('starting up');
  `;
-     const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
-     expect(normalize(result)).toEqual(normalize(expectedResult));
+     await runTest(sourceCode, patchCode, expectedResult);
    });

-   it('should handle @begin-of-file marker with variations', async () => {
+   it(`should handle ${SPECIAL_PATCH_BEGIN_FILE_MARKER} marker with variations`, async () => {
      const sourceCode = `
  import { ModuleA } from './moduleA';
  import { ModuleB } from './moduleB';
@@ -130,7 +150,7 @@ import { ModuleB } from './moduleB';
  console.log('starting up');
  `;
      const patchCode = `
- //@begin-of-file extra text
+ //${SPECIAL_PATCH_BEGIN_FILE_MARKER} extra text
  import groupBy from 'lodash';
  import { ModuleA } from './moduleA';
  `;
@@ -141,11 +161,10 @@ import { ModuleB } from './moduleB';

  console.log('starting up');
  `;
-     const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
-     expect(normalize(result)).toEqual(normalize(expectedResult));
+     await runTest(sourceCode, patchCode, expectedResult);
    });

-   it('should replace a end of the file @end-of-file', async () => {
+   it(`should replace a end of the file ${SPECIAL_PATCH_END_FILE_MARKER}`, async () => {
      const sourceCode = `
  import fs from 'fs';

@@ -158,7 +177,7 @@ export { calculate };
      const patchCode = `
  export { calculate };
  export { sum };
- // @end-of-file
+ // ${SPECIAL_PATCH_END_FILE_MARKER}
  `;
      const expectedResult = `
  import fs from 'fs';
@@ -170,11 +189,10 @@ function calculate() {
  export { calculate };
  export { sum };
  `;
-     const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
-     expect(normalize(result)).toEqual(normalize(expectedResult));
+     await runTest(sourceCode, patchCode, expectedResult);
    });

-   it('should handle @end-of-file marker with variations', async () => {
+   it(`should handle ${SPECIAL_PATCH_END_FILE_MARKER} marker with variations`, async () => {
      const sourceCode = `
  import fs from 'fs';

@@ -187,7 +205,7 @@ export { calculate };
      const patchCode = `
  export { calculate };
  export { sum };
- //@end-of-file some extra text
+ //${SPECIAL_PATCH_END_FILE_MARKER} some extra text
  `;
      const expectedResult = `
  import fs from 'fs';
@@ -199,11 +217,11 @@ function calculate() {
  export { calculate };
  export { sum };
  `;
-     const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
-     expect(normalize(result)).toEqual(normalize(expectedResult));
+     await runTest(sourceCode, patchCode, expectedResult);
    });
    it('should succeed with dynamic anchor sizing when initial anchors are ambiguous', async () => {
      const sourceCode = `
+ import dotenv from 'dotenv';
  const config = {
    port: 8080,
    host: 'localhost',
@@ -219,6 +237,7 @@ const config2 = {
  };
  `;
      const patchCode = `
+ import dotenv from 'dotenv';
  const config = {
    port: 9000,
    host: 'localhost',
@@ -228,6 +247,7 @@ const config = {
  function connect() {
  `;
      const expectedResult = `
+ import dotenv from 'dotenv';
  const config = {
    port: 9000,
    host: 'localhost',
@@ -243,8 +263,7 @@ const config2 = {
    host: 'remote',
  };
  `;
-     const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
-     expect(normalize(result)).toEqual(normalize(expectedResult));
+     await runTest(sourceCode, patchCode, expectedResult);
    });

    it('should handle partial type definitions by trimming ambiguous tokens', async () => {
@@ -275,6 +294,8 @@ const config2 = {
   */
  combinedStatusNew2?: everest_appserver_primitive_Text | null;
  /**
+  * Contains Billable Expense line
+  */
  `;
      const expectedResult = `
  /**
@@ -296,8 +317,7 @@ const config2 = {
   */
  containsPrepaidItem?: everest_appserver_primitive_TrueFalse | null;
  `;
-     const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
-     expect(normalize(result)).toEqual(normalize(expectedResult));
+     await runTest(sourceCode, patchCode, expectedResult);
    });

    it('should patch TSX correctly by trimming tokens from invalid partial snippets', async () => {
@@ -408,7 +428,10 @@ export function TransactionsPage() {
  <DataTable
    columns={billColumns}
    data={bills}
-   onRowClick={(row) => selectMatchable(row.id)}
+   onRowClick={(row) => {
+     console.log(\`[UI] Selected matchable bill ID\`)
+     selectMatchable(row.id)
+   }}
    selectedId={selectedMatchableId}
  />
  </div>
@@ -569,11 +592,6 @@ export function TransactionsPage() {
  )
  }
  `;
-     const result = await applySnippetPatch(
-       sourceCode,
-       patchCode,
-       TSX_WASM_PATH,
-     );
-     expect(normalize(result)).toEqual(normalize(expectedResult));
+     await runTest(sourceCode, patchCode, expectedResult);
    });
  });
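
Worth noting in the test changes above: the new normalize helper pads every closing brace with a space before collapsing whitespace, so two outputs that differ only in the whitespace around `}` compare equal under either tokenizer strategy. A small illustration with hypothetical inputs (not taken from the package's test suite):

const normalize = (str: string) =>
  str
    .replace(/}/g, '} ')
    .replace(/\s+/g, ' ')
    .trim();

// Both normalize to the same string, "fc(){ return 42; } }",
// even though the brace spacing and line breaks differ.
normalize('fc(){ return 42; }\n}');
normalize('fc(){ return 42; }}');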
package/backend/packages/tokenpatch/index.ts
@@ -1,23 +1,48 @@
  import { initializeParser } from './parser';
- import { collectTokens } from './tokens';
  import {
    handleBeginOfFilePatch,
    handleEndOfFilePatch,
    handleStandardPatch,
  } from './patcher';
+ import {
+   SPECIAL_PATCH_BEGIN_FILE_MARKER,
+   SPECIAL_PATCH_END_FILE_MARKER,
+ } from '../../src/llm-orchestration/parser/parsing.constants';
+ import { TokenizerStrategy } from './tokenizer.interface';
+ import { TreeSitterTokenizer } from './strategies/tree-sitter-tokenizer';
+ import { TiktokenTokenizer } from './strategies/tiktoken-tokenizer';
+
+ export interface ApplyPatchOptions {
+   grammarPath?: string;
+   useTiktoken?: boolean;
+ }

  export async function applySnippetPatch(
    sourceCode: string,
    patchCode: string,
-   grammarPath: string,
+   optionsOrGrammarPath: string | ApplyPatchOptions,
  ): Promise<string> {
-   const parser = await initializeParser(grammarPath);
+   let strategy: TokenizerStrategy;
+   let options: ApplyPatchOptions;
+
+   // Backward compatibility for when the 3rd argument was just a grammarPath string
+   if (typeof optionsOrGrammarPath === 'string') {
+     options = { grammarPath: optionsOrGrammarPath };
+   } else {
+     options = optionsOrGrammarPath;
+   }

-   const sourceTree = parser.parse(sourceCode);
-   if (!sourceTree) {
-     throw new Error('Failed to parse source code.');
+   if (options.useTiktoken) {
+     strategy = new TiktokenTokenizer();
+   } else if (options.grammarPath) {
+     const parser = await initializeParser(options.grammarPath);
+     strategy = new TreeSitterTokenizer(parser);
+   } else {
+     // Default to Tiktoken if no grammar path is provided
+     strategy = new TiktokenTokenizer();
    }
-   const sourceTokens = collectTokens(sourceTree, sourceCode);
+
+   const sourceTokens = strategy.tokenize(sourceCode);

    let patchResult: {
      replaceStart: number;
@@ -27,42 +52,47 @@ export async function applySnippetPatch(
    };
    let processedPatchCode = patchCode;

-   const beginOfFileRegex = /\/\/\s*@begin-of-file.*/;
-   const endOfFileRegex = /\/\/\s*@end-of-file.*/;
+   const beginOfFileRegex = new RegExp(
+     `//\\s*${SPECIAL_PATCH_BEGIN_FILE_MARKER}.*`,
+   );
+   const endOfFileRegex = new RegExp(`//\\s*${SPECIAL_PATCH_END_FILE_MARKER}.*`);

    const hasBeginOfFile = beginOfFileRegex.test(patchCode);
    const hasEndOfFile = endOfFileRegex.test(patchCode);

    if (hasBeginOfFile) {
      processedPatchCode = patchCode.replace(beginOfFileRegex, '');
-     const patchTree = parser.parse(processedPatchCode);
-     if (!patchTree) {
-       throw new Error('Failed to parse patch code.');
-     }
-     const patchTokens = collectTokens(patchTree, processedPatchCode).filter(
-       (t) => t.text !== '',
-     );
+     // Tokenize the processed patch code, filtering out empty-text tokens
+     // if a strategy produces them (likely unnecessary for tiktoken).
+     const patchTokens = strategy
+       .tokenize(processedPatchCode)
+       .filter((t) => t.text !== '' && t.text !== '\n');
      patchResult = handleBeginOfFilePatch(sourceTokens, patchTokens);
    } else if (hasEndOfFile) {
      processedPatchCode = patchCode.replace(endOfFileRegex, '');
-     const patchTree = parser.parse(processedPatchCode);
-     if (!patchTree) {
-       throw new Error('Failed to parse patch code.');
-     }
-     const patchTokens = collectTokens(patchTree, processedPatchCode).filter(
-       (t) => t.text !== '',
-     );
+     const patchTokens = strategy
+       .tokenize(processedPatchCode)
+       .filter((t) => t.text !== '' && t.text !== '\n');
      patchResult = handleEndOfFilePatch(sourceTokens, patchTokens, sourceCode);
    } else {
      processedPatchCode = patchCode.trim();
-     const patchTree = parser.parse(processedPatchCode);
-     if (!patchTree) {
-       throw new Error('Failed to parse patch code.');
+     const patchTokens = strategy
+       .tokenize(processedPatchCode)
+       .filter((t) => t.text !== '');
+
+     // Heuristic: when using Tiktoken, discard the first and last two tokens to improve matching resilience.
+     // This guards against the LLM hallucinating extra delimiters or context at the edges of the snippet.
+     const isTiktoken = options.useTiktoken || !options.grammarPath;
+     if (
+       isTiktoken &&
+       patchTokens.length >= 6 &&
+       patchTokens.length < sourceTokens.length
+     ) {
+       const innerTokens = patchTokens.slice(2, -2);
+       patchResult = handleStandardPatch(sourceTokens, innerTokens);
+     } else {
+       patchResult = handleStandardPatch(sourceTokens, patchTokens);
      }
-     const patchTokens = collectTokens(patchTree, processedPatchCode).filter(
-       (t) => t.text !== '',
-     );
-     patchResult = handleStandardPatch(sourceTokens, patchTokens);
    }

    // NOTE: replaceStart/End are byte offsets.
@@ -77,4 +107,4 @@ export async function applySnippetPatch(
    );

    return prefix + finalPatchContent + suffix;
- }
+ }
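
For orientation, a minimal usage sketch of the widened signature above; both call forms go through the same applySnippetPatch entry point, and the source/patch strings here are hypothetical placeholders:

import { applySnippetPatch } from './index';

async function demo() {
  const sourceCode = 'function greet() { return "hi"; }';
  const patchCode = 'function greet() { return "hello"; }';

  // Legacy form: the third argument is a tree-sitter grammar path.
  const viaTreeSitter = await applySnippetPatch(
    sourceCode,
    patchCode,
    './grammar/tree-sitter-typescript.wasm',
  );

  // New form: an options object selects the tokenizer strategy;
  // omitting grammarPath also falls back to Tiktoken.
  const viaTiktoken = await applySnippetPatch(sourceCode, patchCode, {
    useTiktoken: true,
  });

  return { viaTreeSitter, viaTiktoken };
}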
package/backend/packages/tokenpatch/patcher.ts
@@ -1,5 +1,9 @@
  import type { Token } from './types';
  import { findAllSequences, formatAnchor } from './tokens';
+ import {
+   SPECIAL_PATCH_BEGIN_FILE_MARKER,
+   SPECIAL_PATCH_END_FILE_MARKER,
+ } from '../../src/llm-orchestration/parser/parsing.constants';

  interface PatchResult {
    replaceStart: number;
@@ -13,6 +17,23 @@ interface SimplePatchResult {
    replaceEnd: number;
  }

+ // Helper to prioritize "Ambiguous" errors over "Not found" errors
+ const updateLastError = (
+   currentError: Error | null,
+   newError: Error,
+ ): Error => {
+   if (!currentError) return newError;
+   // If we already have an Ambiguous error, keep it unless the new one is also Ambiguous
+   // (Assuming Ambiguous is more useful/specific than "Not found")
+   const currentIsAmbiguous = currentError.message.includes('Ambiguous');
+   const newIsAmbiguous = newError.message.includes('Ambiguous');
+
+   if (currentIsAmbiguous && !newIsAmbiguous) {
+     return currentError;
+   }
+   return newError;
+ };
+
  // Internal helper for the original matching logic
  function _findBeginOfFilePatchLocation(
    sourceTokens: Token[],
@@ -36,7 +57,10 @@ function _findBeginOfFilePatchLocation(
    if (indices.length > 1) {
      const formattedAnchor = formatAnchor(suffixAnchor);
      const locations = indices
-       .map((i) => `line ${sourceTokens[i].startPosition.row + 1}`)
+       .map(
+         (i) =>
+           `line ${sourceTokens[i].startPosition?.row ? sourceTokens[i].startPosition.row + 1 : '?'}`,
+       )
        .join(', ');
      lastError = `Ambiguous suffix anchor. The sequence "${formattedAnchor}" was found at ${indices.length} locations: ${locations}.`;
    }
@@ -60,7 +84,9 @@ export function handleBeginOfFilePatch(
    originalPatchTokens: Token[],
  ): PatchResult {
    if (originalPatchTokens.length === 0) {
-     throw new Error('Patch is empty after removing @begin-of-file marker.');
+     throw new Error(
+       `Patch is empty after removing ${SPECIAL_PATCH_BEGIN_FILE_MARKER} marker.`,
+     );
    }

    let patchAttempt = [...originalPatchTokens];
@@ -77,13 +103,13 @@ export function handleBeginOfFilePatch(
      const patchInsertEnd = patchAttempt[patchAttempt.length - 1].endIndex;
      return { replaceStart, replaceEnd, patchInsertStart, patchInsertEnd };
    } catch (e) {
-     lastError = e as Error;
+     lastError = updateLastError(lastError, e as Error);
      patchAttempt = patchAttempt.slice(1); // Trim one token from the beginning
    }
  }

  throw new Error(
-   `Failed to apply @begin-of-file patch. Could not find a unique anchor, even after trimming tokens. Last known error: ${lastError?.message}`,
+   `Failed to apply ${SPECIAL_PATCH_BEGIN_FILE_MARKER} patch. Could not find a unique anchor, even after trimming tokens. Last known error: ${lastError?.message}`,
  );
  }

@@ -110,7 +136,10 @@ function _findEndOfFilePatchLocation(
    if (indices.length > 1) {
      const formattedAnchor = formatAnchor(prefixAnchor);
      const locations = indices
-       .map((i) => `line ${sourceTokens[i].startPosition.row + 1}`)
+       .map(
+         (i) =>
+           `line ${sourceTokens[i].startPosition?.row ? sourceTokens[i].startPosition.row + 1 : '?'}`,
+       )
        .join(', ');
      lastError = `Ambiguous prefix anchor. The sequence "${formattedAnchor}" was found at ${indices.length} locations: ${locations}.`;
    }
@@ -134,7 +163,9 @@ export function handleEndOfFilePatch(
    sourceCode: string,
  ): PatchResult {
    if (originalPatchTokens.length === 0) {
-     throw new Error('Patch is empty after removing @end-of-file marker.');
+     throw new Error(
+       `Patch is empty after removing ${SPECIAL_PATCH_END_FILE_MARKER} marker.`,
+     );
    }

    let patchAttempt = [...originalPatchTokens];
@@ -152,22 +183,26 @@ export function handleEndOfFilePatch(
      const patchInsertEnd = patchAttempt[patchAttempt.length - 1].endIndex;
      return { replaceStart, replaceEnd, patchInsertStart, patchInsertEnd };
    } catch (e) {
-     lastError = e as Error;
+     lastError = updateLastError(lastError, e as Error);
      patchAttempt = patchAttempt.slice(0, -1); // Trim one token from the end
    }
  }

  throw new Error(
-   `Failed to apply @end-of-file patch. Could not find a unique anchor, even after trimming tokens. Last known error: ${lastError?.message}`,
+   `Failed to apply ${SPECIAL_PATCH_END_FILE_MARKER} patch. Could not find a unique anchor, even after trimming tokens. Last known error: ${lastError?.message}`,
  );
  }

- // Internal helper for the original matching logic
- function _findStandardPatchLocation(
+ interface PrefixResult {
+   prefixAnchor: Token[];
+   prefixIndex: number;
+   replaceStart: number;
+ }
+
+ function _findPrefixLocation(
    sourceTokens: Token[],
    patchTokens: Token[],
- ): SimplePatchResult {
-   // 1. Find smallest unique prefix
+ ): PrefixResult {
    let prefixAnchor: Token[] | null = null;
    let prefixIndex: number | null = null;
    let bestPrefixError: string | null = null;
@@ -185,7 +220,10 @@ function _findStandardPatchLocation(
    if (prefixIndices.length > 1) {
      const formatted = formatAnchor(currentPrefix);
      const locations = prefixIndices
-       .map((i) => `line ${sourceTokens[i].startPosition.row + 1}`)
+       .map(
+         (i) =>
+           `line ${sourceTokens[i].startPosition?.row ? sourceTokens[i].startPosition.row + 1 : '?'}`,
+       )
        .join(', ');
      bestPrefixError = `Ambiguous prefix anchor. The sequence "${formatted}" was found at ${prefixIndices.length} locations: ${locations}.`;
    }
@@ -199,12 +237,24 @@ function _findStandardPatchLocation(
    }

    if (!prefixAnchor || prefixIndex === null) {
-     throw new Error(
-       bestPrefixError || 'Could not find a unique prefix anchor.',
-     );
+     throw new Error(bestPrefixError || 'Could not find a unique prefix anchor.');
    }

-   // 2. Find smallest unique suffix after prefix
+   const replaceStart = sourceTokens[prefixIndex].startIndex;
+
+   return { prefixAnchor, prefixIndex, replaceStart };
+ }
+
+ interface SuffixResult {
+   replaceEnd: number;
+ }
+
+ function _findSuffixLocation(
+   sourceTokens: Token[],
+   patchTokens: Token[],
+   prefixAnchor: Token[],
+   prefixIndex: number,
+ ): SuffixResult {
    let suffixAnchor: Token[] | null = null;
    let suffixIndex: number | null = null;
    let bestSuffixError: string | null = null;
@@ -231,7 +281,9 @@ function _findStandardPatchLocation(
        .map(
          (i) =>
            `line ${
-             sourceTokens[searchStartIndex + i].startPosition.row + 1
+             sourceTokens[searchStartIndex + i].startPosition?.row
+               ? sourceTokens[searchStartIndex + i].startPosition.row + 1
+               : '?'
            }`,
        )
        .join(', ');
@@ -243,7 +295,9 @@ function _findStandardPatchLocation(
      throw new Error(bestSuffixError);
    }
    const prefixLocation = `line ${
-     sourceTokens[prefixIndex].startPosition.row + 1
+     sourceTokens[prefixIndex].startPosition?.row
+       ? sourceTokens[prefixIndex].startPosition.row + 1
+       : '?'
    }`;
    const formattedPrefix = formatAnchor(prefixAnchor);
    const smallestSuffix = formatAnchor(
@@ -256,12 +310,10 @@ function _findStandardPatchLocation(
    );
  }

-   // 3. Apply patch
-   const replaceStart = sourceTokens[prefixIndex].startIndex;
    const replaceEnd =
      sourceTokens[suffixIndex + suffixAnchor.length - 1].endIndex;

-   return { replaceStart, replaceEnd };
+   return { replaceEnd };
  }

  export function handleStandardPatch(
@@ -274,27 +326,64 @@ export function handleStandardPatch(
    );
  }

-   let patchAttempt = [...originalPatchTokens];
+   let startTrim = 0;
+   const endTrim = 0;
    let lastError: Error | null = null;

-   // Outer loop for trimming tokens from both ends
+   let prefixInfo: PrefixResult | null = null;
+
+   // Loop 1: Find Prefix (trim from start)
+   let patchAttempt = [...originalPatchTokens];
    while (patchAttempt.length >= 2) {
      try {
-       const { replaceStart, replaceEnd } = _findStandardPatchLocation(
+       prefixInfo = _findPrefixLocation(sourceTokens, patchAttempt);
+       // Success finding prefix
+       startTrim = originalPatchTokens.length - patchAttempt.length;
+       break;
+     } catch (e) {
+       lastError = updateLastError(lastError, e as Error);
+       // Trim one token from the start
+       patchAttempt = patchAttempt.slice(1);
+     }
+   }
+
+   if (!prefixInfo) {
+     throw new Error(
+       `Failed to apply patch. Could not find a unique prefix anchor, even after trimming tokens. Last known error: ${lastError?.message}`,
+     );
+   }
+
+   // Loop 2: Find Suffix (trim from end)
+   // Reset patchAttempt to start from the found prefix startTrim, but allow trimming the end
+   patchAttempt = originalPatchTokens.slice(startTrim);
+   lastError = null; // Reset last error for suffix search phase
+
+   while (patchAttempt.length >= prefixInfo.prefixAnchor.length + 1) {
+     // Need at least prefix + 1 token? Or just prefix + suffix?
+     try {
+       const { replaceEnd } = _findSuffixLocation(
          sourceTokens,
          patchAttempt,
+         prefixInfo.prefixAnchor,
+         prefixInfo.prefixIndex,
        );
+       // Success finding suffix
        const patchInsertStart = patchAttempt[0].startIndex;
        const patchInsertEnd = patchAttempt[patchAttempt.length - 1].endIndex;
-       return { replaceStart, replaceEnd, patchInsertStart, patchInsertEnd };
+       return {
+         replaceStart: prefixInfo.replaceStart,
+         replaceEnd,
+         patchInsertStart,
+         patchInsertEnd,
+       };
      } catch (e) {
-       lastError = e as Error;
-       // Trim one token from the start and one from the end for the next attempt
-       patchAttempt = patchAttempt.slice(1, -1);
+       lastError = updateLastError(lastError, e as Error);
+       // Trim one token from the end
+       patchAttempt = patchAttempt.slice(0, -1);
      }
    }

    throw new Error(
-     `Failed to apply patch. Could not find a unique anchor in the source file, even after trimming ambiguous tokens. Last known error: ${lastError?.message}`,
+     `Failed to apply patch. Could not find a unique suffix anchor, even after trimming tokens. Last known error: ${lastError?.message}`,
    );
  }
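
The refactor above splits the old single trim-both-ends loop into two phases: trim from the start until a unique prefix anchor is found, then hold that start fixed and trim from the end until a unique suffix anchor is found. A simplified standalone sketch of that idea over plain string tokens (fixed two-token anchors; the real code grows anchor sizes, restricts the suffix search to after the prefix, and prefers "Ambiguous" errors via updateLastError):

// Return the index where `anchor` occurs exactly once in `source`, else null.
function findUnique(source: string[], anchor: string[]): number | null {
  const hits: number[] = [];
  for (let i = 0; i + anchor.length <= source.length; i++) {
    if (anchor.every((tok, j) => source[i + j] === tok)) hits.push(i);
  }
  return hits.length === 1 ? hits[0] : null;
}

function locate(source: string[], patch: string[]): { start: number; end: number } {
  // Phase 1: drop leading tokens until the two-token prefix is unique in source.
  let attempt = [...patch];
  let start: number | null = null;
  while (attempt.length >= 2) {
    start = findUnique(source, attempt.slice(0, 2));
    if (start !== null) break;
    attempt = attempt.slice(1);
  }
  if (start === null) throw new Error('No unique prefix anchor.');

  // Phase 2: keep the start fixed, drop trailing tokens until the suffix is unique.
  while (attempt.length >= 2) {
    const end = findUnique(source, attempt.slice(-2));
    if (end !== null) return { start, end: end + 2 }; // exclusive end
    attempt = attempt.slice(0, -1);
  }
  throw new Error('No unique suffix anchor.');
}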
package/backend/packages/tokenpatch/strategies/tiktoken-tokenizer.ts
@@ -0,0 +1,35 @@
+ import { getEncoding } from 'js-tiktoken';
+ import { Token } from '../types';
+ import { TokenizerStrategy } from '../tokenizer.interface';
+
+ export class TiktokenTokenizer implements TokenizerStrategy {
+   // Use cl100k_base (GPT-4) as the standard encoding
+   private enc = getEncoding('cl100k_base');
+
+   tokenize(content: string): Token[] {
+     const tokens: Token[] = [];
+     const encoded = this.enc.encode(content);
+
+     let currentIndex = 0;
+
+     // Iterate through token IDs, decode them individually to get text and length.
+     // This allows us to reconstruct the offsets (startIndex/endIndex).
+     for (const tokenId of encoded) {
+       // decoding a single token is the only way to get its exact text representation
+       // to map back to the source string indices.
+       const text = this.enc.decode([tokenId]);
+       const length = text.length;
+
+       tokens.push({
+         text,
+         type: 'bpe',
+         startIndex: currentIndex,
+         endIndex: currentIndex + length,
+         // startPosition is not calculated for Tiktoken strategy as it's computationally expensive
+         // and not strictly required for the patching algorithm which relies on text matching.
+       });
+       currentIndex += length;
+     }
+     return tokens;
+   }
+ }
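
The new TiktokenTokenizer rests on one invariant: decoding token ids one at a time must tile the source string exactly, so startIndex/endIndex can be accumulated by length. A quick standalone check of that assumption (it holds for ASCII input; a multi-byte character split across BPE tokens could break per-token decoding):

import { getEncoding } from 'js-tiktoken';

const enc = getEncoding('cl100k_base');
const source = 'const answer = 42; // the usual suspect\n';

// Re-derive offsets the same way tokenize() does and assert they tile the input.
let offset = 0;
for (const id of enc.encode(source)) {
  const piece = enc.decode([id]);
  console.assert(source.startsWith(piece, offset), 'offset drift at', offset);
  offset += piece.length;
}
console.assert(offset === source.length, 'tokens did not cover the whole source');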