repoburg 1.3.12 → 1.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. package/backend/dist/packages/tokenpatch/index.d.ts +5 -1
  2. package/backend/dist/packages/tokenpatch/index.js +39 -22
  3. package/backend/dist/packages/tokenpatch/index.js.map +1 -1
  4. package/backend/dist/packages/tokenpatch/patcher.js +57 -16
  5. package/backend/dist/packages/tokenpatch/patcher.js.map +1 -1
  6. package/backend/dist/packages/tokenpatch/strategies/tiktoken-tokenizer.d.ts +6 -0
  7. package/backend/dist/packages/tokenpatch/strategies/tiktoken-tokenizer.js +28 -0
  8. package/backend/dist/packages/tokenpatch/strategies/tiktoken-tokenizer.js.map +1 -0
  9. package/backend/dist/packages/tokenpatch/strategies/tree-sitter-tokenizer.d.ts +9 -0
  10. package/backend/dist/packages/tokenpatch/strategies/tree-sitter-tokenizer.js +36 -0
  11. package/backend/dist/packages/tokenpatch/strategies/tree-sitter-tokenizer.js.map +1 -0
  12. package/backend/dist/packages/tokenpatch/tokenizer.interface.d.ts +4 -0
  13. package/backend/dist/packages/tokenpatch/tokenizer.interface.js +3 -0
  14. package/backend/dist/packages/tokenpatch/tokenizer.interface.js.map +1 -0
  15. package/backend/dist/packages/tokenpatch/tokens.d.ts +0 -2
  16. package/backend/dist/packages/tokenpatch/tokens.js +4 -23
  17. package/backend/dist/packages/tokenpatch/tokens.js.map +1 -1
  18. package/backend/dist/packages/tokenpatch/types.d.ts +2 -2
  19. package/backend/dist/src/ai-actions/ai-actions.service.d.ts +3 -1
  20. package/backend/dist/src/ai-actions/ai-actions.service.js +4 -0
  21. package/backend/dist/src/ai-actions/ai-actions.service.js.map +1 -1
  22. package/backend/dist/src/llm-orchestration/action-handlers/dto/execute-shell.args.dto.d.ts +4 -0
  23. package/backend/dist/src/llm-orchestration/action-handlers/dto/execute-shell.args.dto.js +27 -0
  24. package/backend/dist/src/llm-orchestration/action-handlers/dto/execute-shell.args.dto.js.map +1 -0
  25. package/backend/dist/src/llm-orchestration/action-handlers/dto/write-todo.args.dto.d.ts +4 -0
  26. package/backend/dist/src/llm-orchestration/action-handlers/dto/write-todo.args.dto.js +31 -0
  27. package/backend/dist/src/llm-orchestration/action-handlers/dto/write-todo.args.dto.js.map +1 -0
  28. package/backend/dist/src/llm-orchestration/action-handlers/execute-shell.handler.d.ts +11 -0
  29. package/backend/dist/src/llm-orchestration/action-handlers/execute-shell.handler.js +111 -0
  30. package/backend/dist/src/llm-orchestration/action-handlers/execute-shell.handler.js.map +1 -0
  31. package/backend/dist/src/llm-orchestration/action-handlers/howto.handler.js +2 -0
  32. package/backend/dist/src/llm-orchestration/action-handlers/howto.handler.js.map +1 -1
  33. package/backend/dist/src/llm-orchestration/action-handlers/patch.handler.js +125 -47
  34. package/backend/dist/src/llm-orchestration/action-handlers/patch.handler.js.map +1 -1
  35. package/backend/dist/src/llm-orchestration/action-handlers/run-command.handler.d.ts +1 -0
  36. package/backend/dist/src/llm-orchestration/action-handlers/run-command.handler.js +12 -19
  37. package/backend/dist/src/llm-orchestration/action-handlers/run-command.handler.js.map +1 -1
  38. package/backend/dist/src/llm-orchestration/action-handlers/write-todo.handler.d.ts +13 -0
  39. package/backend/dist/src/llm-orchestration/action-handlers/write-todo.handler.js +116 -0
  40. package/backend/dist/src/llm-orchestration/action-handlers/write-todo.handler.js.map +1 -0
  41. package/backend/dist/src/llm-orchestration/llm-orchestration.module.js +4 -0
  42. package/backend/dist/src/llm-orchestration/llm-orchestration.module.js.map +1 -1
  43. package/backend/dist/src/seeding/data/system-prompts/default_master-agent.d.ts +1 -1
  44. package/backend/dist/src/seeding/data/system-prompts/default_master-agent.js +50 -493
  45. package/backend/dist/src/seeding/data/system-prompts/default_master-agent.js.map +1 -1
  46. package/backend/dist/tsconfig.build.tsbuildinfo +1 -1
  47. package/backend/packages/tokenpatch/index.spec.ts +44 -30
  48. package/backend/packages/tokenpatch/index.ts +54 -32
  49. package/backend/packages/tokenpatch/patcher.ts +107 -26
  50. package/backend/packages/tokenpatch/strategies/tiktoken-tokenizer.ts +35 -0
  51. package/backend/packages/tokenpatch/strategies/tree-sitter-tokenizer.ts +37 -0
  52. package/backend/packages/tokenpatch/tokenizer.interface.ts +5 -0
  53. package/backend/packages/tokenpatch/tokens.ts +10 -28
  54. package/backend/packages/tokenpatch/types.ts +4 -4
  55. package/package.json +2 -1
  56. package/backend/dist/src/seeding/data/system-prompts/carryover-agent.d.ts +0 -2
  57. package/backend/dist/src/seeding/data/system-prompts/carryover-agent.js +0 -107
  58. package/backend/dist/src/seeding/data/system-prompts/carryover-agent.js.map +0 -1
  59. package/backend/dist/src/seeding/data/system-prompts/experimental_eta_master-agent.d.ts +0 -2
  60. package/backend/dist/src/seeding/data/system-prompts/experimental_eta_master-agent.js +0 -62
  61. package/backend/dist/src/seeding/data/system-prompts/experimental_eta_master-agent.js.map +0 -1
  62. package/backend/dist/src/seeding/data/system-prompts/experimental_master-agent.d.ts +0 -2
  63. package/backend/dist/src/seeding/data/system-prompts/experimental_master-agent.js +0 -633
  64. package/backend/dist/src/seeding/data/system-prompts/experimental_master-agent.js.map +0 -1
  65. package/backend/dist/src/seeding/data/system-prompts/experimental_patch_master-agent.d.ts +0 -2
  66. package/backend/dist/src/seeding/data/system-prompts/experimental_patch_master-agent.js +0 -463
  67. package/backend/dist/src/seeding/data/system-prompts/experimental_patch_master-agent.js.map +0 -1
  68. package/backend/dist/src/seeding/data/system-prompts/refactor-split.d.ts +0 -2
  69. package/backend/dist/src/seeding/data/system-prompts/refactor-split.js +0 -57
  70. package/backend/dist/src/seeding/data/system-prompts/refactor-split.js.map +0 -1
@@ -10,10 +10,30 @@ const TS_WASM_PATH = path.join(
10
10
  './grammar/tree-sitter-typescript.wasm',
11
11
  );
12
12
 
13
- const TSX_WASM_PATH = path.join(__dirname, './grammar/tree-sitter-tsx.wasm');
14
-
15
13
  // Helper to normalize whitespace for robust comparison
16
- const normalize = (str: string) => str.replace(/\s+/g, ' ').trim();
14
+ const normalize = (str: string) =>
15
+ str
16
+ .replace(/}/g, '} ')
17
+ .replace(/\s+/g, ' ')
18
+ .trim();
19
+
20
+ const runTest = async (
21
+ sourceCode: string,
22
+ patchCode: string,
23
+ expectedResult: string,
24
+ ) => {
25
+ const resultTiktoken = await applySnippetPatch(sourceCode, patchCode, {
26
+ useTiktoken: true,
27
+ });
28
+ expect(normalize(resultTiktoken)).toEqual(normalize(expectedResult));
29
+
30
+ const resultTreeSitter = await applySnippetPatch(
31
+ sourceCode,
32
+ patchCode,
33
+ TS_WASM_PATH,
34
+ );
35
+ expect(normalize(resultTreeSitter)).toEqual(normalize(expectedResult));
36
+ };
17
37
 
18
38
  describe('applySnippetPatch', () => {
19
39
  it('should replace a method body in a class by automatically finding anchor size', async () => {
@@ -39,12 +59,11 @@ class Greeter {
39
59
  // A new implementation
40
60
  return "Hello, TypeScript!";
41
61
  }
42
- fc(){ return 42; }
62
+ fc(){ return 42; }
43
63
 
44
64
  }
45
65
  `;
46
- const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
47
- expect(normalize(result)).toEqual(normalize(expectedResult));
66
+ await runTest(sourceCode, patchCode, expectedResult);
48
67
  });
49
68
 
50
69
  it('should replace a data structure definition', async () => {
@@ -67,8 +86,7 @@ interface MyData {
67
86
  }
68
87
  console.log("hello");
69
88
  `;
70
- const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
71
- expect(normalize(result)).toEqual(normalize(expectedResult));
89
+ await runTest(sourceCode, patchCode, expectedResult);
72
90
  });
73
91
 
74
92
  it('should replace a full function definition', async () => {
@@ -99,8 +117,7 @@ function calculate() {
99
117
 
100
118
  export { calculate };
101
119
  `;
102
- const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
103
- expect(normalize(result)).toEqual(normalize(expectedResult));
120
+ await runTest(sourceCode, patchCode, expectedResult);
104
121
  });
105
122
 
106
123
  it(`should replace a beginning of the file ${SPECIAL_PATCH_BEGIN_FILE_MARKER}`, async () => {
@@ -122,8 +139,7 @@ import { ModuleB } from './moduleB';
122
139
 
123
140
  console.log('starting up');
124
141
  `;
125
- const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
126
- expect(normalize(result)).toEqual(normalize(expectedResult));
142
+ await runTest(sourceCode, patchCode, expectedResult);
127
143
  });
128
144
 
129
145
  it(`should handle ${SPECIAL_PATCH_BEGIN_FILE_MARKER} marker with variations`, async () => {
@@ -145,8 +161,7 @@ import { ModuleB } from './moduleB';
145
161
 
146
162
  console.log('starting up');
147
163
  `;
148
- const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
149
- expect(normalize(result)).toEqual(normalize(expectedResult));
164
+ await runTest(sourceCode, patchCode, expectedResult);
150
165
  });
151
166
 
152
167
  it(`should replace a end of the file ${SPECIAL_PATCH_END_FILE_MARKER}`, async () => {
@@ -174,8 +189,7 @@ function calculate() {
174
189
  export { calculate };
175
190
  export { sum };
176
191
  `;
177
- const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
178
- expect(normalize(result)).toEqual(normalize(expectedResult));
192
+ await runTest(sourceCode, patchCode, expectedResult);
179
193
  });
180
194
 
181
195
  it(`should handle ${SPECIAL_PATCH_END_FILE_MARKER} marker with variations`, async () => {
@@ -203,11 +217,11 @@ function calculate() {
203
217
  export { calculate };
204
218
  export { sum };
205
219
  `;
206
- const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
207
- expect(normalize(result)).toEqual(normalize(expectedResult));
220
+ await runTest(sourceCode, patchCode, expectedResult);
208
221
  });
209
222
  it('should succeed with dynamic anchor sizing when initial anchors are ambiguous', async () => {
210
223
  const sourceCode = `
224
+ import dotenv from 'dotenv';
211
225
  const config = {
212
226
  port: 8080,
213
227
  host: 'localhost',
@@ -223,6 +237,7 @@ const config2 = {
223
237
  };
224
238
  `;
225
239
  const patchCode = `
240
+ import dotenv from 'dotenv';
226
241
  const config = {
227
242
  port: 9000,
228
243
  host: 'localhost',
@@ -232,6 +247,7 @@ const config = {
232
247
  function connect() {
233
248
  `;
234
249
  const expectedResult = `
250
+ import dotenv from 'dotenv';
235
251
  const config = {
236
252
  port: 9000,
237
253
  host: 'localhost',
@@ -247,8 +263,7 @@ const config2 = {
247
263
  host: 'remote',
248
264
  };
249
265
  `;
250
- const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
251
- expect(normalize(result)).toEqual(normalize(expectedResult));
266
+ await runTest(sourceCode, patchCode, expectedResult);
252
267
  });
253
268
 
254
269
  it('should handle partial type definitions by trimming ambiguous tokens', async () => {
@@ -279,6 +294,8 @@ const config2 = {
279
294
  */
280
295
  combinedStatusNew2?: everest_appserver_primitive_Text | null;
281
296
  /**
297
+ * Contains Billable Expense line
298
+ */
282
299
  `;
283
300
  const expectedResult = `
284
301
  /**
@@ -300,8 +317,7 @@ const config2 = {
300
317
  */
301
318
  containsPrepaidItem?: everest_appserver_primitive_TrueFalse | null;
302
319
  `;
303
- const result = await applySnippetPatch(sourceCode, patchCode, TS_WASM_PATH);
304
- expect(normalize(result)).toEqual(normalize(expectedResult));
320
+ await runTest(sourceCode, patchCode, expectedResult);
305
321
  });
306
322
 
307
323
  it('should patch TSX correctly by trimming tokens from invalid partial snippets', async () => {
@@ -412,7 +428,10 @@ export function TransactionsPage() {
412
428
  <DataTable
413
429
  columns={billColumns}
414
430
  data={bills}
415
- onRowClick={(row) => selectMatchable(row.id)}
431
+ onRowClick={(row) => {
432
+ console.log(\`[UI] Selected matchable bill ID\`)
433
+ selectMatchable(row.id)
434
+ }}
416
435
  selectedId={selectedMatchableId}
417
436
  />
418
437
  </div>
@@ -573,11 +592,6 @@ export function TransactionsPage() {
573
592
  )
574
593
  }
575
594
  `;
576
- const result = await applySnippetPatch(
577
- sourceCode,
578
- patchCode,
579
- TSX_WASM_PATH,
580
- );
581
- expect(normalize(result)).toEqual(normalize(expectedResult));
595
+ await runTest(sourceCode, patchCode, expectedResult);
582
596
  });
583
- });
597
+ });
@@ -1,5 +1,4 @@
1
1
  import { initializeParser } from './parser';
2
- import { collectTokens } from './tokens';
3
2
  import {
4
3
  handleBeginOfFilePatch,
5
4
  handleEndOfFilePatch,
@@ -9,19 +8,41 @@ import {
9
8
  SPECIAL_PATCH_BEGIN_FILE_MARKER,
10
9
  SPECIAL_PATCH_END_FILE_MARKER,
11
10
  } from '../../src/llm-orchestration/parser/parsing.constants';
11
+ import { TokenizerStrategy } from './tokenizer.interface';
12
+ import { TreeSitterTokenizer } from './strategies/tree-sitter-tokenizer';
13
+ import { TiktokenTokenizer } from './strategies/tiktoken-tokenizer';
14
+
15
+ export interface ApplyPatchOptions {
16
+ grammarPath?: string;
17
+ useTiktoken?: boolean;
18
+ }
12
19
 
13
20
  export async function applySnippetPatch(
14
21
  sourceCode: string,
15
22
  patchCode: string,
16
- grammarPath: string,
23
+ optionsOrGrammarPath: string | ApplyPatchOptions,
17
24
  ): Promise<string> {
18
- const parser = await initializeParser(grammarPath);
25
+ let strategy: TokenizerStrategy;
26
+ let options: ApplyPatchOptions;
19
27
 
20
- const sourceTree = parser.parse(sourceCode);
21
- if (!sourceTree) {
22
- throw new Error('Failed to parse source code.');
28
+ // Backward compatibility for when the 3rd argument was just grammarPath string
29
+ if (typeof optionsOrGrammarPath === 'string') {
30
+ options = { grammarPath: optionsOrGrammarPath };
31
+ } else {
32
+ options = optionsOrGrammarPath;
33
+ }
34
+
35
+ if (options.useTiktoken) {
36
+ strategy = new TiktokenTokenizer();
37
+ } else if (options.grammarPath) {
38
+ const parser = await initializeParser(options.grammarPath);
39
+ strategy = new TreeSitterTokenizer(parser);
40
+ } else {
41
+ // Default to Tiktoken if no grammar path is provided
42
+ strategy = new TiktokenTokenizer();
23
43
  }
24
- const sourceTokens = collectTokens(sourceTree, sourceCode);
44
+
45
+ const sourceTokens = strategy.tokenize(sourceCode);
25
46
 
26
47
  let patchResult: {
27
48
  replaceStart: number;
@@ -34,43 +55,44 @@ export async function applySnippetPatch(
34
55
  const beginOfFileRegex = new RegExp(
35
56
  `//\\s*${SPECIAL_PATCH_BEGIN_FILE_MARKER}.*`,
36
57
  );
37
- const endOfFileRegex = new RegExp(
38
- `//\\s*${SPECIAL_PATCH_END_FILE_MARKER}.*`,
39
- );
58
+ const endOfFileRegex = new RegExp(`//\\s*${SPECIAL_PATCH_END_FILE_MARKER}.*`);
40
59
 
41
60
  const hasBeginOfFile = beginOfFileRegex.test(patchCode);
42
61
  const hasEndOfFile = endOfFileRegex.test(patchCode);
43
62
 
44
63
  if (hasBeginOfFile) {
45
64
  processedPatchCode = patchCode.replace(beginOfFileRegex, '');
46
- const patchTree = parser.parse(processedPatchCode);
47
- if (!patchTree) {
48
- throw new Error('Failed to parse patch code.');
49
- }
50
- const patchTokens = collectTokens(patchTree, processedPatchCode).filter(
51
- (t) => t.text !== '',
52
- );
65
+ // We need to tokenize the processed patch code
66
+ // We filter out empty text tokens if any strategy produces them, though likely not needed for tiktoken
67
+ const patchTokens = strategy
68
+ .tokenize(processedPatchCode)
69
+ .filter((t) => t.text !== '' && t.text !== '\n');
53
70
  patchResult = handleBeginOfFilePatch(sourceTokens, patchTokens);
54
71
  } else if (hasEndOfFile) {
55
72
  processedPatchCode = patchCode.replace(endOfFileRegex, '');
56
- const patchTree = parser.parse(processedPatchCode);
57
- if (!patchTree) {
58
- throw new Error('Failed to parse patch code.');
59
- }
60
- const patchTokens = collectTokens(patchTree, processedPatchCode).filter(
61
- (t) => t.text !== '',
62
- );
73
+ const patchTokens = strategy
74
+ .tokenize(processedPatchCode)
75
+ .filter((t) => t.text !== '' && t.text !== '\n');
63
76
  patchResult = handleEndOfFilePatch(sourceTokens, patchTokens, sourceCode);
64
77
  } else {
65
78
  processedPatchCode = patchCode.trim();
66
- const patchTree = parser.parse(processedPatchCode);
67
- if (!patchTree) {
68
- throw new Error('Failed to parse patch code.');
79
+ const patchTokens = strategy
80
+ .tokenize(processedPatchCode)
81
+ .filter((t) => t.text !== '');
82
+
83
+ // Heuristic: If using Tiktoken, we discard the first and last 2 tokens to improve matching resilience.
84
+ // This prevents issues where the LLM hallucinates extra delimiters or context at the edges of the snippet.
85
+ const isTiktoken = options.useTiktoken || !options.grammarPath;
86
+ if (
87
+ isTiktoken &&
88
+ patchTokens.length >= 6 &&
89
+ patchTokens.length < sourceTokens.length
90
+ ) {
91
+ const innerTokens = patchTokens.slice(2, -2);
92
+ patchResult = handleStandardPatch(sourceTokens, innerTokens);
93
+ } else {
94
+ patchResult = handleStandardPatch(sourceTokens, patchTokens);
69
95
  }
70
- const patchTokens = collectTokens(patchTree, processedPatchCode).filter(
71
- (t) => t.text !== '',
72
- );
73
- patchResult = handleStandardPatch(sourceTokens, patchTokens);
74
96
  }
75
97
 
76
98
  // NOTE: replaceStart/End are byte offsets.
@@ -85,4 +107,4 @@ export async function applySnippetPatch(
85
107
  );
86
108
 
87
109
  return prefix + finalPatchContent + suffix;
88
- }
110
+ }
@@ -17,6 +17,23 @@ interface SimplePatchResult {
17
17
  replaceEnd: number;
18
18
  }
19
19
 
20
+ // Helper to prioritize "Ambiguous" errors over "Not found" errors
21
+ const updateLastError = (
22
+ currentError: Error | null,
23
+ newError: Error,
24
+ ): Error => {
25
+ if (!currentError) return newError;
26
+ // If we already have an Ambiguous error, keep it unless the new one is also Ambiguous
27
+ // (Assuming Ambiguous is more useful/specific than "Not found")
28
+ const currentIsAmbiguous = currentError.message.includes('Ambiguous');
29
+ const newIsAmbiguous = newError.message.includes('Ambiguous');
30
+
31
+ if (currentIsAmbiguous && !newIsAmbiguous) {
32
+ return currentError;
33
+ }
34
+ return newError;
35
+ };
36
+
20
37
  // Internal helper for the original matching logic
21
38
  function _findBeginOfFilePatchLocation(
22
39
  sourceTokens: Token[],
@@ -40,7 +57,10 @@ function _findBeginOfFilePatchLocation(
40
57
  if (indices.length > 1) {
41
58
  const formattedAnchor = formatAnchor(suffixAnchor);
42
59
  const locations = indices
43
- .map((i) => `line ${sourceTokens[i].startPosition.row + 1}`)
60
+ .map(
61
+ (i) =>
62
+ `line ${sourceTokens[i].startPosition?.row ? sourceTokens[i].startPosition.row + 1 : '?'}`,
63
+ )
44
64
  .join(', ');
45
65
  lastError = `Ambiguous suffix anchor. The sequence "${formattedAnchor}" was found at ${indices.length} locations: ${locations}.`;
46
66
  }
@@ -83,7 +103,7 @@ export function handleBeginOfFilePatch(
83
103
  const patchInsertEnd = patchAttempt[patchAttempt.length - 1].endIndex;
84
104
  return { replaceStart, replaceEnd, patchInsertStart, patchInsertEnd };
85
105
  } catch (e) {
86
- lastError = e as Error;
106
+ lastError = updateLastError(lastError, e as Error);
87
107
  patchAttempt = patchAttempt.slice(1); // Trim one token from the beginning
88
108
  }
89
109
  }
@@ -116,7 +136,10 @@ function _findEndOfFilePatchLocation(
116
136
  if (indices.length > 1) {
117
137
  const formattedAnchor = formatAnchor(prefixAnchor);
118
138
  const locations = indices
119
- .map((i) => `line ${sourceTokens[i].startPosition.row + 1}`)
139
+ .map(
140
+ (i) =>
141
+ `line ${sourceTokens[i].startPosition?.row ? sourceTokens[i].startPosition.row + 1 : '?'}`,
142
+ )
120
143
  .join(', ');
121
144
  lastError = `Ambiguous prefix anchor. The sequence "${formattedAnchor}" was found at ${indices.length} locations: ${locations}.`;
122
145
  }
@@ -160,7 +183,7 @@ export function handleEndOfFilePatch(
160
183
  const patchInsertEnd = patchAttempt[patchAttempt.length - 1].endIndex;
161
184
  return { replaceStart, replaceEnd, patchInsertStart, patchInsertEnd };
162
185
  } catch (e) {
163
- lastError = e as Error;
186
+ lastError = updateLastError(lastError, e as Error);
164
187
  patchAttempt = patchAttempt.slice(0, -1); // Trim one token from the end
165
188
  }
166
189
  }
@@ -170,12 +193,16 @@ export function handleEndOfFilePatch(
170
193
  );
171
194
  }
172
195
 
173
- // Internal helper for the original matching logic
174
- function _findStandardPatchLocation(
196
+ interface PrefixResult {
197
+ prefixAnchor: Token[];
198
+ prefixIndex: number;
199
+ replaceStart: number;
200
+ }
201
+
202
+ function _findPrefixLocation(
175
203
  sourceTokens: Token[],
176
204
  patchTokens: Token[],
177
- ): SimplePatchResult {
178
- // 1. Find smallest unique prefix
205
+ ): PrefixResult {
179
206
  let prefixAnchor: Token[] | null = null;
180
207
  let prefixIndex: number | null = null;
181
208
  let bestPrefixError: string | null = null;
@@ -193,7 +220,10 @@ function _findStandardPatchLocation(
193
220
  if (prefixIndices.length > 1) {
194
221
  const formatted = formatAnchor(currentPrefix);
195
222
  const locations = prefixIndices
196
- .map((i) => `line ${sourceTokens[i].startPosition.row + 1}`)
223
+ .map(
224
+ (i) =>
225
+ `line ${sourceTokens[i].startPosition?.row ? sourceTokens[i].startPosition.row + 1 : '?'}`,
226
+ )
197
227
  .join(', ');
198
228
  bestPrefixError = `Ambiguous prefix anchor. The sequence "${formatted}" was found at ${prefixIndices.length} locations: ${locations}.`;
199
229
  }
@@ -207,12 +237,24 @@ function _findStandardPatchLocation(
207
237
  }
208
238
 
209
239
  if (!prefixAnchor || prefixIndex === null) {
210
- throw new Error(
211
- bestPrefixError || 'Could not find a unique prefix anchor.',
212
- );
240
+ throw new Error(bestPrefixError || 'Could not find a unique prefix anchor.');
213
241
  }
214
242
 
215
- // 2. Find smallest unique suffix after prefix
243
+ const replaceStart = sourceTokens[prefixIndex].startIndex;
244
+
245
+ return { prefixAnchor, prefixIndex, replaceStart };
246
+ }
247
+
248
+ interface SuffixResult {
249
+ replaceEnd: number;
250
+ }
251
+
252
+ function _findSuffixLocation(
253
+ sourceTokens: Token[],
254
+ patchTokens: Token[],
255
+ prefixAnchor: Token[],
256
+ prefixIndex: number,
257
+ ): SuffixResult {
216
258
  let suffixAnchor: Token[] | null = null;
217
259
  let suffixIndex: number | null = null;
218
260
  let bestSuffixError: string | null = null;
@@ -239,7 +281,9 @@ function _findStandardPatchLocation(
239
281
  .map(
240
282
  (i) =>
241
283
  `line ${
242
- sourceTokens[searchStartIndex + i].startPosition.row + 1
284
+ sourceTokens[searchStartIndex + i].startPosition?.row
285
+ ? sourceTokens[searchStartIndex + i].startPosition.row + 1
286
+ : '?'
243
287
  }`,
244
288
  )
245
289
  .join(', ');
@@ -251,7 +295,9 @@ function _findStandardPatchLocation(
251
295
  throw new Error(bestSuffixError);
252
296
  }
253
297
  const prefixLocation = `line ${
254
- sourceTokens[prefixIndex].startPosition.row + 1
298
+ sourceTokens[prefixIndex].startPosition?.row
299
+ ? sourceTokens[prefixIndex].startPosition.row + 1
300
+ : '?'
255
301
  }`;
256
302
  const formattedPrefix = formatAnchor(prefixAnchor);
257
303
  const smallestSuffix = formatAnchor(
@@ -264,12 +310,10 @@ function _findStandardPatchLocation(
264
310
  );
265
311
  }
266
312
 
267
- // 3. Apply patch
268
- const replaceStart = sourceTokens[prefixIndex].startIndex;
269
313
  const replaceEnd =
270
314
  sourceTokens[suffixIndex + suffixAnchor.length - 1].endIndex;
271
315
 
272
- return { replaceStart, replaceEnd };
316
+ return { replaceEnd };
273
317
  }
274
318
 
275
319
  export function handleStandardPatch(
@@ -282,27 +326,64 @@ export function handleStandardPatch(
282
326
  );
283
327
  }
284
328
 
285
- let patchAttempt = [...originalPatchTokens];
329
+ let startTrim = 0;
330
+ const endTrim = 0;
286
331
  let lastError: Error | null = null;
287
332
 
288
- // Outer loop for trimming tokens from both ends
333
+ let prefixInfo: PrefixResult | null = null;
334
+
335
+ // Loop 1: Find Prefix (trim from start)
336
+ let patchAttempt = [...originalPatchTokens];
289
337
  while (patchAttempt.length >= 2) {
290
338
  try {
291
- const { replaceStart, replaceEnd } = _findStandardPatchLocation(
339
+ prefixInfo = _findPrefixLocation(sourceTokens, patchAttempt);
340
+ // Success finding prefix
341
+ startTrim = originalPatchTokens.length - patchAttempt.length;
342
+ break;
343
+ } catch (e) {
344
+ lastError = updateLastError(lastError, e as Error);
345
+ // Trim one token from the start
346
+ patchAttempt = patchAttempt.slice(1);
347
+ }
348
+ }
349
+
350
+ if (!prefixInfo) {
351
+ throw new Error(
352
+ `Failed to apply patch. Could not find a unique prefix anchor, even after trimming tokens. Last known error: ${lastError?.message}`,
353
+ );
354
+ }
355
+
356
+ // Loop 2: Find Suffix (trim from end)
357
+ // Reset patchAttempt to start from the found prefix startTrim, but allow trimming end
358
+ patchAttempt = originalPatchTokens.slice(startTrim);
359
+ lastError = null; // Reset last error for suffix search phase
360
+
361
+ while (patchAttempt.length >= prefixInfo.prefixAnchor.length + 1) {
362
+ // Need at least prefix + 1 token? Or just prefix + suffix?
363
+ try {
364
+ const { replaceEnd } = _findSuffixLocation(
292
365
  sourceTokens,
293
366
  patchAttempt,
367
+ prefixInfo.prefixAnchor,
368
+ prefixInfo.prefixIndex,
294
369
  );
370
+ // Success finding suffix
295
371
  const patchInsertStart = patchAttempt[0].startIndex;
296
372
  const patchInsertEnd = patchAttempt[patchAttempt.length - 1].endIndex;
297
- return { replaceStart, replaceEnd, patchInsertStart, patchInsertEnd };
373
+ return {
374
+ replaceStart: prefixInfo.replaceStart,
375
+ replaceEnd,
376
+ patchInsertStart,
377
+ patchInsertEnd,
378
+ };
298
379
  } catch (e) {
299
- lastError = e as Error;
300
- // Trim one token from the start and one from the end for the next attempt
301
- patchAttempt = patchAttempt.slice(1, -1);
380
+ lastError = updateLastError(lastError, e as Error);
381
+ // Trim one token from the end
382
+ patchAttempt = patchAttempt.slice(0, -1);
302
383
  }
303
384
  }
304
385
 
305
386
  throw new Error(
306
- `Failed to apply patch. Could not find a unique anchor in the source file, even after trimming ambiguous tokens. Last known error: ${lastError?.message}`,
387
+ `Failed to apply patch. Could not find a unique suffix anchor, even after trimming tokens. Last known error: ${lastError?.message}`,
307
388
  );
308
389
  }
@@ -0,0 +1,35 @@
1
+ import { getEncoding } from 'js-tiktoken';
2
+ import { Token } from '../types';
3
+ import { TokenizerStrategy } from '../tokenizer.interface';
4
+
5
+ export class TiktokenTokenizer implements TokenizerStrategy {
6
+ // Use cl100k_base (GPT-4) as the standard encoding
7
+ private enc = getEncoding('cl100k_base');
8
+
9
+ tokenize(content: string): Token[] {
10
+ const tokens: Token[] = [];
11
+ const encoded = this.enc.encode(content);
12
+
13
+ let currentIndex = 0;
14
+
15
+ // Iterate through token IDs, decode them individually to get text and length.
16
+ // This allows us to reconstruct the offsets (startIndex/endIndex).
17
+ for (const tokenId of encoded) {
18
+ // decoding a single token is the only way to get its exact text representation
19
+ // to map back to the source string indices.
20
+ const text = this.enc.decode([tokenId]);
21
+ const length = text.length;
22
+
23
+ tokens.push({
24
+ text,
25
+ type: 'bpe',
26
+ startIndex: currentIndex,
27
+ endIndex: currentIndex + length,
28
+ // startPosition is not calculated for Tiktoken strategy as it's computationally expensive
29
+ // and not strictly required for the patching algorithm which relies on text matching.
30
+ });
31
+ currentIndex += length;
32
+ }
33
+ return tokens;
34
+ }
35
+ }
@@ -0,0 +1,37 @@
1
+ import { Token } from '../types';
2
+ import { TokenizerStrategy } from '../tokenizer.interface';
3
+ import type { Tree, Node, Parser } from 'web-tree-sitter';
4
+
5
+ export class TreeSitterTokenizer implements TokenizerStrategy {
6
+ constructor(private parser: Parser) {}
7
+
8
+ tokenize(content: string): Token[] {
9
+ const tree = this.parser.parse(content);
10
+ return this.collectTokens(tree, content);
11
+ }
12
+
13
+ private collectTokens(tree: Tree, code: string): Token[] {
14
+ const tokens: Token[] = [];
15
+
16
+ function visit(node: Node) {
17
+ if (node.childCount === 0) {
18
+ tokens.push({
19
+ text: code.slice(node.startIndex, node.endIndex),
20
+ type: node.type,
21
+ startIndex: node.startIndex,
22
+ endIndex: node.endIndex,
23
+ startPosition: node.startPosition,
24
+ });
25
+ return;
26
+ }
27
+ // By iterating over all children (not just named), we include punctuation
28
+ for (let i = 0; i < node.childCount; i++) {
29
+ const child = node.child(i);
30
+ if (child) visit(child);
31
+ }
32
+ }
33
+
34
+ visit(tree.rootNode);
35
+ return tokens;
36
+ }
37
+ }
@@ -0,0 +1,5 @@
1
+ import { Token } from './types';
2
+
3
+ export interface TokenizerStrategy {
4
+ tokenize(content: string): Token[];
5
+ }
@@ -1,33 +1,15 @@
1
- import type { Tree, Node } from 'web-tree-sitter';
2
1
  import type { Token } from './types';
3
2
 
4
- export function collectTokens(tree: Tree, code: string): Token[] {
5
- const tokens: Token[] = [];
6
-
7
- function visit(node: Node) {
8
- if (node.childCount === 0) {
9
- tokens.push({
10
- text: code.slice(node.startIndex, node.endIndex),
11
- type: node.type,
12
- startIndex: node.startIndex,
13
- endIndex: node.endIndex,
14
- startPosition: node.startPosition,
15
- });
16
- return;
17
- }
18
- // By iterating over all children (not just named), we include punctuation
19
- // which is critical for anchor-based matching.
20
- for (let i = 0; i < node.childCount; i++) {
21
- const child = node.child(i);
22
- if (child) visit(child);
23
- }
24
- }
25
-
26
- visit(tree.rootNode);
27
- return tokens;
28
- }
29
-
30
3
  export function tokensEqual(a: Token, b: Token): boolean {
4
+ // If using Tiktoken (BPE), we relax the comparison to ignore whitespace differences
5
+ // and newlines by trimming and stripping them. This helps when the patch might
6
+ // have slightly different indentation or spacing than the source.
7
+ if (a.type === 'bpe' || b.type === 'bpe') {
8
+ return (
9
+ a.text.replace(/\r?\n/g, '').trim() ===
10
+ b.text.replace(/\r?\n/g, '').trim()
11
+ );
12
+ }
31
13
  return a.text === b.text;
32
14
  }
33
15
 
@@ -47,4 +29,4 @@ export function findAllSequences(haystack: Token[], needle: Token[]): number[] {
47
29
  indices.push(i);
48
30
  }
49
31
  return indices;
50
- }
32
+ }
@@ -2,8 +2,8 @@ import type { Point } from 'web-tree-sitter';
2
2
 
3
3
  export interface Token {
4
4
  text: string;
5
- type: string;
6
- startIndex: number; // byte offsets (tree-sitter)
5
+ type?: string;
6
+ startIndex: number; // byte offsets
7
7
  endIndex: number;
8
- startPosition: Point;
9
- }
8
+ startPosition?: Point;
9
+ }