repoburg 1.3.12 → 1.3.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/backend/dist/packages/tokenpatch/index.d.ts +5 -1
- package/backend/dist/packages/tokenpatch/index.js +39 -22
- package/backend/dist/packages/tokenpatch/index.js.map +1 -1
- package/backend/dist/packages/tokenpatch/patcher.js +57 -16
- package/backend/dist/packages/tokenpatch/patcher.js.map +1 -1
- package/backend/dist/packages/tokenpatch/strategies/tiktoken-tokenizer.d.ts +6 -0
- package/backend/dist/packages/tokenpatch/strategies/tiktoken-tokenizer.js +28 -0
- package/backend/dist/packages/tokenpatch/strategies/tiktoken-tokenizer.js.map +1 -0
- package/backend/dist/packages/tokenpatch/strategies/tree-sitter-tokenizer.d.ts +9 -0
- package/backend/dist/packages/tokenpatch/strategies/tree-sitter-tokenizer.js +36 -0
- package/backend/dist/packages/tokenpatch/strategies/tree-sitter-tokenizer.js.map +1 -0
- package/backend/dist/packages/tokenpatch/tokenizer.interface.d.ts +4 -0
- package/backend/dist/packages/tokenpatch/tokenizer.interface.js +3 -0
- package/backend/dist/packages/tokenpatch/tokenizer.interface.js.map +1 -0
- package/backend/dist/packages/tokenpatch/tokens.d.ts +0 -2
- package/backend/dist/packages/tokenpatch/tokens.js +4 -23
- package/backend/dist/packages/tokenpatch/tokens.js.map +1 -1
- package/backend/dist/packages/tokenpatch/types.d.ts +2 -2
- package/backend/dist/src/llm-orchestration/action-handlers/patch.handler.js +125 -47
- package/backend/dist/src/llm-orchestration/action-handlers/patch.handler.js.map +1 -1
- package/backend/dist/src/seeding/data/system-prompts/experimental_eta_master-agent.d.ts +1 -1
- package/backend/dist/src/seeding/data/system-prompts/experimental_eta_master-agent.js +44 -55
- package/backend/dist/src/seeding/data/system-prompts/experimental_eta_master-agent.js.map +1 -1
- package/backend/dist/tsconfig.build.tsbuildinfo +1 -1
- package/backend/packages/tokenpatch/index.spec.ts +44 -30
- package/backend/packages/tokenpatch/index.ts +54 -32
- package/backend/packages/tokenpatch/patcher.ts +107 -26
- package/backend/packages/tokenpatch/strategies/tiktoken-tokenizer.ts +35 -0
- package/backend/packages/tokenpatch/strategies/tree-sitter-tokenizer.ts +37 -0
- package/backend/packages/tokenpatch/tokenizer.interface.ts +5 -0
- package/backend/packages/tokenpatch/tokens.ts +10 -28
- package/backend/packages/tokenpatch/types.ts +4 -4
- package/package.json +2 -1
|
@@ -1 +1,5 @@
|
|
|
1
|
-
export
|
|
1
|
+
export interface ApplyPatchOptions {
|
|
2
|
+
grammarPath?: string;
|
|
3
|
+
useTiktoken?: boolean;
|
|
4
|
+
}
|
|
5
|
+
export declare function applySnippetPatch(sourceCode: string, patchCode: string, optionsOrGrammarPath: string | ApplyPatchOptions): Promise<string>;
|
|
@@ -2,16 +2,30 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.applySnippetPatch = applySnippetPatch;
|
|
4
4
|
const parser_1 = require("./parser");
|
|
5
|
-
const tokens_1 = require("./tokens");
|
|
6
5
|
const patcher_1 = require("./patcher");
|
|
7
6
|
const parsing_constants_1 = require("../../src/llm-orchestration/parser/parsing.constants");
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
7
|
+
const tree_sitter_tokenizer_1 = require("./strategies/tree-sitter-tokenizer");
|
|
8
|
+
const tiktoken_tokenizer_1 = require("./strategies/tiktoken-tokenizer");
|
|
9
|
+
async function applySnippetPatch(sourceCode, patchCode, optionsOrGrammarPath) {
|
|
10
|
+
let strategy;
|
|
11
|
+
let options;
|
|
12
|
+
if (typeof optionsOrGrammarPath === 'string') {
|
|
13
|
+
options = { grammarPath: optionsOrGrammarPath };
|
|
13
14
|
}
|
|
14
|
-
|
|
15
|
+
else {
|
|
16
|
+
options = optionsOrGrammarPath;
|
|
17
|
+
}
|
|
18
|
+
if (options.useTiktoken) {
|
|
19
|
+
strategy = new tiktoken_tokenizer_1.TiktokenTokenizer();
|
|
20
|
+
}
|
|
21
|
+
else if (options.grammarPath) {
|
|
22
|
+
const parser = await (0, parser_1.initializeParser)(options.grammarPath);
|
|
23
|
+
strategy = new tree_sitter_tokenizer_1.TreeSitterTokenizer(parser);
|
|
24
|
+
}
|
|
25
|
+
else {
|
|
26
|
+
strategy = new tiktoken_tokenizer_1.TiktokenTokenizer();
|
|
27
|
+
}
|
|
28
|
+
const sourceTokens = strategy.tokenize(sourceCode);
|
|
15
29
|
let patchResult;
|
|
16
30
|
let processedPatchCode = patchCode;
|
|
17
31
|
const beginOfFileRegex = new RegExp(`//\\s*${parsing_constants_1.SPECIAL_PATCH_BEGIN_FILE_MARKER}.*`);
|
|
@@ -20,30 +34,33 @@ async function applySnippetPatch(sourceCode, patchCode, grammarPath) {
|
|
|
20
34
|
const hasEndOfFile = endOfFileRegex.test(patchCode);
|
|
21
35
|
if (hasBeginOfFile) {
|
|
22
36
|
processedPatchCode = patchCode.replace(beginOfFileRegex, '');
|
|
23
|
-
const
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
}
|
|
27
|
-
const patchTokens = (0, tokens_1.collectTokens)(patchTree, processedPatchCode).filter((t) => t.text !== '');
|
|
37
|
+
const patchTokens = strategy
|
|
38
|
+
.tokenize(processedPatchCode)
|
|
39
|
+
.filter((t) => t.text !== '' && t.text !== '\n');
|
|
28
40
|
patchResult = (0, patcher_1.handleBeginOfFilePatch)(sourceTokens, patchTokens);
|
|
29
41
|
}
|
|
30
42
|
else if (hasEndOfFile) {
|
|
31
43
|
processedPatchCode = patchCode.replace(endOfFileRegex, '');
|
|
32
|
-
const
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
}
|
|
36
|
-
const patchTokens = (0, tokens_1.collectTokens)(patchTree, processedPatchCode).filter((t) => t.text !== '');
|
|
44
|
+
const patchTokens = strategy
|
|
45
|
+
.tokenize(processedPatchCode)
|
|
46
|
+
.filter((t) => t.text !== '' && t.text !== '\n');
|
|
37
47
|
patchResult = (0, patcher_1.handleEndOfFilePatch)(sourceTokens, patchTokens, sourceCode);
|
|
38
48
|
}
|
|
39
49
|
else {
|
|
40
50
|
processedPatchCode = patchCode.trim();
|
|
41
|
-
const
|
|
42
|
-
|
|
43
|
-
|
|
51
|
+
const patchTokens = strategy
|
|
52
|
+
.tokenize(processedPatchCode)
|
|
53
|
+
.filter((t) => t.text !== '');
|
|
54
|
+
const isTiktoken = options.useTiktoken || !options.grammarPath;
|
|
55
|
+
if (isTiktoken &&
|
|
56
|
+
patchTokens.length >= 6 &&
|
|
57
|
+
patchTokens.length < sourceTokens.length) {
|
|
58
|
+
const innerTokens = patchTokens.slice(2, -2);
|
|
59
|
+
patchResult = (0, patcher_1.handleStandardPatch)(sourceTokens, innerTokens);
|
|
60
|
+
}
|
|
61
|
+
else {
|
|
62
|
+
patchResult = (0, patcher_1.handleStandardPatch)(sourceTokens, patchTokens);
|
|
44
63
|
}
|
|
45
|
-
const patchTokens = (0, tokens_1.collectTokens)(patchTree, processedPatchCode).filter((t) => t.text !== '');
|
|
46
|
-
patchResult = (0, patcher_1.handleStandardPatch)(sourceTokens, patchTokens);
|
|
47
64
|
}
|
|
48
65
|
const prefix = sourceCode.slice(0, patchResult.replaceStart);
|
|
49
66
|
const suffix = sourceCode.slice(patchResult.replaceEnd);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../packages/tokenpatch/index.ts"],"names":[],"mappings":";;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../packages/tokenpatch/index.ts"],"names":[],"mappings":";;AAmBA,8CA0FC;AA7GD,qCAA4C;AAC5C,uCAImB;AACnB,4FAG8D;AAE9D,8EAAyE;AACzE,wEAAoE;AAO7D,KAAK,UAAU,iBAAiB,CACrC,UAAkB,EAClB,SAAiB,EACjB,oBAAgD;IAEhD,IAAI,QAA2B,CAAC;IAChC,IAAI,OAA0B,CAAC;IAG/B,IAAI,OAAO,oBAAoB,KAAK,QAAQ,EAAE,CAAC;QAC7C,OAAO,GAAG,EAAE,WAAW,EAAE,oBAAoB,EAAE,CAAC;IAClD,CAAC;SAAM,CAAC;QACN,OAAO,GAAG,oBAAoB,CAAC;IACjC,CAAC;IAED,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;QACxB,QAAQ,GAAG,IAAI,sCAAiB,EAAE,CAAC;IACrC,CAAC;SAAM,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;QAC/B,MAAM,MAAM,GAAG,MAAM,IAAA,yBAAgB,EAAC,OAAO,CAAC,WAAW,CAAC,CAAC;QAC3D,QAAQ,GAAG,IAAI,2CAAmB,CAAC,MAAM,CAAC,CAAC;IAC7C,CAAC;SAAM,CAAC;QAEN,QAAQ,GAAG,IAAI,sCAAiB,EAAE,CAAC;IACrC,CAAC;IAED,MAAM,YAAY,GAAG,QAAQ,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;IAEnD,IAAI,WAKH,CAAC;IACF,IAAI,kBAAkB,GAAG,SAAS,CAAC;IAEnC,MAAM,gBAAgB,GAAG,IAAI,MAAM,CACjC,SAAS,mDAA+B,IAAI,CAC7C,CAAC;IACF,MAAM,cAAc,GAAG,IAAI,MAAM,CAAC,SAAS,iDAA6B,IAAI,CAAC,CAAC;IAE9E,MAAM,cAAc,GAAG,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACxD,MAAM,YAAY,GAAG,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAEpD,IAAI,cAAc,EAAE,CAAC;QACnB,kBAAkB,GAAG,SAAS,CAAC,OAAO,CAAC,gBAAgB,EAAE,EAAE,CAAC,CAAC;QAG7D,MAAM,WAAW,GAAG,QAAQ;aACzB,QAAQ,CAAC,kBAAkB,CAAC;aAC5B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,EAAE,IAAI,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;QACnD,WAAW,GAAG,IAAA,gCAAsB,EAAC,YAAY,EAAE,WAAW,CAAC,CAAC;IAClE,CAAC;SAAM,IAAI,YAAY,EAAE,CAAC;QACxB,kBAAkB,GAAG,SAAS,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;QAC3D,MAAM,WAAW,GAAG,QAAQ;aACzB,QAAQ,CAAC,kBAAkB,CAAC;aAC5B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,EAAE,IAAI,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;QACnD,WAAW,GAAG,IAAA,8BAAoB,EAAC,YAAY,EAAE,WAAW,EAAE,UAAU,CAAC,CAAC;IAC5E,CAAC;SAAM,CAAC;QACN,kBAAkB,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;QACtC,MAAM,WAAW,GAAG,QAAQ;aACzB,QAAQ,CAAC,kBAAkB,CAAC;aAC5B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,EAAE,CAAC,CAAC;QAIhC,MAAM,UAAU,GAAG,OAAO,CAAC,WAAW,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC;QAC/D,IACE,UAAU;YACV,WAAW,CAAC,MAAM,IAAI,CAAC;YACvB,WAAW,CAAC,MAAM,GAAG,YAAY,CAAC,MAAM,EACxC,CAAC;YACD,MAAM,WAAW,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YAC7C,WAAW,GAAG,IAAA,6BAAmB,EAAC,YAAY,EAAE,WAAW,CAAC,CAAC;QAC/D,CAAC;aAAM,CAAC;YACN,WAAW,GAAG,IAAA,6BAAmB,EAAC,YAAY,EAAE,WAAW,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;IAMD,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC,YAAY,CAAC,CAAC;IAC7D,MAAM,MAAM,GAAG,UAAU,CAAC,KAAK,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;IACxD,MAAM,iBAAiB,GAAG,kBAAkB,CAAC,KAAK,CAChD,WAAW,CAAC,gBAAgB,EAC5B,WAAW,CAAC,cAAc,CAC3B,CAAC;IAEF,OAAO,MAAM,GAAG,iBAAiB,GAAG,MAAM,CAAC;AAC7C,CAAC"}
|
|
@@ -5,6 +5,16 @@ exports.handleEndOfFilePatch = handleEndOfFilePatch;
|
|
|
5
5
|
exports.handleStandardPatch = handleStandardPatch;
|
|
6
6
|
const tokens_1 = require("./tokens");
|
|
7
7
|
const parsing_constants_1 = require("../../src/llm-orchestration/parser/parsing.constants");
|
|
8
|
+
const updateLastError = (currentError, newError) => {
|
|
9
|
+
if (!currentError)
|
|
10
|
+
return newError;
|
|
11
|
+
const currentIsAmbiguous = currentError.message.includes('Ambiguous');
|
|
12
|
+
const newIsAmbiguous = newError.message.includes('Ambiguous');
|
|
13
|
+
if (currentIsAmbiguous && !newIsAmbiguous) {
|
|
14
|
+
return currentError;
|
|
15
|
+
}
|
|
16
|
+
return newError;
|
|
17
|
+
};
|
|
8
18
|
function _findBeginOfFilePatchLocation(sourceTokens, patchTokens) {
|
|
9
19
|
let replaceStart = null;
|
|
10
20
|
let replaceEnd = null;
|
|
@@ -21,7 +31,7 @@ function _findBeginOfFilePatchLocation(sourceTokens, patchTokens) {
|
|
|
21
31
|
if (indices.length > 1) {
|
|
22
32
|
const formattedAnchor = (0, tokens_1.formatAnchor)(suffixAnchor);
|
|
23
33
|
const locations = indices
|
|
24
|
-
.map((i) => `line ${sourceTokens[i].startPosition.row + 1}`)
|
|
34
|
+
.map((i) => `line ${sourceTokens[i].startPosition?.row ? sourceTokens[i].startPosition.row + 1 : '?'}`)
|
|
25
35
|
.join(', ');
|
|
26
36
|
lastError = `Ambiguous suffix anchor. The sequence "${formattedAnchor}" was found at ${indices.length} locations: ${locations}.`;
|
|
27
37
|
}
|
|
@@ -47,7 +57,7 @@ function handleBeginOfFilePatch(sourceTokens, originalPatchTokens) {
|
|
|
47
57
|
return { replaceStart, replaceEnd, patchInsertStart, patchInsertEnd };
|
|
48
58
|
}
|
|
49
59
|
catch (e) {
|
|
50
|
-
lastError = e;
|
|
60
|
+
lastError = updateLastError(lastError, e);
|
|
51
61
|
patchAttempt = patchAttempt.slice(1);
|
|
52
62
|
}
|
|
53
63
|
}
|
|
@@ -68,7 +78,7 @@ function _findEndOfFilePatchLocation(sourceTokens, patchTokens, sourceCode) {
|
|
|
68
78
|
if (indices.length > 1) {
|
|
69
79
|
const formattedAnchor = (0, tokens_1.formatAnchor)(prefixAnchor);
|
|
70
80
|
const locations = indices
|
|
71
|
-
.map((i) => `line ${sourceTokens[i].startPosition.row + 1}`)
|
|
81
|
+
.map((i) => `line ${sourceTokens[i].startPosition?.row ? sourceTokens[i].startPosition.row + 1 : '?'}`)
|
|
72
82
|
.join(', ');
|
|
73
83
|
lastError = `Ambiguous prefix anchor. The sequence "${formattedAnchor}" was found at ${indices.length} locations: ${locations}.`;
|
|
74
84
|
}
|
|
@@ -94,13 +104,13 @@ function handleEndOfFilePatch(sourceTokens, originalPatchTokens, sourceCode) {
|
|
|
94
104
|
return { replaceStart, replaceEnd, patchInsertStart, patchInsertEnd };
|
|
95
105
|
}
|
|
96
106
|
catch (e) {
|
|
97
|
-
lastError = e;
|
|
107
|
+
lastError = updateLastError(lastError, e);
|
|
98
108
|
patchAttempt = patchAttempt.slice(0, -1);
|
|
99
109
|
}
|
|
100
110
|
}
|
|
101
111
|
throw new Error(`Failed to apply ${parsing_constants_1.SPECIAL_PATCH_END_FILE_MARKER} patch. Could not find a unique anchor, even after trimming tokens. Last known error: ${lastError?.message}`);
|
|
102
112
|
}
|
|
103
|
-
function
|
|
113
|
+
function _findPrefixLocation(sourceTokens, patchTokens) {
|
|
104
114
|
let prefixAnchor = null;
|
|
105
115
|
let prefixIndex = null;
|
|
106
116
|
let bestPrefixError = null;
|
|
@@ -116,7 +126,7 @@ function _findStandardPatchLocation(sourceTokens, patchTokens) {
|
|
|
116
126
|
if (prefixIndices.length > 1) {
|
|
117
127
|
const formatted = (0, tokens_1.formatAnchor)(currentPrefix);
|
|
118
128
|
const locations = prefixIndices
|
|
119
|
-
.map((i) => `line ${sourceTokens[i].startPosition.row + 1}`)
|
|
129
|
+
.map((i) => `line ${sourceTokens[i].startPosition?.row ? sourceTokens[i].startPosition.row + 1 : '?'}`)
|
|
120
130
|
.join(', ');
|
|
121
131
|
bestPrefixError = `Ambiguous prefix anchor. The sequence "${formatted}" was found at ${prefixIndices.length} locations: ${locations}.`;
|
|
122
132
|
}
|
|
@@ -131,6 +141,10 @@ function _findStandardPatchLocation(sourceTokens, patchTokens) {
|
|
|
131
141
|
if (!prefixAnchor || prefixIndex === null) {
|
|
132
142
|
throw new Error(bestPrefixError || 'Could not find a unique prefix anchor.');
|
|
133
143
|
}
|
|
144
|
+
const replaceStart = sourceTokens[prefixIndex].startIndex;
|
|
145
|
+
return { prefixAnchor, prefixIndex, replaceStart };
|
|
146
|
+
}
|
|
147
|
+
function _findSuffixLocation(sourceTokens, patchTokens, prefixAnchor, prefixIndex) {
|
|
134
148
|
let suffixAnchor = null;
|
|
135
149
|
let suffixIndex = null;
|
|
136
150
|
let bestSuffixError = null;
|
|
@@ -148,7 +162,9 @@ function _findStandardPatchLocation(sourceTokens, patchTokens) {
|
|
|
148
162
|
if (suffixIndices.length > 1) {
|
|
149
163
|
const formatted = (0, tokens_1.formatAnchor)(currentSuffix);
|
|
150
164
|
const locations = suffixIndices
|
|
151
|
-
.map((i) => `line ${sourceTokens[searchStartIndex + i].startPosition
|
|
165
|
+
.map((i) => `line ${sourceTokens[searchStartIndex + i].startPosition?.row
|
|
166
|
+
? sourceTokens[searchStartIndex + i].startPosition.row + 1
|
|
167
|
+
: '?'}`)
|
|
152
168
|
.join(', ');
|
|
153
169
|
bestSuffixError = `Ambiguous suffix anchor. The sequence "${formatted}" was found ${suffixIndices.length} times after the prefix: ${locations}.`;
|
|
154
170
|
}
|
|
@@ -157,34 +173,59 @@ function _findStandardPatchLocation(sourceTokens, patchTokens) {
|
|
|
157
173
|
if (bestSuffixError) {
|
|
158
174
|
throw new Error(bestSuffixError);
|
|
159
175
|
}
|
|
160
|
-
const prefixLocation = `line ${sourceTokens[prefixIndex].startPosition
|
|
176
|
+
const prefixLocation = `line ${sourceTokens[prefixIndex].startPosition?.row
|
|
177
|
+
? sourceTokens[prefixIndex].startPosition.row + 1
|
|
178
|
+
: '?'}`;
|
|
161
179
|
const formattedPrefix = (0, tokens_1.formatAnchor)(prefixAnchor);
|
|
162
180
|
const smallestSuffix = (0, tokens_1.formatAnchor)(patchTokens.slice(patchTokens.length - 1));
|
|
163
181
|
throw new Error(`Could not find a unique suffix anchor after the prefix anchor "${formattedPrefix}" (found at ${prefixLocation}). ` +
|
|
164
182
|
`The smallest suffix searched for ("${smallestSuffix}") was not found after it.`);
|
|
165
183
|
}
|
|
166
|
-
const replaceStart = sourceTokens[prefixIndex].startIndex;
|
|
167
184
|
const replaceEnd = sourceTokens[suffixIndex + suffixAnchor.length - 1].endIndex;
|
|
168
|
-
return {
|
|
185
|
+
return { replaceEnd };
|
|
169
186
|
}
|
|
170
187
|
function handleStandardPatch(sourceTokens, originalPatchTokens) {
|
|
171
188
|
if (originalPatchTokens.length < 2) {
|
|
172
189
|
throw new Error('Patch must contain at least two tokens to form a prefix and a suffix.');
|
|
173
190
|
}
|
|
174
|
-
let
|
|
191
|
+
let startTrim = 0;
|
|
192
|
+
const endTrim = 0;
|
|
175
193
|
let lastError = null;
|
|
194
|
+
let prefixInfo = null;
|
|
195
|
+
let patchAttempt = [...originalPatchTokens];
|
|
176
196
|
while (patchAttempt.length >= 2) {
|
|
177
197
|
try {
|
|
178
|
-
|
|
198
|
+
prefixInfo = _findPrefixLocation(sourceTokens, patchAttempt);
|
|
199
|
+
startTrim = originalPatchTokens.length - patchAttempt.length;
|
|
200
|
+
break;
|
|
201
|
+
}
|
|
202
|
+
catch (e) {
|
|
203
|
+
lastError = updateLastError(lastError, e);
|
|
204
|
+
patchAttempt = patchAttempt.slice(1);
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
if (!prefixInfo) {
|
|
208
|
+
throw new Error(`Failed to apply patch. Could not find a unique prefix anchor, even after trimming tokens. Last known error: ${lastError?.message}`);
|
|
209
|
+
}
|
|
210
|
+
patchAttempt = originalPatchTokens.slice(startTrim);
|
|
211
|
+
lastError = null;
|
|
212
|
+
while (patchAttempt.length >= prefixInfo.prefixAnchor.length + 1) {
|
|
213
|
+
try {
|
|
214
|
+
const { replaceEnd } = _findSuffixLocation(sourceTokens, patchAttempt, prefixInfo.prefixAnchor, prefixInfo.prefixIndex);
|
|
179
215
|
const patchInsertStart = patchAttempt[0].startIndex;
|
|
180
216
|
const patchInsertEnd = patchAttempt[patchAttempt.length - 1].endIndex;
|
|
181
|
-
return {
|
|
217
|
+
return {
|
|
218
|
+
replaceStart: prefixInfo.replaceStart,
|
|
219
|
+
replaceEnd,
|
|
220
|
+
patchInsertStart,
|
|
221
|
+
patchInsertEnd,
|
|
222
|
+
};
|
|
182
223
|
}
|
|
183
224
|
catch (e) {
|
|
184
|
-
lastError = e;
|
|
185
|
-
patchAttempt = patchAttempt.slice(
|
|
225
|
+
lastError = updateLastError(lastError, e);
|
|
226
|
+
patchAttempt = patchAttempt.slice(0, -1);
|
|
186
227
|
}
|
|
187
228
|
}
|
|
188
|
-
throw new Error(`Failed to apply patch. Could not find a unique anchor
|
|
229
|
+
throw new Error(`Failed to apply patch. Could not find a unique suffix anchor, even after trimming tokens. Last known error: ${lastError?.message}`);
|
|
189
230
|
}
|
|
190
231
|
//# sourceMappingURL=patcher.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"patcher.js","sourceRoot":"","sources":["../../../packages/tokenpatch/patcher.ts"],"names":[],"mappings":";;
|
|
1
|
+
{"version":3,"file":"patcher.js","sourceRoot":"","sources":["../../../packages/tokenpatch/patcher.ts"],"names":[],"mappings":";;AAiFA,wDAgCC;AA8CD,oDAkCC;AA6HD,kDAsEC;AAnYD,qCAA0D;AAC1D,4FAG8D;AAe9D,MAAM,eAAe,GAAG,CACtB,YAA0B,EAC1B,QAAe,EACR,EAAE;IACT,IAAI,CAAC,YAAY;QAAE,OAAO,QAAQ,CAAC;IAGnC,MAAM,kBAAkB,GAAG,YAAY,CAAC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IACtE,MAAM,cAAc,GAAG,QAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;IAE9D,IAAI,kBAAkB,IAAI,CAAC,cAAc,EAAE,CAAC;QAC1C,OAAO,YAAY,CAAC;IACtB,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC,CAAC;AAGF,SAAS,6BAA6B,CACpC,YAAqB,EACrB,WAAoB;IAEpB,IAAI,YAAY,GAAkB,IAAI,CAAC;IACvC,IAAI,UAAU,GAAkB,IAAI,CAAC;IACrC,IAAI,SAAS,GAAG,EAAE,CAAC;IAEnB,KAAK,IAAI,UAAU,GAAG,CAAC,EAAE,UAAU,IAAI,WAAW,CAAC,MAAM,EAAE,UAAU,EAAE,EAAE,CAAC;QACxE,MAAM,YAAY,GAAG,WAAW,CAAC,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,UAAU,CAAC,CAAC;QACxE,MAAM,OAAO,GAAG,IAAA,yBAAgB,EAAC,YAAY,EAAE,YAAY,CAAC,CAAC;QAE7D,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,YAAY,GAAG,CAAC,CAAC;YACjB,MAAM,WAAW,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;YAC/B,UAAU,GAAG,YAAY,CAAC,WAAW,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC;YACjE,MAAM;QACR,CAAC;QAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,eAAe,GAAG,IAAA,qBAAY,EAAC,YAAY,CAAC,CAAC;YACnD,MAAM,SAAS,GAAG,OAAO;iBACtB,GAAG,CACF,CAAC,CAAC,EAAE,EAAE,CACJ,QAAQ,YAAY,CAAC,CAAC,CAAC,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,CAC7F;iBACA,IAAI,CAAC,IAAI,CAAC,CAAC;YACd,SAAS,GAAG,0CAA0C,eAAe,kBAAkB,OAAO,CAAC,MAAM,eAAe,SAAS,GAAG,CAAC;QACnI,CAAC;IACH,CAAC;IAED,IAAI,YAAY,KAAK,IAAI,IAAI,UAAU,KAAK,IAAI,EAAE,CAAC;QACjD,MAAM,cAAc,GAAG,WAAW,CAAC,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACjE,MAAM,IAAI,KAAK,CACb,SAAS;YACP,0CAA0C,IAAA,qBAAY,EACpD,cAAc,CACf,yBAAyB,CAC7B,CAAC;IACJ,CAAC;IAED,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,CAAC;AACtC,CAAC;AAED,SAAgB,sBAAsB,CACpC,YAAqB,EACrB,mBAA4B;IAE5B,IAAI,mBAAmB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrC,MAAM,IAAI,KAAK,CACb,iCAAiC,mDAA+B,UAAU,CAC3E,CAAC;IACJ,CAAC;IAED,IAAI,YAAY,GAAG,CAAC,GAAG,mBAAmB,CAAC,CAAC;IAC5C,IAAI,SAAS,GAAiB,IAAI,CAAC;IAGnC,OAAO,YAAY,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAChC,IAAI,CAAC;YACH,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,GAAG,6BAA6B,CAChE,YAAY,EACZ,YAAY,CACb,CAAC;YACF,MAAM,gBAAgB,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC;YACpD,MAAM,cAAc,GAAG,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC;YACtE,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,gBAAgB,EAAE,cAAc,EAAE,CAAC;QACxE,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,SAAS,GAAG,eAAe,CAAC,SAAS,EAAE,CAAU,CAAC,CAAC;YACnD,YAAY,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IAED,MAAM,IAAI,KAAK,CACb,mBAAmB,mDAA+B,yFAAyF,SAAS,EAAE,OAAO,EAAE,CAChK,CAAC;AACJ,CAAC;AAGD,SAAS,2BAA2B,CAClC,YAAqB,EACrB,WAAoB,EACpB,UAAkB;IAElB,IAAI,YAAY,GAAkB,IAAI,CAAC;IACvC,IAAI,UAAU,GAAkB,IAAI,CAAC;IACrC,IAAI,SAAS,GAAG,EAAE,CAAC;IAEnB,KAAK,IAAI,UAAU,GAAG,CAAC,EAAE,UAAU,IAAI,WAAW,CAAC,MAAM,EAAE,UAAU,EAAE,EAAE,CAAC;QACxE,MAAM,YAAY,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;QACtD,MAAM,OAAO,GAAG,IAAA,yBAAgB,EAAC,YAAY,EAAE,YAAY,CAAC,CAAC;QAE7D,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACzB,YAAY,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC;YACnD,UAAU,GAAG,UAAU,CAAC,MAAM,CAAC;YAC/B,MAAM;QACR,CAAC;QAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,MAAM,eAAe,GAAG,IAAA,qBAAY,EAAC,YAAY,CAAC,CAAC;YACnD,MAAM,SAAS,GAAG,OAAO;iBACtB,GAAG,CACF,CAAC,CAAC,EAAE,EAAE,CACJ,QAAQ,YAAY,CAAC,CAAC,CAAC,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,CAC7F;iBACA,IAAI,CAAC,IAAI,CAAC,CAAC;YACd,SAAS,GAAG,0CAA0C,eAAe,kBAAkB,OAAO,CAAC,MAAM,eAAe,SAAS,GAAG,CAAC;QACnI,CAAC;IACH,CAAC;IAED,IAAI,YAAY,KAAK,IAAI,IAAI,UAAU,KAAK,IAAI,EAAE,CAAC;QACjD,MAAM,cAAc,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAC/C,MAAM,IAAI,KAAK,CACb,SAAS;YACP,0CAA0C,IAAA,qBAAY,EACpD,cAAc,CACf,yBAAyB,CAC7B,CAAC;IACJ,CAAC;IACD,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,CAAC;AACtC,CAAC;AAED,SAAgB,oBAAoB,CAClC,YAAqB,EACrB,mBAA4B,EAC5B,UAAkB;IAElB,IAAI,mBAAmB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrC,MAAM,IAAI,KAAK,CACb,iCAAiC,iDAA6B,UAAU,CACzE,CAAC;IACJ,CAAC;IAED,IAAI,YAAY,GAAG,CAAC,GAAG,mBAAmB,CAAC,CAAC;IAC5C,IAAI,SAAS,GAAiB,IAAI,CAAC;IAGnC,OAAO,YAAY,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAChC,IAAI,CAAC;YACH,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,GAAG,2BAA2B,CAC9D,YAAY,EACZ,YAAY,EACZ,UAAU,CACX,CAAC;YACF,MAAM,gBAAgB,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC;YACpD,MAAM,cAAc,GAAG,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC;YACtE,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,gBAAgB,EAAE,cAAc,EAAE,CAAC;QACxE,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,SAAS,GAAG,eAAe,CAAC,SAAS,EAAE,CAAU,CAAC,CAAC;YACnD,YAAY,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC3C,CAAC;IACH,CAAC;IAED,MAAM,IAAI,KAAK,CACb,mBAAmB,iDAA6B,yFAAyF,SAAS,EAAE,OAAO,EAAE,CAC9J,CAAC;AACJ,CAAC;AAQD,SAAS,mBAAmB,CAC1B,YAAqB,EACrB,WAAoB;IAEpB,IAAI,YAAY,GAAmB,IAAI,CAAC;IACxC,IAAI,WAAW,GAAkB,IAAI,CAAC;IACtC,IAAI,eAAe,GAAkB,IAAI,CAAC;IAE1C,KAAK,IAAI,UAAU,GAAG,CAAC,EAAE,UAAU,GAAG,WAAW,CAAC,MAAM,EAAE,UAAU,EAAE,EAAE,CAAC;QACvE,MAAM,aAAa,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;QACvD,MAAM,aAAa,GAAG,IAAA,yBAAgB,EAAC,YAAY,EAAE,aAAa,CAAC,CAAC;QAEpE,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC/B,YAAY,GAAG,aAAa,CAAC;YAC7B,WAAW,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;YAC/B,eAAe,GAAG,IAAI,CAAC;YACvB,MAAM;QACR,CAAC;QACD,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,MAAM,SAAS,GAAG,IAAA,qBAAY,EAAC,aAAa,CAAC,CAAC;YAC9C,MAAM,SAAS,GAAG,aAAa;iBAC5B,GAAG,CACF,CAAC,CAAC,EAAE,EAAE,CACJ,QAAQ,YAAY,CAAC,CAAC,CAAC,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,CAC7F;iBACA,IAAI,CAAC,IAAI,CAAC,CAAC;YACd,eAAe,GAAG,0CAA0C,SAAS,kBAAkB,aAAa,CAAC,MAAM,eAAe,SAAS,GAAG,CAAC;QACzI,CAAC;QACD,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC/B,IAAI,CAAC,eAAe,EAAE,CAAC;gBACrB,MAAM,SAAS,GAAG,IAAA,qBAAY,EAAC,aAAa,CAAC,CAAC;gBAC9C,eAAe,GAAG,6BAA6B,SAAS,GAAG,CAAC;YAC9D,CAAC;YACD,MAAM;QACR,CAAC;IACH,CAAC;IAED,IAAI,CAAC,YAAY,IAAI,WAAW,KAAK,IAAI,EAAE,CAAC;QAC1C,MAAM,IAAI,KAAK,CAAC,eAAe,IAAI,wCAAwC,CAAC,CAAC;IAC/E,CAAC;IAED,MAAM,YAAY,GAAG,YAAY,CAAC,WAAW,CAAC,CAAC,UAAU,CAAC;IAE1D,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,YAAY,EAAE,CAAC;AACrD,CAAC;AAMD,SAAS,mBAAmB,CAC1B,YAAqB,EACrB,WAAoB,EACpB,YAAqB,EACrB,WAAmB;IAEnB,IAAI,YAAY,GAAmB,IAAI,CAAC;IACxC,IAAI,WAAW,GAAkB,IAAI,CAAC;IACtC,IAAI,eAAe,GAAkB,IAAI,CAAC;IAC1C,MAAM,gBAAgB,GAAG,WAAW,GAAG,YAAY,CAAC,MAAM,CAAC;IAC3D,MAAM,iBAAiB,GAAG,YAAY,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;IAE/D,KACE,IAAI,UAAU,GAAG,CAAC,EAClB,UAAU,IAAI,WAAW,CAAC,MAAM,GAAG,YAAY,CAAC,MAAM,EACtD,UAAU,EAAE,EACZ,CAAC;QACD,MAAM,aAAa,GAAG,WAAW,CAAC,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,UAAU,CAAC,CAAC;QACzE,MAAM,aAAa,GAAG,IAAA,yBAAgB,EAAC,iBAAiB,EAAE,aAAa,CAAC,CAAC;QAEzE,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC/B,YAAY,GAAG,aAAa,CAAC;YAC7B,WAAW,GAAG,gBAAgB,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;YAClD,eAAe,GAAG,IAAI,CAAC;YACvB,MAAM;QACR,CAAC;QACD,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,MAAM,SAAS,GAAG,IAAA,qBAAY,EAAC,aAAa,CAAC,CAAC;YAC9C,MAAM,SAAS,GAAG,aAAa;iBAC5B,GAAG,CACF,CAAC,CAAC,EAAE,EAAE,CACJ,QACE,YAAY,CAAC,gBAAgB,GAAG,CAAC,CAAC,CAAC,aAAa,EAAE,GAAG;gBACnD,CAAC,CAAC,YAAY,CAAC,gBAAgB,GAAG,CAAC,CAAC,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC;gBAC1D,CAAC,CAAC,GACN,EAAE,CACL;iBACA,IAAI,CAAC,IAAI,CAAC,CAAC;YACd,eAAe,GAAG,0CAA0C,SAAS,eAAe,aAAa,CAAC,MAAM,4BAA4B,SAAS,GAAG,CAAC;QACnJ,CAAC;IACH,CAAC;IACD,IAAI,CAAC,YAAY,IAAI,WAAW,KAAK,IAAI,EAAE,CAAC;QAC1C,IAAI,eAAe,EAAE,CAAC;YACpB,MAAM,IAAI,KAAK,CAAC,eAAe,CAAC,CAAC;QACnC,CAAC;QACD,MAAM,cAAc,GAAG,QACrB,YAAY,CAAC,WAAW,CAAC,CAAC,aAAa,EAAE,GAAG;YAC1C,CAAC,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC,aAAa,CAAC,GAAG,GAAG,CAAC;YACjD,CAAC,CAAC,GACN,EAAE,CAAC;QACH,MAAM,eAAe,GAAG,IAAA,qBAAY,EAAC,YAAY,CAAC,CAAC;QACnD,MAAM,cAAc,GAAG,IAAA,qBAAY,EACjC,WAAW,CAAC,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAC1C,CAAC;QAEF,MAAM,IAAI,KAAK,CACb,kEAAkE,eAAe,eAAe,cAAc,KAAK;YACjH,sCAAsC,cAAc,4BAA4B,CACnF,CAAC;IACJ,CAAC;IAED,MAAM,UAAU,GACd,YAAY,CAAC,WAAW,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC;IAE/D,OAAO,EAAE,UAAU,EAAE,CAAC;AACxB,CAAC;AAED,SAAgB,mBAAmB,CACjC,YAAqB,EACrB,mBAA4B;IAE5B,IAAI,mBAAmB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CACb,uEAAuE,CACxE,CAAC;IACJ,CAAC;IAED,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,MAAM,OAAO,GAAG,CAAC,CAAC;IAClB,IAAI,SAAS,GAAiB,IAAI,CAAC;IAEnC,IAAI,UAAU,GAAwB,IAAI,CAAC;IAG3C,IAAI,YAAY,GAAG,CAAC,GAAG,mBAAmB,CAAC,CAAC;IAC5C,OAAO,YAAY,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QAChC,IAAI,CAAC;YACH,UAAU,GAAG,mBAAmB,CAAC,YAAY,EAAE,YAAY,CAAC,CAAC;YAE7D,SAAS,GAAG,mBAAmB,CAAC,MAAM,GAAG,YAAY,CAAC,MAAM,CAAC;YAC7D,MAAM;QACR,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,SAAS,GAAG,eAAe,CAAC,SAAS,EAAE,CAAU,CAAC,CAAC;YAEnD,YAAY,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IAED,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,IAAI,KAAK,CACb,+GAA+G,SAAS,EAAE,OAAO,EAAE,CACpI,CAAC;IACJ,CAAC;IAID,YAAY,GAAG,mBAAmB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IACpD,SAAS,GAAG,IAAI,CAAC;IAEjB,OAAO,YAAY,CAAC,MAAM,IAAI,UAAU,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAEjE,IAAI,CAAC;YACH,MAAM,EAAE,UAAU,EAAE,GAAG,mBAAmB,CACxC,YAAY,EACZ,YAAY,EACZ,UAAU,CAAC,YAAY,EACvB,UAAU,CAAC,WAAW,CACvB,CAAC;YAEF,MAAM,gBAAgB,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC;YACpD,MAAM,cAAc,GAAG,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC;YACtE,OAAO;gBACL,YAAY,EAAE,UAAU,CAAC,YAAY;gBACrC,UAAU;gBACV,gBAAgB;gBAChB,cAAc;aACf,CAAC;QACJ,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,SAAS,GAAG,eAAe,CAAC,SAAS,EAAE,CAAU,CAAC,CAAC;YAEnD,YAAY,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAC3C,CAAC;IACH,CAAC;IAED,MAAM,IAAI,KAAK,CACb,+GAA+G,SAAS,EAAE,OAAO,EAAE,CACpI,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.TiktokenTokenizer = void 0;
|
|
4
|
+
const js_tiktoken_1 = require("js-tiktoken");
|
|
5
|
+
class TiktokenTokenizer {
|
|
6
|
+
constructor() {
|
|
7
|
+
this.enc = (0, js_tiktoken_1.getEncoding)('cl100k_base');
|
|
8
|
+
}
|
|
9
|
+
tokenize(content) {
|
|
10
|
+
const tokens = [];
|
|
11
|
+
const encoded = this.enc.encode(content);
|
|
12
|
+
let currentIndex = 0;
|
|
13
|
+
for (const tokenId of encoded) {
|
|
14
|
+
const text = this.enc.decode([tokenId]);
|
|
15
|
+
const length = text.length;
|
|
16
|
+
tokens.push({
|
|
17
|
+
text,
|
|
18
|
+
type: 'bpe',
|
|
19
|
+
startIndex: currentIndex,
|
|
20
|
+
endIndex: currentIndex + length,
|
|
21
|
+
});
|
|
22
|
+
currentIndex += length;
|
|
23
|
+
}
|
|
24
|
+
return tokens;
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
exports.TiktokenTokenizer = TiktokenTokenizer;
|
|
28
|
+
//# sourceMappingURL=tiktoken-tokenizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tiktoken-tokenizer.js","sourceRoot":"","sources":["../../../../packages/tokenpatch/strategies/tiktoken-tokenizer.ts"],"names":[],"mappings":";;;AAAA,6CAA0C;AAI1C,MAAa,iBAAiB;IAA9B;QAEU,QAAG,GAAG,IAAA,yBAAW,EAAC,aAAa,CAAC,CAAC;IA4B3C,CAAC;IA1BC,QAAQ,CAAC,OAAe;QACtB,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAEzC,IAAI,YAAY,GAAG,CAAC,CAAC;QAIrB,KAAK,MAAM,OAAO,IAAI,OAAO,EAAE,CAAC;YAG5B,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;YACxC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC;YAE3B,MAAM,CAAC,IAAI,CAAC;gBACR,IAAI;gBACJ,IAAI,EAAE,KAAK;gBACX,UAAU,EAAE,YAAY;gBACxB,QAAQ,EAAE,YAAY,GAAG,MAAM;aAGlC,CAAC,CAAC;YACH,YAAY,IAAI,MAAM,CAAC;QAC3B,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AA9BD,8CA8BC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { Token } from '../types';
|
|
2
|
+
import { TokenizerStrategy } from '../tokenizer.interface';
|
|
3
|
+
import type { Parser } from 'web-tree-sitter';
|
|
4
|
+
export declare class TreeSitterTokenizer implements TokenizerStrategy {
|
|
5
|
+
private parser;
|
|
6
|
+
constructor(parser: Parser);
|
|
7
|
+
tokenize(content: string): Token[];
|
|
8
|
+
private collectTokens;
|
|
9
|
+
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.TreeSitterTokenizer = void 0;
|
|
4
|
+
class TreeSitterTokenizer {
|
|
5
|
+
constructor(parser) {
|
|
6
|
+
this.parser = parser;
|
|
7
|
+
}
|
|
8
|
+
tokenize(content) {
|
|
9
|
+
const tree = this.parser.parse(content);
|
|
10
|
+
return this.collectTokens(tree, content);
|
|
11
|
+
}
|
|
12
|
+
collectTokens(tree, code) {
|
|
13
|
+
const tokens = [];
|
|
14
|
+
function visit(node) {
|
|
15
|
+
if (node.childCount === 0) {
|
|
16
|
+
tokens.push({
|
|
17
|
+
text: code.slice(node.startIndex, node.endIndex),
|
|
18
|
+
type: node.type,
|
|
19
|
+
startIndex: node.startIndex,
|
|
20
|
+
endIndex: node.endIndex,
|
|
21
|
+
startPosition: node.startPosition,
|
|
22
|
+
});
|
|
23
|
+
return;
|
|
24
|
+
}
|
|
25
|
+
for (let i = 0; i < node.childCount; i++) {
|
|
26
|
+
const child = node.child(i);
|
|
27
|
+
if (child)
|
|
28
|
+
visit(child);
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
visit(tree.rootNode);
|
|
32
|
+
return tokens;
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
exports.TreeSitterTokenizer = TreeSitterTokenizer;
|
|
36
|
+
//# sourceMappingURL=tree-sitter-tokenizer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tree-sitter-tokenizer.js","sourceRoot":"","sources":["../../../../packages/tokenpatch/strategies/tree-sitter-tokenizer.ts"],"names":[],"mappings":";;;AAIA,MAAa,mBAAmB;IAC9B,YAAoB,MAAc;QAAd,WAAM,GAAN,MAAM,CAAQ;IAAG,CAAC;IAEtC,QAAQ,CAAC,OAAe;QACtB,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QACxC,OAAO,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC3C,CAAC;IAEO,aAAa,CAAC,IAAU,EAAE,IAAY;QAC5C,MAAM,MAAM,GAAY,EAAE,CAAC;QAE3B,SAAS,KAAK,CAAC,IAAU;YACvB,IAAI,IAAI,CAAC,UAAU,KAAK,CAAC,EAAE,CAAC;gBAC1B,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,QAAQ,CAAC;oBAChD,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,UAAU,EAAE,IAAI,CAAC,UAAU;oBAC3B,QAAQ,EAAE,IAAI,CAAC,QAAQ;oBACvB,aAAa,EAAE,IAAI,CAAC,aAAa;iBAClC,CAAC,CAAC;gBACH,OAAO;YACT,CAAC;YAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;gBACzC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC5B,IAAI,KAAK;oBAAE,KAAK,CAAC,KAAK,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACrB,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AAhCD,kDAgCC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tokenizer.interface.js","sourceRoot":"","sources":["../../../packages/tokenpatch/tokenizer.interface.ts"],"names":[],"mappings":""}
|
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
import type { Tree } from 'web-tree-sitter';
|
|
2
1
|
import type { Token } from './types';
|
|
3
|
-
export declare function collectTokens(tree: Tree, code: string): Token[];
|
|
4
2
|
export declare function tokensEqual(a: Token, b: Token): boolean;
|
|
5
3
|
export declare function formatAnchor(anchor: Token[]): string;
|
|
6
4
|
export declare function findAllSequences(haystack: Token[], needle: Token[]): number[];
|
|
@@ -1,32 +1,13 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.collectTokens = collectTokens;
|
|
4
3
|
exports.tokensEqual = tokensEqual;
|
|
5
4
|
exports.formatAnchor = formatAnchor;
|
|
6
5
|
exports.findAllSequences = findAllSequences;
|
|
7
|
-
function collectTokens(tree, code) {
|
|
8
|
-
const tokens = [];
|
|
9
|
-
function visit(node) {
|
|
10
|
-
if (node.childCount === 0) {
|
|
11
|
-
tokens.push({
|
|
12
|
-
text: code.slice(node.startIndex, node.endIndex),
|
|
13
|
-
type: node.type,
|
|
14
|
-
startIndex: node.startIndex,
|
|
15
|
-
endIndex: node.endIndex,
|
|
16
|
-
startPosition: node.startPosition,
|
|
17
|
-
});
|
|
18
|
-
return;
|
|
19
|
-
}
|
|
20
|
-
for (let i = 0; i < node.childCount; i++) {
|
|
21
|
-
const child = node.child(i);
|
|
22
|
-
if (child)
|
|
23
|
-
visit(child);
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
visit(tree.rootNode);
|
|
27
|
-
return tokens;
|
|
28
|
-
}
|
|
29
6
|
function tokensEqual(a, b) {
|
|
7
|
+
if (a.type === 'bpe' || b.type === 'bpe') {
|
|
8
|
+
return (a.text.replace(/\r?\n/g, '').trim() ===
|
|
9
|
+
b.text.replace(/\r?\n/g, '').trim());
|
|
10
|
+
}
|
|
30
11
|
return a.text === b.text;
|
|
31
12
|
}
|
|
32
13
|
function formatAnchor(anchor) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tokens.js","sourceRoot":"","sources":["../../../packages/tokenpatch/tokens.ts"],"names":[],"mappings":";;
|
|
1
|
+
{"version":3,"file":"tokens.js","sourceRoot":"","sources":["../../../packages/tokenpatch/tokens.ts"],"names":[],"mappings":";;AAEA,kCAWC;AAED,oCAEC;AAED,4CAYC;AA7BD,SAAgB,WAAW,CAAC,CAAQ,EAAE,CAAQ;IAI5C,IAAI,CAAC,CAAC,IAAI,KAAK,KAAK,IAAI,CAAC,CAAC,IAAI,KAAK,KAAK,EAAE,CAAC;QACzC,OAAO,CACL,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE;YACnC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CACpC,CAAC;IACJ,CAAC;IACD,OAAO,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,IAAI,CAAC;AAC3B,CAAC;AAED,SAAgB,YAAY,CAAC,MAAe;IAC1C,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC7C,CAAC;AAED,SAAgB,gBAAgB,CAAC,QAAiB,EAAE,MAAe;IACjE,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IACnC,MAAM,OAAO,GAAa,EAAE,CAAC;IAC7B,KAAK,EAAE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,QAAQ,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACjE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC7C,SAAS,KAAK,CAAC;YACjB,CAAC;QACH,CAAC;QACD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC"}
|