@oh-my-pi/hashline 15.5.12 → 15.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/types/format.d.ts +37 -23
- package/dist/types/input.d.ts +3 -3
- package/dist/types/messages.d.ts +14 -34
- package/dist/types/parser.d.ts +0 -53
- package/dist/types/recovery.d.ts +11 -13
- package/dist/types/snapshots.d.ts +36 -114
- package/dist/types/tokenizer.d.ts +10 -53
- package/dist/types/types.d.ts +7 -11
- package/package.json +3 -2
- package/src/apply.ts +334 -53
- package/src/format.ts +64 -28
- package/src/grammar.lark +10 -10
- package/src/input.ts +10 -13
- package/src/messages.ts +17 -36
- package/src/mismatch.ts +3 -4
- package/src/parser.ts +71 -329
- package/src/patcher.ts +21 -43
- package/src/prompt.md +43 -44
- package/src/recovery.ts +22 -72
- package/src/snapshots.ts +84 -390
- package/src/tokenizer.ts +102 -155
- package/src/types.ts +9 -13
package/src/tokenizer.ts
CHANGED
|
@@ -1,31 +1,27 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Stateful, line-oriented classifier for hashline diff text.
|
|
3
3
|
*
|
|
4
|
-
* The {@link Tokenizer} can be fed in chunks ({@link Tokenizer.feed}/{@link
|
|
5
|
-
* Tokenizer.end}) for streaming use, or in one shot ({@link
|
|
6
|
-
* Tokenizer.tokenizeAll}). Each emitted token carries its 1-indexed source
|
|
7
|
-
* line number so downstream consumers (parser, validators, error messages)
|
|
8
|
-
* can refer back to the input precisely.
|
|
9
|
-
*
|
|
10
4
|
* Format shape:
|
|
11
5
|
* ```
|
|
12
|
-
*
|
|
13
|
-
*
|
|
6
|
+
* ¶path/to/file.ts#0A3
|
|
7
|
+
* replace 5..7:
|
|
14
8
|
* +literal new line
|
|
15
|
-
* &3,4
|
|
16
9
|
* ```
|
|
17
|
-
* Each `***` line opens a new file section; each `@@ A,B @@` line opens a
|
|
18
|
-
* new hunk whose body (zero or more `+`/`&` rows) replaces the selected
|
|
19
|
-
* range. Empty body = delete the selected range.
|
|
20
10
|
*/
|
|
21
|
-
|
|
22
11
|
import {
|
|
23
12
|
describeAnchorExamples,
|
|
13
|
+
HL_DELETE_KEYWORD,
|
|
24
14
|
HL_FILE_HASH_LENGTH,
|
|
25
15
|
HL_FILE_HASH_SEP,
|
|
26
16
|
HL_FILE_PREFIX,
|
|
27
|
-
|
|
17
|
+
HL_HEADER_COLON,
|
|
18
|
+
HL_INSERT_AFTER,
|
|
19
|
+
HL_INSERT_BEFORE,
|
|
20
|
+
HL_INSERT_HEAD,
|
|
21
|
+
HL_INSERT_KEYWORD,
|
|
22
|
+
HL_INSERT_TAIL,
|
|
28
23
|
HL_PAYLOAD_REPLACE,
|
|
24
|
+
HL_REPLACE_KEYWORD,
|
|
29
25
|
} from "./format";
|
|
30
26
|
import { ABORT_MARKER, BEGIN_PATCH_MARKER, END_PATCH_MARKER } from "./messages";
|
|
31
27
|
import type { Anchor, Cursor, ParsedRange } from "./types";
|
|
@@ -46,10 +42,8 @@ const CHAR_UPPER_F = 70;
|
|
|
46
42
|
const CHAR_LOWER_A = 97;
|
|
47
43
|
const CHAR_LOWER_F = 102;
|
|
48
44
|
const CHAR_PAYLOAD_REPLACE = HL_PAYLOAD_REPLACE.charCodeAt(0);
|
|
49
|
-
const
|
|
45
|
+
const CHAR_COLON = HL_HEADER_COLON.charCodeAt(0);
|
|
50
46
|
const FILE_PREFIX_LENGTH = HL_FILE_PREFIX.length;
|
|
51
|
-
const BOF_ANCHOR = "BOF";
|
|
52
|
-
const EOF_ANCHOR = "EOF";
|
|
53
47
|
|
|
54
48
|
function isDigitCode(code: number): boolean {
|
|
55
49
|
return code >= CHAR_ZERO && code <= CHAR_NINE;
|
|
@@ -91,14 +85,8 @@ function markerLineEquals(line: string, marker: string): boolean {
|
|
|
91
85
|
return end === marker.length && line.startsWith(marker);
|
|
92
86
|
}
|
|
93
87
|
|
|
94
|
-
/**
|
|
95
|
-
* Split a hashline diff into individual lines without losing the trailing
|
|
96
|
-
* empty line that callers may rely on for explicit blank payloads. CRLF pairs
|
|
97
|
-
* are normalized to a single line break.
|
|
98
|
-
*/
|
|
99
88
|
export function splitHashlineLines(text: string): string[] {
|
|
100
89
|
if (text.length === 0) return [""];
|
|
101
|
-
|
|
102
90
|
const lines: string[] = [];
|
|
103
91
|
let start = 0;
|
|
104
92
|
for (let index = 0; index < text.length; index++) {
|
|
@@ -108,7 +96,6 @@ export function splitHashlineLines(text: string): string[] {
|
|
|
108
96
|
lines.push(text.slice(start, end));
|
|
109
97
|
start = index + 1;
|
|
110
98
|
}
|
|
111
|
-
|
|
112
99
|
if (start < text.length) {
|
|
113
100
|
let end = text.length;
|
|
114
101
|
if (end > start && text.charCodeAt(end - 1) === CHAR_CARRIAGE_RETURN) end--;
|
|
@@ -119,6 +106,7 @@ export function splitHashlineLines(text: string): string[] {
|
|
|
119
106
|
|
|
120
107
|
export function cloneCursor(cursor: Cursor): Cursor {
|
|
121
108
|
if (cursor.kind === "before_anchor") return { kind: "before_anchor", anchor: { ...cursor.anchor } };
|
|
109
|
+
if (cursor.kind === "after_anchor") return { kind: "after_anchor", anchor: { ...cursor.anchor } };
|
|
122
110
|
return cursor;
|
|
123
111
|
}
|
|
124
112
|
|
|
@@ -129,7 +117,6 @@ interface NumberScan {
|
|
|
129
117
|
|
|
130
118
|
function scanLineNumber(line: string, index: number, end: number): NumberScan | null {
|
|
131
119
|
if (index >= end || !isNonZeroDigitCode(line.charCodeAt(index))) return null;
|
|
132
|
-
|
|
133
120
|
let lineNumber = 0;
|
|
134
121
|
let nextIndex = index;
|
|
135
122
|
while (nextIndex < end) {
|
|
@@ -160,36 +147,6 @@ interface RangeScan {
|
|
|
160
147
|
nextIndex: number;
|
|
161
148
|
}
|
|
162
149
|
|
|
163
|
-
/**
|
|
164
|
-
* Scan a numeric range for a hunk header. Canonical form is `A B` (two
|
|
165
|
-
* numbers separated by whitespace); models also reflexively emit `A-B`,
|
|
166
|
-
* `A..B`, and `A…B` (unicode ellipsis), so we accept any of those as the
|
|
167
|
-
* range separator. A second number is REQUIRED — bare `A` is not a valid
|
|
168
|
-
* hunk header in this grammar. Repeat-row bodies (`&A..B`) keep their own
|
|
169
|
-
* parser and still accept the `&A` single-line shorthand; see
|
|
170
|
-
* {@link tryParseRepeatPayload}.
|
|
171
|
-
*/
|
|
172
|
-
function scanHeaderRange(line: string, index = 0, end = trimEndIndex(line)): RangeScan | null {
|
|
173
|
-
const numberStart = skipWhitespace(line, index, end);
|
|
174
|
-
const start = scanLineNumber(line, numberStart, end);
|
|
175
|
-
if (start === null) return null;
|
|
176
|
-
|
|
177
|
-
const afterFirst = scanRangeSeparator(line, start.nextIndex, end);
|
|
178
|
-
if (afterFirst === null) return null;
|
|
179
|
-
const endNumber = scanLineNumber(line, afterFirst, end);
|
|
180
|
-
if (endNumber === null) return null;
|
|
181
|
-
return {
|
|
182
|
-
range: { start: { line: start.line }, end: { line: endNumber.line } },
|
|
183
|
-
nextIndex: skipWhitespace(line, endNumber.nextIndex, end),
|
|
184
|
-
};
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
/**
|
|
188
|
-
* Consume the mandatory range separator (whitespace, `-`, `..`, or `…`)
|
|
189
|
-
* between the two numbers of a hunk-header range. Returns the index of
|
|
190
|
-
* the second number, or `null` when the separator is missing or no digit
|
|
191
|
-
* follows it.
|
|
192
|
-
*/
|
|
193
150
|
function scanRangeSeparator(line: string, index: number, end: number): number | null {
|
|
194
151
|
let cursor = index;
|
|
195
152
|
let consumedSeparator = false;
|
|
@@ -217,39 +174,105 @@ function scanRangeSeparator(line: string, index: number, end: number): number |
|
|
|
217
174
|
return cursor;
|
|
218
175
|
}
|
|
219
176
|
|
|
220
|
-
|
|
177
|
+
function scanHeaderRange(line: string, index = 0, end = trimEndIndex(line), allowSingle = false): RangeScan | null {
|
|
178
|
+
const numberStart = skipWhitespace(line, index, end);
|
|
179
|
+
const start = scanLineNumber(line, numberStart, end);
|
|
180
|
+
if (start === null) return null;
|
|
181
|
+
const afterFirst = scanRangeSeparator(line, start.nextIndex, end);
|
|
182
|
+
if (afterFirst === null) {
|
|
183
|
+
if (!allowSingle) return null;
|
|
184
|
+
return {
|
|
185
|
+
range: { start: { line: start.line }, end: { line: start.line } },
|
|
186
|
+
nextIndex: skipWhitespace(line, start.nextIndex, end),
|
|
187
|
+
};
|
|
188
|
+
}
|
|
189
|
+
const endNumber = scanLineNumber(line, afterFirst, end);
|
|
190
|
+
if (endNumber === null) return null;
|
|
191
|
+
return {
|
|
192
|
+
range: { start: { line: start.line }, end: { line: endNumber.line } },
|
|
193
|
+
nextIndex: skipWhitespace(line, endNumber.nextIndex, end),
|
|
194
|
+
};
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
export type BlockTarget =
|
|
198
|
+
| { kind: "replace"; range: ParsedRange }
|
|
199
|
+
| { kind: "delete"; range: ParsedRange }
|
|
200
|
+
| { kind: "insert_before"; anchor: Anchor }
|
|
201
|
+
| { kind: "insert_after"; anchor: Anchor }
|
|
202
|
+
| { kind: "bof" }
|
|
203
|
+
| { kind: "eof" };
|
|
221
204
|
|
|
222
205
|
interface TargetScan {
|
|
223
206
|
target: BlockTarget;
|
|
224
207
|
nextIndex: number;
|
|
225
208
|
}
|
|
226
209
|
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
210
|
+
function scanKeyword(line: string, index: number, end: number, keyword: string): number | null {
|
|
211
|
+
if (!line.startsWith(keyword, index)) return null;
|
|
212
|
+
const next = index + keyword.length;
|
|
213
|
+
if (next < end) {
|
|
214
|
+
const code = line.charCodeAt(next);
|
|
215
|
+
if (!isWhitespaceCode(code) && code !== CHAR_COLON) return null;
|
|
216
|
+
}
|
|
217
|
+
return next;
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
function consumeOptionalColon(line: string, index: number, end: number): number {
|
|
221
|
+
const cursor = skipWhitespace(line, index, end);
|
|
222
|
+
return cursor < end && line.charCodeAt(cursor) === CHAR_COLON ? skipWhitespace(line, cursor + 1, end) : cursor;
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
function scanInsertTarget(line: string, index: number, end: number): TargetScan | null {
|
|
226
|
+
const cursor = skipWhitespace(line, index, end);
|
|
227
|
+
const beforeEnd = scanKeyword(line, cursor, end, HL_INSERT_BEFORE);
|
|
228
|
+
if (beforeEnd !== null) {
|
|
229
|
+
const anchor = scanLineNumber(line, skipWhitespace(line, beforeEnd, end), end);
|
|
230
|
+
if (anchor === null) return null;
|
|
231
|
+
const nextIndex = consumeOptionalColon(line, anchor.nextIndex, end);
|
|
232
|
+
return { target: { kind: "insert_before", anchor: { line: anchor.line } }, nextIndex };
|
|
233
|
+
}
|
|
234
|
+
const afterEnd = scanKeyword(line, cursor, end, HL_INSERT_AFTER);
|
|
235
|
+
if (afterEnd !== null) {
|
|
236
|
+
const anchor = scanLineNumber(line, skipWhitespace(line, afterEnd, end), end);
|
|
237
|
+
if (anchor === null) return null;
|
|
238
|
+
const nextIndex = consumeOptionalColon(line, anchor.nextIndex, end);
|
|
239
|
+
return { target: { kind: "insert_after", anchor: { line: anchor.line } }, nextIndex };
|
|
240
|
+
}
|
|
241
|
+
const headEnd = scanKeyword(line, cursor, end, HL_INSERT_HEAD);
|
|
242
|
+
if (headEnd !== null) return { target: { kind: "bof" }, nextIndex: consumeOptionalColon(line, headEnd, end) };
|
|
243
|
+
const tailEnd = scanKeyword(line, cursor, end, HL_INSERT_TAIL);
|
|
244
|
+
if (tailEnd !== null) return { target: { kind: "eof" }, nextIndex: consumeOptionalColon(line, tailEnd, end) };
|
|
245
|
+
return null;
|
|
246
|
+
}
|
|
247
|
+
|
|
232
248
|
function scanHunkAnchor(line: string, start: number, end: number): TargetScan | null {
|
|
233
249
|
const cursor = skipWhitespace(line, start, end);
|
|
234
|
-
|
|
235
|
-
|
|
250
|
+
const replaceEnd = scanKeyword(line, cursor, end, HL_REPLACE_KEYWORD);
|
|
251
|
+
if (replaceEnd !== null) {
|
|
252
|
+
const range = scanHeaderRange(line, replaceEnd, end, true);
|
|
253
|
+
if (range === null) return null;
|
|
254
|
+
return {
|
|
255
|
+
target: { kind: "replace", range: range.range },
|
|
256
|
+
nextIndex: consumeOptionalColon(line, range.nextIndex, end),
|
|
257
|
+
};
|
|
236
258
|
}
|
|
237
|
-
|
|
238
|
-
|
|
259
|
+
const deleteEnd = scanKeyword(line, cursor, end, HL_DELETE_KEYWORD);
|
|
260
|
+
if (deleteEnd !== null) {
|
|
261
|
+
const range = scanHeaderRange(line, deleteEnd, end, true);
|
|
262
|
+
if (range === null) return null;
|
|
263
|
+
const next = skipWhitespace(line, range.nextIndex, end);
|
|
264
|
+
if (next < end && line.charCodeAt(next) === CHAR_COLON) return null;
|
|
265
|
+
return { target: { kind: "delete", range: range.range }, nextIndex: next };
|
|
239
266
|
}
|
|
240
|
-
const
|
|
241
|
-
if (
|
|
242
|
-
return
|
|
267
|
+
const insertEnd = scanKeyword(line, cursor, end, HL_INSERT_KEYWORD);
|
|
268
|
+
if (insertEnd !== null) return scanInsertTarget(line, insertEnd, end);
|
|
269
|
+
return null;
|
|
243
270
|
}
|
|
244
271
|
|
|
245
272
|
interface ParsedHunkHeader {
|
|
246
273
|
target: BlockTarget;
|
|
247
274
|
}
|
|
248
275
|
|
|
249
|
-
/**
|
|
250
|
-
* Parse a bare hunk-header line: `A B` (range) or the keywords
|
|
251
|
-
* `BOF` / `EOF`. Returns `null` for lines that do not match the shape.
|
|
252
|
-
*/
|
|
253
276
|
function tryParseHunkHeader(line: string): ParsedHunkHeader | null {
|
|
254
277
|
const end = trimEndIndex(line);
|
|
255
278
|
const start = skipWhitespace(line, 0, end);
|
|
@@ -260,46 +283,11 @@ function tryParseHunkHeader(line: string): ParsedHunkHeader | null {
|
|
|
260
283
|
return { target: scan.target };
|
|
261
284
|
}
|
|
262
285
|
|
|
263
|
-
/**
|
|
264
|
-
* Parse a `&A,B` repeat payload row (or `&A` shorthand for `&A,A`). Returns
|
|
265
|
-
* `null` when the line does not match.
|
|
266
|
-
*/
|
|
267
|
-
function tryParseRepeatPayload(line: string): ParsedRange | null {
|
|
268
|
-
const end = trimEndIndex(line);
|
|
269
|
-
if (line.length === 0 || line.charCodeAt(0) !== CHAR_PAYLOAD_REPEAT) return null;
|
|
270
|
-
|
|
271
|
-
const start = scanLineNumber(line, 1, end);
|
|
272
|
-
if (start === null) return null;
|
|
273
|
-
if (start.nextIndex === end) {
|
|
274
|
-
// `&A` shorthand → `&A,A`.
|
|
275
|
-
return { start: { line: start.line }, end: { line: start.line } };
|
|
276
|
-
}
|
|
277
|
-
if (
|
|
278
|
-
start.nextIndex + 1 >= end ||
|
|
279
|
-
line.charCodeAt(start.nextIndex) !== CHAR_DOT ||
|
|
280
|
-
line.charCodeAt(start.nextIndex + 1) !== CHAR_DOT
|
|
281
|
-
)
|
|
282
|
-
return null;
|
|
283
|
-
|
|
284
|
-
const finish = scanLineNumber(line, start.nextIndex + 2, end);
|
|
285
|
-
if (finish === null) return null;
|
|
286
|
-
if (skipWhitespace(line, finish.nextIndex, end) !== end) return null;
|
|
287
|
-
return { start: { line: start.line }, end: { line: finish.line } };
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
/**
|
|
291
|
-
* Parse a `¶PATH[#hash]` file-header line. Returns `null` for lines that
|
|
292
|
-
* do not start with the file prefix or that fail the strict shape.
|
|
293
|
-
*
|
|
294
|
-
* `*** Begin Patch` / `*** End Patch` / `*** Abort` markers are matched
|
|
295
|
-
* earlier in {@link classifyLine}, so envelope markers never reach here.
|
|
296
|
-
*/
|
|
297
286
|
function tryParseHeader(line: string): { path: string; fileHash?: string } | null {
|
|
298
287
|
if (!line.startsWith(HL_FILE_PREFIX)) return null;
|
|
299
288
|
const end = trimEndIndex(line);
|
|
300
289
|
let index = FILE_PREFIX_LENGTH;
|
|
301
290
|
if (index >= end) return null;
|
|
302
|
-
|
|
303
291
|
const pathStart = index;
|
|
304
292
|
while (index < end) {
|
|
305
293
|
const code = line.charCodeAt(index);
|
|
@@ -308,7 +296,6 @@ function tryParseHeader(line: string): { path: string; fileHash?: string } | nul
|
|
|
308
296
|
}
|
|
309
297
|
if (index === pathStart) return null;
|
|
310
298
|
const path = line.slice(pathStart, index);
|
|
311
|
-
|
|
312
299
|
let fileHash: string | undefined;
|
|
313
300
|
if (index < end && line.charCodeAt(index) === CHAR_HASH) {
|
|
314
301
|
const hashStart = index + 1;
|
|
@@ -320,15 +307,11 @@ function tryParseHeader(line: string): { path: string; fileHash?: string } | nul
|
|
|
320
307
|
fileHash = line.slice(hashStart, hashEnd).toUpperCase();
|
|
321
308
|
index = hashEnd;
|
|
322
309
|
}
|
|
323
|
-
|
|
324
|
-
// Anything other than trailing whitespace disqualifies the header.
|
|
325
310
|
if (skipWhitespace(line, index, end) !== end) return null;
|
|
326
|
-
|
|
327
311
|
return fileHash !== undefined ? { path, fileHash } : { path };
|
|
328
312
|
}
|
|
329
313
|
|
|
330
314
|
interface TokenBase {
|
|
331
|
-
/** 1-indexed line number in the original input stream. */
|
|
332
315
|
lineNum: number;
|
|
333
316
|
}
|
|
334
317
|
|
|
@@ -340,7 +323,6 @@ export type Token =
|
|
|
340
323
|
| (TokenBase & { kind: "header"; path: string; fileHash?: string })
|
|
341
324
|
| (TokenBase & { kind: "op-block"; target: BlockTarget })
|
|
342
325
|
| (TokenBase & { kind: "payload-literal"; text: string })
|
|
343
|
-
| (TokenBase & { kind: "payload-repeat"; range: ParsedRange })
|
|
344
326
|
| (TokenBase & { kind: "raw"; text: string });
|
|
345
327
|
|
|
346
328
|
function classifyLine(line: string, lineNum: number): Token {
|
|
@@ -348,9 +330,7 @@ function classifyLine(line: string, lineNum: number): Token {
|
|
|
348
330
|
if (markerLineEquals(line, BEGIN_PATCH_MARKER)) return { kind: "envelope-begin", lineNum };
|
|
349
331
|
if (markerLineEquals(line, END_PATCH_MARKER)) return { kind: "envelope-end", lineNum };
|
|
350
332
|
if (markerLineEquals(line, ABORT_MARKER)) return { kind: "abort", lineNum };
|
|
351
|
-
|
|
352
333
|
const firstCode = line.charCodeAt(0);
|
|
353
|
-
|
|
354
334
|
if (line.startsWith(HL_FILE_PREFIX)) {
|
|
355
335
|
const header = tryParseHeader(line);
|
|
356
336
|
if (header !== null) {
|
|
@@ -359,48 +339,24 @@ function classifyLine(line: string, lineNum: number): Token {
|
|
|
359
339
|
: { kind: "header", lineNum, path: header.path };
|
|
360
340
|
}
|
|
361
341
|
}
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
const isHunkLead = isNonZeroDigitCode(firstCode) || line.startsWith(BOF_ANCHOR) || line.startsWith(EOF_ANCHOR);
|
|
342
|
+
const lead = skipWhitespace(line, 0);
|
|
343
|
+
const isHunkLead =
|
|
344
|
+
line.startsWith(HL_REPLACE_KEYWORD, lead) ||
|
|
345
|
+
line.startsWith(HL_DELETE_KEYWORD, lead) ||
|
|
346
|
+
line.startsWith(HL_INSERT_KEYWORD, lead);
|
|
368
347
|
if (isHunkLead) {
|
|
369
348
|
const hunk = tryParseHunkHeader(line);
|
|
370
349
|
if (hunk !== null) return { kind: "op-block", lineNum, target: hunk.target };
|
|
371
350
|
}
|
|
372
|
-
|
|
373
|
-
if (firstCode === CHAR_PAYLOAD_REPLACE) {
|
|
374
|
-
return { kind: "payload-literal", lineNum, text: line.slice(1) };
|
|
375
|
-
}
|
|
376
|
-
if (firstCode === CHAR_PAYLOAD_REPEAT) {
|
|
377
|
-
const range = tryParseRepeatPayload(line);
|
|
378
|
-
if (range !== null) return { kind: "payload-repeat", lineNum, range };
|
|
379
|
-
}
|
|
380
|
-
|
|
351
|
+
if (firstCode === CHAR_PAYLOAD_REPLACE) return { kind: "payload-literal", lineNum, text: line.slice(1) };
|
|
381
352
|
return { kind: "raw", lineNum, text: line };
|
|
382
353
|
}
|
|
383
354
|
|
|
384
|
-
/**
|
|
385
|
-
* Stateful, line-oriented classifier for hashline diff text. Use the
|
|
386
|
-
* streaming {@link feed}/{@link end} pair to ingest text in chunks (each
|
|
387
|
-
* completed line emits exactly one token; a trailing partial line stays
|
|
388
|
-
* buffered until the next chunk or {@link end}). Use the stateless
|
|
389
|
-
* {@link tokenize}/predicate methods for callers that already hold whole
|
|
390
|
-
* lines and only need classification without buffering.
|
|
391
|
-
*/
|
|
392
355
|
export class Tokenizer {
|
|
393
356
|
#buffer = "";
|
|
394
357
|
#nextLineNum = 1;
|
|
395
358
|
#closed = false;
|
|
396
359
|
|
|
397
|
-
/**
|
|
398
|
-
* Ingest a chunk of input text. Each newline-terminated line in the
|
|
399
|
-
* combined buffer produces one token. A trailing partial line (no `\n`
|
|
400
|
-
* yet, possibly ending in a lone `\r`) stays buffered until the next
|
|
401
|
-
* `feed`/`end` call so CRLF pairs that straddle chunk boundaries are
|
|
402
|
-
* still normalized correctly.
|
|
403
|
-
*/
|
|
404
360
|
feed(chunk: string): Token[] {
|
|
405
361
|
if (this.#closed) throw new Error("Tokenizer is closed; call reset() before reusing.");
|
|
406
362
|
if (chunk.length === 0) return [];
|
|
@@ -408,11 +364,6 @@ export class Tokenizer {
|
|
|
408
364
|
return this.#drainCompleteLines();
|
|
409
365
|
}
|
|
410
366
|
|
|
411
|
-
/**
|
|
412
|
-
* Flush any buffered residual line (the last line of input when it lacks
|
|
413
|
-
* a trailing newline) and mark the tokenizer closed. Calling `end` a
|
|
414
|
-
* second time returns `[]`; reuse requires `reset`.
|
|
415
|
-
*/
|
|
416
367
|
end(): Token[] {
|
|
417
368
|
if (this.#closed) return [];
|
|
418
369
|
this.#closed = true;
|
|
@@ -421,18 +372,15 @@ export class Tokenizer {
|
|
|
421
372
|
if (buf.length === 0) return [];
|
|
422
373
|
let stop = buf.length;
|
|
423
374
|
if (buf.charCodeAt(stop - 1) === CHAR_CARRIAGE_RETURN) stop--;
|
|
424
|
-
|
|
425
|
-
return [token];
|
|
375
|
+
return [classifyLine(buf.slice(0, stop), this.#nextLineNum++)];
|
|
426
376
|
}
|
|
427
377
|
|
|
428
|
-
/** Discard any buffered text and reset the line counter to 1. */
|
|
429
378
|
reset(): void {
|
|
430
379
|
this.#buffer = "";
|
|
431
380
|
this.#nextLineNum = 1;
|
|
432
381
|
this.#closed = false;
|
|
433
382
|
}
|
|
434
383
|
|
|
435
|
-
/** Convenience: feed an entire text and immediately flush. */
|
|
436
384
|
tokenizeAll(text: string): Token[] {
|
|
437
385
|
this.reset();
|
|
438
386
|
const first = this.feed(text);
|
|
@@ -440,7 +388,6 @@ export class Tokenizer {
|
|
|
440
388
|
return last.length === 0 ? first : first.concat(last);
|
|
441
389
|
}
|
|
442
390
|
|
|
443
|
-
/** Stateless one-shot classification. Does not touch the streaming buffer. */
|
|
444
391
|
tokenize(line: string, lineNum = 0): Token {
|
|
445
392
|
return classifyLine(line, lineNum);
|
|
446
393
|
}
|
package/src/types.ts
CHANGED
|
@@ -9,14 +9,18 @@ export interface Anchor {
|
|
|
9
9
|
line: number;
|
|
10
10
|
}
|
|
11
11
|
|
|
12
|
-
/** Where an `insert`
|
|
13
|
-
export type Cursor =
|
|
12
|
+
/** Where an `insert` edit should land relative to existing content. */
|
|
13
|
+
export type Cursor =
|
|
14
|
+
| { kind: "bof" }
|
|
15
|
+
| { kind: "eof" }
|
|
16
|
+
| { kind: "before_anchor"; anchor: Anchor }
|
|
17
|
+
| { kind: "after_anchor"; anchor: Anchor };
|
|
14
18
|
|
|
15
19
|
/**
|
|
16
20
|
* A single low-level edit produced by the parser and consumed by the applier.
|
|
17
|
-
* Multi-line replacements decompose to one `insert
|
|
18
|
-
*
|
|
19
|
-
*
|
|
21
|
+
* Multi-line replacements decompose to one `insert` per replacement line plus
|
|
22
|
+
* one `delete` per consumed line. Replacement payloads are tagged so the
|
|
23
|
+
* applier can distinguish literal insertion from new content for a deleted
|
|
20
24
|
* line.
|
|
21
25
|
*/
|
|
22
26
|
export type Edit =
|
|
@@ -28,14 +32,6 @@ export type Edit =
|
|
|
28
32
|
index: number;
|
|
29
33
|
mode?: "replacement";
|
|
30
34
|
}
|
|
31
|
-
| {
|
|
32
|
-
kind: "repeat";
|
|
33
|
-
cursor: Cursor;
|
|
34
|
-
range: ParsedRange;
|
|
35
|
-
lineNum: number;
|
|
36
|
-
index: number;
|
|
37
|
-
mode?: "replacement";
|
|
38
|
-
}
|
|
39
35
|
| { kind: "delete"; anchor: Anchor; lineNum: number; index: number; oldAssertion?: string };
|
|
40
36
|
|
|
41
37
|
/** Result of applying a parsed set of edits to a text body. */
|