@oh-my-pi/hashline 15.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +78 -0
- package/dist/types/apply.d.ts +10 -0
- package/dist/types/diff-preview.d.ts +12 -0
- package/dist/types/format.d.ts +65 -0
- package/dist/types/fs.d.ts +80 -0
- package/dist/types/index.d.ts +16 -0
- package/dist/types/input.d.ts +85 -0
- package/dist/types/messages.d.ts +56 -0
- package/dist/types/mismatch.d.ts +35 -0
- package/dist/types/normalize.d.ts +20 -0
- package/dist/types/parser.d.ts +50 -0
- package/dist/types/patcher.d.ts +112 -0
- package/dist/types/prefixes.d.ts +34 -0
- package/dist/types/recovery.d.ts +38 -0
- package/dist/types/snapshots.d.ts +67 -0
- package/dist/types/stream.d.ts +2 -0
- package/dist/types/tokenizer.d.ts +104 -0
- package/dist/types/types.d.ts +93 -0
- package/package.json +61 -0
- package/src/apply.ts +799 -0
- package/src/diff-preview.ts +49 -0
- package/src/format.ts +110 -0
- package/src/fs.ts +167 -0
- package/src/grammar.lark +22 -0
- package/src/index.ts +16 -0
- package/src/input.ts +319 -0
- package/src/messages.ts +76 -0
- package/src/mismatch.ts +121 -0
- package/src/normalize.ts +38 -0
- package/src/parser.ts +350 -0
- package/src/patcher.ts +360 -0
- package/src/prefixes.ts +130 -0
- package/src/prompt.md +83 -0
- package/src/recovery.ts +163 -0
- package/src/snapshots.ts +171 -0
- package/src/stream.ts +132 -0
- package/src/tokenizer.ts +486 -0
- package/src/types.ts +87 -0
package/src/tokenizer.ts
ADDED
|
@@ -0,0 +1,486 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stateful, line-oriented classifier for hashline diff text.
|
|
3
|
+
*
|
|
4
|
+
* The {@link Tokenizer} can be fed in chunks ({@link Tokenizer.feed}/{@link
|
|
5
|
+
* Tokenizer.end}) for streaming use, or in one shot ({@link
|
|
6
|
+
* Tokenizer.tokenizeAll}). Each emitted token carries its 1-indexed source
|
|
7
|
+
* line number so downstream consumers (parser, validators, error messages)
|
|
8
|
+
* can refer back to the input precisely.
|
|
9
|
+
*
|
|
10
|
+
* The tokenizer is intentionally permissive about decorations and prefixes
|
|
11
|
+
* the model may echo back from `read`/`search` output — leading `*`/`>`/`-`
|
|
12
|
+
* markers, CR-terminated lines, leading whitespace before line numbers, and
|
|
13
|
+
* so on are all stripped before classification.
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import {
|
|
17
|
+
describeAnchorExamples,
|
|
18
|
+
HL_FILE_HASH_SEP,
|
|
19
|
+
HL_FILE_PREFIX,
|
|
20
|
+
HL_OP_DELETE,
|
|
21
|
+
HL_OP_INSERT_AFTER,
|
|
22
|
+
HL_OP_INSERT_BEFORE,
|
|
23
|
+
HL_OP_REPLACE,
|
|
24
|
+
HL_PAYLOAD_PREFIX,
|
|
25
|
+
} from "./format";
|
|
26
|
+
import { ABORT_MARKER, BEGIN_PATCH_MARKER, END_PATCH_MARKER } from "./messages";
|
|
27
|
+
import type { Anchor, Cursor, ParsedRange } from "./types";
|
|
28
|
+
|
|
29
|
+
// UTF-16 code units compared against `String.prototype.charCodeAt` results
// by the scanners in this module.

// Line terminators.
const CHAR_LINE_FEED = 10; // "\n"
const CHAR_CARRIAGE_RETURN = 13; // "\r"

// Horizontal whitespace that ends a header path.
const CHAR_TAB = 9; // "\t"
const CHAR_SPACE = 32; // " "

// Separator between a header path and its file hash.
const CHAR_HASH = 35; // "#"

// Digit and lowercase-hex bounds.
const CHAR_ZERO = 48; // "0"
const CHAR_NINE = 57; // "9"
const CHAR_LOWER_A = 97; // "a"
const CHAR_LOWER_F = 102; // "f"

// First code unit of the header prefix and of the payload prefix.
const CHAR_PILCROW = HL_FILE_PREFIX.charCodeAt(0);
const CHAR_PAYLOAD_PREFIX = HL_PAYLOAD_PREFIX.charCodeAt(0);

// Number of hex digits expected after `#` in a header.
const FILE_HASH_LENGTH = 4;
|
|
41
|
+
|
|
42
|
+
/** True when `code` is the code unit of an ASCII decimal digit (`0`-`9`). */
function isDigitCode(code: number): boolean {
	if (code < CHAR_ZERO) return false;
	return code <= CHAR_NINE;
}
|
|
45
|
+
|
|
46
|
+
/** True when `code` is the code unit of an ASCII digit `1`-`9` (zero excluded). */
function isNonZeroDigitCode(code: number): boolean {
	return CHAR_ZERO < code && code <= CHAR_NINE;
}
|
|
49
|
+
|
|
50
|
+
/** True when `code` is one of the line-marker decorations `*`, `-`, or `>`. */
function isDecorationCode(code: number): boolean {
	switch (code) {
		case 42: // "*"
		case 45: // "-"
		case 62: // ">"
			return true;
		default:
			return false;
	}
}
|
|
53
|
+
|
|
54
|
+
/**
 * True when `code` is the code unit of a lowercase hexadecimal digit
 * (`0`-`9` or `a`-`f`). Uppercase `A`-`F` is not accepted.
 */
function isHexDigitCode(code: number): boolean {
	if (isDigitCode(code)) return true;
	return CHAR_LOWER_A <= code && code <= CHAR_LOWER_F;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Return the index of the first non-whitespace character in
 * `line[index, end)`, or `end` when that window is entirely whitespace.
 * Whitespace is whatever `String.prototype.trimStart` strips.
 */
function skipWhitespace(line: string, index: number, end = line.length): number {
	const segment = line.slice(index, end);
	const remainder = segment.trimStart();
	// The remainder is right-aligned with `end`, so its length locates its start.
	return end - remainder.length;
}
|
|
61
|
+
|
|
62
|
+
/** Return the length of `line` once trailing whitespace is removed (the exclusive end of its content). */
function trimEndIndex(line: string): number {
	const withoutTrailing = line.trimEnd();
	return withoutTrailing.length;
}
|
|
65
|
+
|
|
66
|
+
/** True only for the zero-length string; whitespace-only lines are not considered empty. */
function isEmptyLine(line: string): boolean {
	return line === "";
}
|
|
69
|
+
|
|
70
|
+
/**
 * True when `line` equals `marker` once trailing whitespace is ignored.
 * Leading whitespace is significant.
 */
function markerLineEquals(line: string, marker: string): boolean {
	const candidate = line.trimEnd();
	return candidate === marker;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Split hashline diff text into lines on `\n`, dropping one `\r` that
 * directly precedes each line break (CRLF normalization) and one `\r` at the
 * very end of the text.
 *
 * Empty input yields a single empty line. A final `\n` terminates the last
 * line and does not add an extra empty entry, but blank lines before it are
 * kept (`"a\n\n"` gives `["a", ""]`).
 *
 * Intended for non-streaming callers that want a one-shot split.
 */
export function splitHashlineLines(text: string): string[] {
	if (text === "") return [""];

	const pieces = text.split("\n");
	// `split` reports an empty final piece when the text ends with a line
	// feed; that piece is the terminator, not a line.
	if (pieces[pieces.length - 1] === "") pieces.pop();

	return pieces.map((piece) => (piece.endsWith("\r") ? piece.slice(0, -1) : piece));
}
|
|
103
|
+
|
|
104
|
+
/**
 * Return an independent copy of `cursor`.
 *
 * Anchored cursors get a fresh wrapper and a fresh `anchor` object; `bof` and
 * `eof` cursors get a fresh wrapper as well, so the result never aliases the
 * input and can be mutated safely.
 */
export function cloneCursor(cursor: Cursor): Cursor {
	if (cursor.kind === "before_anchor") return { kind: "before_anchor", anchor: { ...cursor.anchor } };
	if (cursor.kind === "after_anchor") return { kind: "after_anchor", anchor: { ...cursor.anchor } };
	// `bof`/`eof` carry no nested state, so a shallow copy is a full clone.
	return { ...cursor };
}
|
|
109
|
+
|
|
110
|
+
/**
 * Skip the lenient prefix that may precede an anchor copied from read/search
 * output: leading whitespace, any run of `*`, `>`, `-` decoration characters,
 * then more whitespace. Returns the index where the anchor itself should
 * start (at most `end`).
 */
function skipDecoratedAnchorPrefix(line: string, end = trimEndIndex(line)): number {
	let cursor = skipWhitespace(line, 0, end);
	for (; cursor < end; cursor++) {
		if (!isDecorationCode(line.charCodeAt(cursor))) break;
	}
	return skipWhitespace(line, cursor, end);
}
|
|
118
|
+
|
|
119
|
+
/** Result of scanning a decimal line number out of a line. */
interface NumberScan {
	/** The parsed line number. */
	line: number;
	/** Index of the first character after the number's digits. */
	nextIndex: number;
}
|
|
123
|
+
|
|
124
|
+
/**
 * Scan a decimal line number starting exactly at `index`, reading no further
 * than `end`.
 *
 * The first character must be a digit `1`-`9`, so `0` and zero-prefixed
 * numbers are rejected. Returns the value together with the index just past
 * the last digit, or `null` when no number starts at `index`.
 */
function scanLineNumber(line: string, index: number, end: number): NumberScan | null {
	const startsNumber = index < end && isNonZeroDigitCode(line.charCodeAt(index));
	if (!startsNumber) return null;

	let value = 0;
	let cursor = index;
	for (; cursor < end; cursor++) {
		const code = line.charCodeAt(cursor);
		if (!isDigitCode(code)) break;
		value = value * 10 + (code - CHAR_ZERO);
	}
	return { line: value, nextIndex: cursor };
}
|
|
137
|
+
|
|
138
|
+
/**
 * Parse a bare line-number anchor (used by insert ops).
 *
 * Leading whitespace/decorations and trailing whitespace are tolerated, but
 * nothing else may surround the number.
 *
 * @param raw - Text expected to contain only the anchor.
 * @param lineNum - Source line number quoted in the error message.
 * @returns The anchor for the parsed line number.
 * @throws Error when `raw` is not a single well-formed line number.
 */
export function parseLid(raw: string, lineNum: number): Anchor {
	const end = trimEndIndex(raw);
	const scanned = scanLineNumber(raw, skipDecoratedAnchorPrefix(raw, end), end);

	// Accept only when the number is followed by nothing but whitespace.
	if (scanned !== null && skipWhitespace(raw, scanned.nextIndex, end) === end) {
		return { line: scanned.line };
	}

	throw new Error(
		`line ${lineNum}: expected a line number such as ${describeAnchorExamples("119")}; ` +
			`got ${JSON.stringify(raw)}. Use ${HL_FILE_PREFIX}PATH${HL_FILE_HASH_SEP}hash from your latest read for file-version binding.`,
	);
}
|
|
151
|
+
|
|
152
|
+
/** Result of scanning a `START` or `START-END` line range out of a line. */
interface RangeScan {
	/** The parsed range; `end` equals `start` when no `-END` part was present. */
	range: ParsedRange;
	/** Index of the first non-whitespace character after the range. */
	nextIndex: number;
}
|
|
156
|
+
|
|
157
|
+
/**
 * Scan a line range at the start of `line`, after any decorated prefix.
 *
 * Accepts a single line number (`12`) or a dash-separated pair (`12-15`). A
 * dash directly after the first number must be followed by a second number,
 * otherwise the scan fails. No ordering check is made between the two
 * numbers.
 *
 * @returns The range and the index of the next non-whitespace character, or
 *   `null` when the line does not start with a range.
 */
function scanRange(line: string, end = trimEndIndex(line)): RangeScan | null {
	const first = scanLineNumber(line, skipDecoratedAnchorPrefix(line, end), end);
	if (first === null) return null;

	let last = first;
	const hasDash = first.nextIndex < end && line.charCodeAt(first.nextIndex) === 45; // "-"
	if (hasDash) {
		const second = scanLineNumber(line, first.nextIndex + 1, end);
		if (second === null) return null;
		last = second;
	}

	return {
		range: { start: { line: first.line }, end: { line: last.line } },
		nextIndex: skipWhitespace(line, last.nextIndex, end),
	};
}
|
|
176
|
+
|
|
177
|
+
/**
 * True when `word` occurs in `line` at `index` and fits entirely before
 * `end`. The comparison is case-sensitive, code unit by code unit.
 */
function startsWithWord(line: string, index: number, end: number, word: string): boolean {
	const wordLength = word.length;
	if (index + wordLength > end) return false;

	let matched = 0;
	while (matched < wordLength && line.charCodeAt(index + matched) === word.charCodeAt(matched)) {
		matched++;
	}
	return matched === wordLength;
}
|
|
184
|
+
|
|
185
|
+
/**
 * Parse the target of an insert op: the keyword `BOF`, the keyword `EOF`, or
 * a bare line number.
 *
 * A keyword is recognized only when nothing but whitespace follows it;
 * everything else is delegated to {@link parseLid}.
 *
 * @param raw - The text preceding the insert sigil.
 * @param lineNum - Source line number forwarded to {@link parseLid} for its error message.
 * @param kind - Whether a line-number target becomes a `before_anchor` or `after_anchor` cursor.
 * @throws Error (from {@link parseLid}) when `raw` is neither a keyword nor a line number.
 */
function parseInsertTarget(raw: string, lineNum: number, kind: "before" | "after"): Cursor {
	const end = trimEndIndex(raw);
	const targetStart = skipDecoratedAnchorPrefix(raw, end);

	const isBareKeyword = (word: string): boolean =>
		startsWithWord(raw, targetStart, end, word) && skipWhitespace(raw, targetStart + word.length, end) === end;

	if (isBareKeyword("BOF")) return { kind: "bof" };
	if (isBareKeyword("EOF")) return { kind: "eof" };

	const anchor = parseLid(raw, lineNum);
	return kind === "before" ? { kind: "before_anchor", anchor } : { kind: "after_anchor", anchor };
}
|
|
199
|
+
|
|
200
|
+
/**
 * Return the text from `index` up to the end of the line's content (trailing
 * whitespace removed), or `undefined` when nothing remains. Whitespace at the
 * start of the body is preserved.
 */
function scanInlineBody(line: string, index: number): string | undefined {
	const end = trimEndIndex(line);
	if (index >= end) return undefined;
	return line.slice(index, end);
}
|
|
204
|
+
|
|
205
|
+
/** An insert op line: a target cursor plus optional text after the sigil. */
interface ParsedInsertOp {
	kind: "insert";
	/** Where the insertion lands. */
	cursor: Cursor;
	/** Text following the sigil on the same line, or `undefined` when there is none. */
	inlineBody: string | undefined;
}

/** A replace op line: a line range plus optional text after the sigil. */
interface ParsedReplaceOp {
	kind: "replace";
	/** Range named before the sigil. */
	range: ParsedRange;
	/** Text following the sigil on the same line, or `undefined` when there is none. */
	inlineBody: string | undefined;
}

/** A delete op line: a line range, with a flag for unexpected trailing content. */
interface ParsedDeleteOp {
	kind: "delete";
	/** Range named before the sigil. */
	range: ParsedRange;
	/** True when non-whitespace content follows the delete sigil. */
	trailingPayload: boolean;
}

/** Any op line recognized by the tokenizer, discriminated by `kind`. */
type ParsedOp = ParsedInsertOp | ParsedReplaceOp | ParsedDeleteOp;
|
|
224
|
+
|
|
225
|
+
/**
 * Try to read `line` as an insert op: a target (`BOF`, `EOF`, or a line
 * number), optional whitespace, then `sigil`, then an optional inline body.
 *
 * @param line - The full source line.
 * @param sigil - The operator character that marks this insert flavor.
 * @param kind - Whether a line-number target means "before" or "after" that line.
 * @returns The parsed op, or `null` when the line does not have this shape.
 */
function tryParseInsertOp(line: string, sigil: string, kind: "before" | "after"): ParsedInsertOp | null {
	const end = trimEndIndex(line);
	const targetStart = skipDecoratedAnchorPrefix(line, end);

	// Both keywords are three characters long; a line number replaces this below.
	let targetEnd = targetStart + 3;
	const isFileBoundary =
		startsWithWord(line, targetStart, end, "BOF") || startsWithWord(line, targetStart, end, "EOF");
	if (!isFileBoundary) {
		const anchor = scanLineNumber(line, targetStart, end);
		if (anchor === null) return null;
		targetEnd = anchor.nextIndex;
	}

	const opIndex = skipWhitespace(line, targetEnd, end);
	if (opIndex >= end) return null;
	if (line[opIndex] !== sigil) return null;

	// The prefix passed to parseInsertTarget has already cleared the
	// BOF/EOF/line-number scan above, so it is not expected to throw. The
	// catch is a safety net: this function reports failure only by returning
	// null, and a later change to the prefix scan must not make it raise.
	try {
		const cursor = parseInsertTarget(line.slice(0, opIndex), 0, kind);
		const inlineBody = scanInlineBody(line, opIndex + sigil.length);
		return { kind: "insert", cursor, inlineBody };
	} catch {
		return null;
	}
}
|
|
255
|
+
|
|
256
|
+
/**
 * Try to read `line` as a replace op: a line range, the replace sigil, then
 * an optional inline body. Returns `null` when the line does not have this
 * shape.
 */
function tryParseReplaceOp(line: string): ParsedReplaceOp | null {
	const end = trimEndIndex(line);
	const scan = scanRange(line, end);
	if (scan === null) return null;

	const sigilIndex = scan.nextIndex;
	if (sigilIndex >= end || line[sigilIndex] !== HL_OP_REPLACE) return null;

	const inlineBody = scanInlineBody(line, sigilIndex + HL_OP_REPLACE.length);
	return { kind: "replace", range: scan.range, inlineBody };
}
|
|
266
|
+
|
|
267
|
+
/**
 * Try to read `line` as a delete op: a line range followed by the delete
 * sigil. Content after the sigil (ignoring trailing whitespace) does not
 * reject the line; it is reported through `trailingPayload`. Returns `null`
 * when the line does not have this shape.
 */
function tryParseDeleteOp(line: string): ParsedDeleteOp | null {
	const end = trimEndIndex(line);
	const scan = scanRange(line, end);
	if (scan === null) return null;

	const sigilIndex = scan.nextIndex;
	if (sigilIndex >= end || line[sigilIndex] !== HL_OP_DELETE) return null;

	const trailingPayload = sigilIndex + HL_OP_DELETE.length !== end;
	return { kind: "delete", range: scan.range, trailingPayload };
}
|
|
274
|
+
|
|
275
|
+
/**
 * Try each op shape in a fixed order (insert-before, insert-after, replace,
 * delete) and return the first that matches, or `null` when none do.
 */
function tryParseOp(line: string): ParsedOp | null {
	const insertBefore = tryParseInsertOp(line, HL_OP_INSERT_BEFORE, "before");
	if (insertBefore !== null) return insertBefore;

	const insertAfter = tryParseInsertOp(line, HL_OP_INSERT_AFTER, "after");
	if (insertAfter !== null) return insertAfter;

	const replace = tryParseReplaceOp(line);
	if (replace !== null) return replace;

	return tryParseDeleteOp(line);
}
|
|
283
|
+
|
|
284
|
+
/**
 * Strict scan of a file header line.
 *
 * Accepted shape, in order:
 * 1. one or more file-prefix characters, starting at column 0;
 * 2. optional whitespace;
 * 3. a non-empty path, which ends at the first `#`, file-prefix character,
 *    space, or tab;
 * 4. optionally `#` followed by exactly {@link FILE_HASH_LENGTH} lowercase
 *    hex digits;
 * 5. optional trailing whitespace.
 *
 * @returns The path (and the hash when one is present), or `null` when the
 *   line deviates from this shape anywhere.
 */
function tryParseHeader(line: string): { path: string; fileHash?: string } | null {
	const end = trimEndIndex(line);
	if (end === 0) return null;
	if (line.charCodeAt(0) !== CHAR_PILCROW) return null;

	// Column 0 is already known to be a prefix character; consume the rest
	// of the run, then any whitespace before the path.
	let cursor = 1;
	while (cursor < end && line.charCodeAt(cursor) === CHAR_PILCROW) cursor++;
	cursor = skipWhitespace(line, cursor, end);
	if (cursor >= end) return null;

	const pathStart = cursor;
	for (; cursor < end; cursor++) {
		const code = line.charCodeAt(cursor);
		if (code === CHAR_HASH || code === CHAR_PILCROW || code === CHAR_SPACE || code === CHAR_TAB) break;
	}
	if (cursor === pathStart) return null;
	const path = line.slice(pathStart, cursor);

	const hasHash = cursor < end && line.charCodeAt(cursor) === CHAR_HASH;
	if (!hasHash) {
		// Without a hash, only trailing whitespace may follow the path.
		return skipWhitespace(line, cursor, end) === end ? { path } : null;
	}

	const hashStart = cursor + 1;
	const hashEnd = hashStart + FILE_HASH_LENGTH;
	if (hashEnd > end) return null;
	for (let probe = hashStart; probe < hashEnd; probe++) {
		if (!isHexDigitCode(line.charCodeAt(probe))) return null;
	}

	// Anything other than trailing whitespace after the hash disqualifies the header.
	if (skipWhitespace(line, hashEnd, end) !== end) return null;

	return { path, fileHash: line.slice(hashStart, hashEnd) };
}
|
|
325
|
+
|
|
326
|
+
/**
 * Returns true when the line scans as `LINE!payload` (delete sigil followed
 * by additional content). The parser uses this for the dedicated "deletes
 * only" diagnostic, separate from the standard "unrecognized op" path.
 *
 * This delegates to {@link tryParseDeleteOp} so that it always agrees with
 * the `trailingPayload` flag on `op-delete` tokens. In particular, trailing
 * whitespace after the sigil is not treated as payload.
 */
export function isDeleteOpWithPayload(line: string): boolean {
	return tryParseDeleteOp(line)?.trailingPayload === true;
}
|
|
340
|
+
|
|
341
|
+
/** Fields shared by every token. */
interface TokenBase {
	/** 1-indexed line number in the original input stream. */
	lineNum: number;
}

/**
 * One classified input line, discriminated by `kind`:
 *
 * - `blank`: a zero-length line
 * - `envelope-begin` / `envelope-end` / `abort`: the line equals the matching marker
 * - `header`: a file header, with its path and optional file hash
 * - `op-insert` / `op-replace` / `op-delete`: an op line and its parsed parts
 * - `payload`: a line starting with the payload prefix; `text` is the rest of the line
 * - `raw`: anything else; `text` is the whole line
 */
export type Token =
	| (TokenBase & { kind: "blank" })
	| (TokenBase & { kind: "envelope-begin" })
	| (TokenBase & { kind: "envelope-end" })
	| (TokenBase & { kind: "abort" })
	| (TokenBase & { kind: "header"; path: string; fileHash?: string })
	| (TokenBase & { kind: "op-insert"; cursor: Cursor; inlineBody: string | undefined })
	| (TokenBase & { kind: "op-replace"; range: ParsedRange; inlineBody: string | undefined })
	| (TokenBase & { kind: "op-delete"; range: ParsedRange; trailingPayload: boolean })
	| (TokenBase & { kind: "payload"; text: string })
	| (TokenBase & { kind: "raw"; text: string });
|
|
357
|
+
|
|
358
|
+
/**
 * Classify one complete line (no line terminator) into a {@link Token}.
 *
 * Checks run in a fixed order and the first match wins: blank line, envelope
 * markers, file header, payload line, op line. A line matching none of them
 * becomes a `raw` token. A line that starts with the header prefix but fails
 * the strict header scan falls through to the remaining checks.
 *
 * @param line - The line text.
 * @param lineNum - Line number stored on the resulting token.
 */
function classifyLine(line: string, lineNum: number): Token {
	if (isEmptyLine(line)) return { kind: "blank", lineNum };

	// Markers are compared with trailing whitespace ignored.
	const marker = line.trimEnd();
	if (marker === BEGIN_PATCH_MARKER) return { kind: "envelope-begin", lineNum };
	if (marker === END_PATCH_MARKER) return { kind: "envelope-end", lineNum };
	if (marker === ABORT_MARKER) return { kind: "abort", lineNum };

	const firstCode = line.charCodeAt(0);

	if (firstCode === CHAR_PILCROW) {
		const header = tryParseHeader(line);
		// `fileHash` is present on the parsed header only when the line had one.
		if (header !== null) return { kind: "header", lineNum, ...header };
	}

	if (firstCode === CHAR_PAYLOAD_PREFIX) {
		return { kind: "payload", lineNum, text: line.slice(HL_PAYLOAD_PREFIX.length) };
	}

	const op = tryParseOp(line);
	if (op === null) return { kind: "raw", lineNum, text: line };

	switch (op.kind) {
		case "insert":
			return { kind: "op-insert", lineNum, cursor: op.cursor, inlineBody: op.inlineBody };
		case "replace":
			return { kind: "op-replace", lineNum, range: op.range, inlineBody: op.inlineBody };
		default:
			return { kind: "op-delete", lineNum, range: op.range, trailingPayload: op.trailingPayload };
	}
}
|
|
389
|
+
|
|
390
|
+
/**
 * Stateful, line-oriented classifier for hashline diff text.
 *
 * Streaming use: call {@link feed} with successive chunks, then {@link end}.
 * Every completed line yields exactly one token; an unterminated tail stays
 * buffered until more text arrives or the stream is ended.
 *
 * Stateless use: {@link tokenize} and the `is*` predicates classify a single
 * whole line without touching the streaming buffer.
 */
export class Tokenizer {
	/** Text received so far that is not yet terminated by `\n`. */
	#buffer = "";
	/** Line number assigned to the next emitted token. */
	#nextLineNum = 1;
	/** Set by `end`; cleared by `reset`. */
	#closed = false;

	/**
	 * Ingest a chunk of input text and return one token per line completed
	 * by it.
	 *
	 * A trailing partial line (possibly ending in a lone `\r`) is kept in the
	 * buffer, so a CRLF pair split across two chunks is still normalized.
	 *
	 * @throws Error when called after `end` without an intervening `reset`.
	 */
	feed(chunk: string): Token[] {
		if (this.#closed) throw new Error("Tokenizer is closed; call reset() before reusing.");
		if (chunk === "") return [];
		this.#buffer += chunk;
		return this.#drainCompleteLines();
	}

	/**
	 * Flush the buffered final line (input that did not end with a newline)
	 * and mark the tokenizer closed. Returns `[]` when nothing is buffered or
	 * when the tokenizer is already closed; reuse requires `reset`.
	 */
	end(): Token[] {
		if (this.#closed) return [];
		this.#closed = true;

		const residual = this.#buffer;
		this.#buffer = "";
		if (residual === "") return [];

		// A lone trailing CR is treated as part of the line terminator.
		const lastLine = residual.endsWith("\r") ? residual.slice(0, -1) : residual;
		return [classifyLine(lastLine, this.#nextLineNum++)];
	}

	/** Discard any buffered text, reset the line counter to 1, and reopen the tokenizer. */
	reset(): void {
		this.#closed = false;
		this.#nextLineNum = 1;
		this.#buffer = "";
	}

	/** Convenience: reset, feed the entire text, and flush. Leaves the tokenizer closed. */
	tokenizeAll(text: string): Token[] {
		this.reset();
		const completed = this.feed(text);
		const flushed = this.end();
		return flushed.length > 0 ? completed.concat(flushed) : completed;
	}

	/** Stateless one-shot classification. Does not touch the streaming buffer. */
	tokenize(line: string, lineNum = 0): Token {
		return classifyLine(line, lineNum);
	}

	/** True when `line` parses as an insert, replace, or delete op. */
	isOp(line: string): boolean {
		return tryParseOp(line) !== null;
	}

	/** True when `line` passes the strict file-header scan. */
	isHeader(line: string): boolean {
		return tryParseHeader(line) !== null;
	}

	/** True when `line` is the begin-patch, end-patch, or abort marker (trailing whitespace ignored). */
	isEnvelopeMarker(line: string): boolean {
		const marker = line.trimEnd();
		return marker === BEGIN_PATCH_MARKER || marker === END_PATCH_MARKER || marker === ABORT_MARKER;
	}

	/**
	 * Emit a token for every `\n`-terminated line in the buffer and keep the
	 * unterminated remainder buffered.
	 */
	#drainCompleteLines(): Token[] {
		const pending = this.#buffer;
		const tokens: Token[] = [];

		let lineStart = 0;
		let newline = pending.indexOf("\n", lineStart);
		while (newline !== -1) {
			// Drop a CR directly before the LF so CRLF counts as one line break.
			const hasCarriageReturn =
				newline > lineStart && pending.charCodeAt(newline - 1) === CHAR_CARRIAGE_RETURN;
			const lineEnd = hasCarriageReturn ? newline - 1 : newline;
			tokens.push(classifyLine(pending.slice(lineStart, lineEnd), this.#nextLineNum++));
			lineStart = newline + 1;
			newline = pending.indexOf("\n", lineStart);
		}

		this.#buffer = pending.slice(lineStart);
		return tokens;
	}
}
|
|
485
|
+
|
|
486
|
+
export type { ParsedRange } from "./types";
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure data types shared across the hashline parser, applier, and patcher.
|
|
3
|
+
* Nothing in this file references a filesystem, agent runtime, or schema
|
|
4
|
+
* library — keep it that way.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/** A line-number anchor (1-indexed). */
export interface Anchor {
	/** The anchored line number. */
	line: number;
}

/**
 * Where an `insert` edit should land relative to existing content: at the
 * beginning of the file, at the end of the file, or before/after an anchored
 * line.
 */
export type Cursor =
	| { kind: "bof" }
	| { kind: "eof" }
	| { kind: "before_anchor"; anchor: Anchor }
	| { kind: "after_anchor"; anchor: Anchor };

/**
 * A single low-level edit produced by the parser and consumed by the applier.
 * Multi-line replacements decompose to one `insert` per replacement line plus
 * one `delete` per consumed line.
 */
export type Edit =
	| { kind: "insert"; cursor: Cursor; text: string; lineNum: number; index: number }
	| { kind: "delete"; anchor: Anchor; lineNum: number; index: number; oldAssertion?: string };
|
|
27
|
+
|
|
28
|
+
/** Result of applying a parsed set of edits to a text body. */
export interface ApplyResult {
	/** The text body after all edits. */
	text: string;
	/** First line number (1-indexed) that changed; `undefined` for a no-op apply. */
	firstChangedLine?: number;
	/** Diagnostic warnings collected by the applier (auto-absorb, boundary checks, …). */
	warnings?: string[];
}

/** Optional knobs forwarded to {@link Edit} application. */
export interface ApplyOptions {
	/**
	 * When `true`, pure-insert and single-line replacement-boundary duplicates
	 * are dropped opportunistically. Default `false`: only multi-line block
	 * duplicates and structural-boundary single lines are absorbed.
	 */
	autoDropPureInsertDuplicates?: boolean;
}
|
|
47
|
+
|
|
48
|
+
/**
 * A parsed line range, from anchor `start` to anchor `end`. A range written
 * as a single line number has `end` equal to `start`.
 */
export interface ParsedRange {
	/** First line named by the range. */
	start: Anchor;
	/** Last line named by the range. */
	end: Anchor;
}
|
|
53
|
+
|
|
54
|
+
/** Optional hints for {@link splitPatchInput}. */
export interface SplitOptions {
	/** Resolves absolute paths inside hashline headers to cwd-relative form. */
	cwd?: string;
	/**
	 * Fallback path used when the input lacks a `¶PATH` header but contains
	 * recognizable hashline operations. Lets streaming previews work before
	 * the model has written the header.
	 */
	path?: string;
}

/** Streaming-formatter knobs for {@link streamHashLines}. */
export interface StreamOptions {
	/** First line number to use when formatting (1-indexed, default 1). */
	startLine?: number;
	/** Maximum formatted lines per yielded chunk (default 200). */
	maxChunkLines?: number;
	/** Maximum UTF-8 bytes per yielded chunk (default 64 KiB). */
	maxChunkBytes?: number;
}
|
|
75
|
+
|
|
76
|
+
/** Result of {@link buildCompactDiffPreview}. */
export interface CompactDiffPreview {
	/** The rendered preview text. */
	preview: string;
	/** Count of added lines. */
	addedLines: number;
	/** Count of removed lines. */
	removedLines: number;
}

/** Optional knobs for {@link buildCompactDiffPreview}. Reserved for future use. */
export interface CompactDiffOptions {
	/** Maximum entries kept on each side of an unchanged-context truncation (default 2). */
	maxUnchangedRun?: number;
}
|