@prometheus-ai/hashline 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ /**
2
+ * Minimal text-shape normalization: line-ending detection / round-trip and
3
+ * BOM stripping. The patcher uses these to canonicalize text to LF before
4
+ * applying edits and to restore the original shape on write-back.
5
+ */
6
+
7
/** The two line-terminator styles this module can detect and restore: LF or CRLF. */
export type LineEnding = "\n" | "\r\n";
8
+
9
/**
 * Report which line terminator appears first in `content`.
 *
 * @param content Text to inspect.
 * @returns `"\r\n"` when the earliest line feed is immediately preceded by a carriage
 *   return, otherwise `"\n"` (including when the text has no line feed at all).
 */
export function detectLineEnding(content: string): LineEnding {
  const firstLf = content.indexOf("\n");
  // The earliest terminator is CRLF exactly when the first LF has a CR right before it.
  const precededByCr = firstLf > 0 && content.charAt(firstLf - 1) === "\r";
  return precededByCr ? "\r\n" : "\n";
}
17
+
18
/**
 * Rewrite every CRLF pair and every lone CR in `text` as a single LF.
 *
 * @param text Text with any mix of line terminators.
 * @returns The same text using LF as its only line terminator.
 */
export function normalizeToLF(text: string): string {
  // `\r\n` is listed first so a CRLF pair collapses to one LF rather than two.
  const lines = text.split(/\r\n|\r/);
  return lines.join("\n");
}
22
+
23
/**
 * Re-encode text with the requested line ending.
 *
 * The input is expected to be LF-normalized, but a terminator that is already CRLF is
 * left as a single CRLF instead of being expanded to `\r\r\n`, so the conversion is
 * idempotent.
 *
 * @param text Text whose line terminators should be rewritten.
 * @param ending Target line ending.
 * @returns `text` unchanged when `ending` is LF; otherwise `text` with every LF
 *   (optionally preceded by one CR) replaced by CRLF.
 */
export function restoreLineEndings(text: string, ending: LineEnding): string {
  if (ending !== "\r\n") return text;
  // Consume an optional preceding CR so an existing CRLF is not doubled.
  return text.replace(/\r?\n/g, "\r\n");
}
27
+
28
/** Outcome of splitting an optional leading byte-order mark off a string. */
export interface BomResult {
  /** The BOM that was removed, or the empty string when there was none. */
  bom: string;
  /** The input with any leading BOM removed. */
  text: string;
}
34
+
35
/**
 * Split a leading byte-order mark (U+FEFF) off `content`.
 *
 * @param content Decoded text that may start with a BOM.
 * @returns The BOM that was found (or `""`) together with the remaining text.
 */
export function stripBom(content: string): BomResult {
  const BOM = "\uFEFF";
  if (!content.startsWith(BOM)) return { bom: "", text: content };
  return { bom: BOM, text: content.slice(BOM.length) };
}
package/src/parser.ts ADDED
@@ -0,0 +1,325 @@
1
+ /**
2
+ * Token-driven state machine that turns a stream of {@link Token}s into a
3
+ * flat list of {@link Edit}s. Sits between the {@link Tokenizer} and the
4
+ * applier.
5
+ */
6
+ import { HL_PAYLOAD_REPLACE } from "./format";
7
+ import {
8
+ BARE_BODY_AUTO_PIPED_WARNING,
9
+ DELETE_BLOCK_TAKES_NO_BODY,
10
+ DELETE_TAKES_NO_BODY,
11
+ EMPTY_BLOCK,
12
+ EMPTY_INSERT,
13
+ MINUS_ROW_REJECTED,
14
+ } from "./messages";
15
+ import { type BlockTarget, cloneCursor, type ParsedRange, type Token, Tokenizer } from "./tokenizer";
16
+ import type { Anchor, Cursor, Edit } from "./types";
17
+
18
/**
 * Reject a range whose end line is lower than its start line.
 *
 * @param range Range taken from a hunk header.
 * @param lineNum Patch line the header came from; used as the error prefix.
 * @throws Error when `range.end.line < range.start.line`.
 */
function validateRangeOrder(range: ParsedRange, lineNum: number): void {
  const { start, end } = range;
  const isReversed = end.line < start.line;
  if (!isReversed) return;
  throw new Error(`line ${lineNum}: range ${start.line}..${end.line} ends before it starts.`);
}
23
+
24
/**
 * Expand an inclusive range into one anchor per line.
 *
 * @param range Range whose `start.line` and `end.line` bound the expansion.
 * @returns Anchors for `start.line`, `start.line + 1`, ... up to and including `end.line`;
 *   empty when the end precedes the start.
 */
function expandRange(range: ParsedRange): Anchor[] {
  const expanded: Anchor[] = [];
  let current = range.start.line;
  while (current <= range.end.line) {
    expanded.push({ line: current });
    current += 1;
  }
  return expanded;
}
29
+
30
/**
 * Tell whether a raw row is a `#` comment.
 *
 * @param line Raw row text.
 * @returns `true` when the first non-whitespace character of `line` is `#`.
 */
function isSkippableCommentLine(line: string): boolean {
  const body = line.trimStart();
  return body.charAt(0) === "#";
}
33
+
34
/**
 * Recognise rows written in a foreign patch dialect (apply_patch sentinels, unified-diff
 * hunk headers) or in a malformed hashline header shape, and explain how to rewrite them.
 *
 * Checks run on the left-trimmed row, in this order: apply_patch file sentinels, a
 * numeric `@@ -N,M +N,M @@` header, any other `@@`-prefixed row, `delete ...:` with a
 * colon, a bare line number, and a bare line range.
 *
 * @param text Raw row text.
 * @param _hasPending Whether a hunk is currently collecting a body; accepted but not
 *   consulted by the current checks.
 * @returns A diagnostic message (without the `line N:` prefix) when the row matches one
 *   of the rejected shapes, otherwise `null`.
 */
function detectApplyPatchContamination(text: string, _hasPending: boolean): string | null {
  const trimmed = text.trimStart();
  if (trimmed.length === 0) return null;

  // Quoted excerpt of the offending row, capped at 48 UTF-16 code units plus an ellipsis.
  const preview = (): string => JSON.stringify(trimmed.length > 48 ? `${trimmed.slice(0, 48)}…` : trimmed);

  if (
    trimmed.startsWith("*** Update File:") ||
    trimmed.startsWith("*** Add File:") ||
    trimmed.startsWith("*** Delete File:") ||
    trimmed.startsWith("*** Move to:")
  ) {
    return (
      `apply_patch sentinel ${preview()} is not valid in hashline. ` +
      "File sections start with `[path#HASH]` (no `Update File:` / `Add File:` keyword). " +
      "Use `replace N..M:`, `delete N..M`, or `insert before|after|head|tail:` ops."
    );
  }
  if (/^@@\s+[-+]?\d+,\d+\s+[-+]?\d+,\d+\s+@@/.test(trimmed)) {
    return (
      "unified-diff hunk header (`@@ -N,M +N,M @@`) is not valid in hashline. " +
      "Use `replace N..M:`, `delete N..M`, or `insert before|after|head|tail:` ops."
    );
  }
  if (trimmed.startsWith("@@")) {
    return (
      `\`@@\`-bracketed hunk header ${preview()} is not valid in hashline. ` +
      "Drop the `@@ ... @@` brackets and write a verb header such as `replace N..M:`."
    );
  }
  if (/^delete\s+[1-9]\d*(?:\s*(?:\.\.|-|…|\s)\s*[1-9]\d*)?\s*:/.test(trimmed)) {
    return "`delete N..M` has no colon and no body. Remove the colon and body rows.";
  }
  // Capture only the digits: `trimmed` may still carry trailing whitespace, which must
  // not leak into the suggested `replace N..N:` / `delete N` headers.
  const bareLine = /^([1-9]\d*)\s*$/.exec(trimmed);
  if (bareLine !== null) {
    const lineRef = bareLine[1];
    return `hunk headers need a verb. Use \`replace ${lineRef}..${lineRef}:\` to replace, or \`delete ${lineRef}\` to delete.`;
  }
  const bareRange = /^([1-9]\d*)\s*[-. …]+\s*([1-9]\d*)\s*:?$/.exec(trimmed);
  if (bareRange !== null) {
    return (
      `bare range hunk header ${JSON.stringify(trimmed)} is not valid. ` +
      `Hunk headers need a verb: write \`replace ${bareRange[1]}..${bareRange[2]}:\` or \`delete ${bareRange[1]}..${bareRange[2]}\`.`
    );
  }
  return null;
}
78
+
79
/** A `#` comment row held back until the next token decides whether it is skipped or replayed. */
interface PendingComment {
  /** Raw text of the comment row. */
  text: string;
  /** Patch line number the row came from. */
  lineNum: number;
}
83
+
84
/** One body row collected for the pending hunk, with the patch line it came from. */
type PayloadRow = {
  kind: "literal";
  text: string;
  lineNum: number;
};
85
+
86
/** The hunk whose header has been read and whose body rows are still being collected. */
interface Pending {
  /** Target parsed from the hunk header. */
  target: BlockTarget;
  /** Patch line number of the hunk header. */
  lineNum: number;
  /** Body rows gathered so far, in input order. */
  payloads: PayloadRow[];
}
91
+
92
/**
 * Folds a stream of tokens into a flat list of edits.
 *
 * At most one hunk is pending at a time. Body rows accumulate on it until the next
 * `op-block`, a `header` token, or the end of input flushes it into insert / delete /
 * block edits. `#` rows that arrive while no hunk is pending are parked: a following
 * `op-block` discards them, any other token replays them through the raw-row handler.
 * After `envelope-end` or `abort`, further tokens are ignored.
 */
export class Executor {
  /** Edits emitted so far, in emission order. */
  #edits: Edit[] = [];
  /** Warnings collected while parsing. */
  #warnings: string[] = [];
  /** Next value handed out as an edit's `index`. */
  #editIndex = 0;
  /** Hunk currently collecting body rows, if any. */
  #pending: Pending | undefined;
  /** Set by `envelope-end` / `abort`; later tokens are dropped. */
  #terminated = false;
  /** `#` rows seen with no pending hunk, awaiting the next token. */
  #skippableComments: PendingComment[] = [];

  /** Forget the parked comment rows without processing them. */
  #discardPendingSkippableComments(): void {
    this.#skippableComments = [];
  }

  /** Replay the parked comment rows through the raw-row handler, then clear the queue. */
  #consumePendingSkippableComments(): void {
    for (const { text, lineNum } of this.#skippableComments) this.#handleRaw(text, lineNum);
    this.#skippableComments = [];
  }

  /**
   * Advance the state machine by one token.
   *
   * @param token Next token from the tokenizer.
   * @throws Error when the token is not valid in the current state (see the handlers).
   */
  feed(token: Token): void {
    if (this.#terminated) return;
    switch (token.kind) {
      case "abort":
        this.#terminated = true;
        return;
      case "envelope-end":
        this.#consumePendingSkippableComments();
        this.#terminated = true;
        return;
      case "envelope-begin":
      case "blank":
        this.#consumePendingSkippableComments();
        return;
      case "header":
        this.#consumePendingSkippableComments();
        this.#flushPending();
        return;
      case "payload-literal":
        this.#consumePendingSkippableComments();
        this.#handleLiteralPayload(token.text, token.lineNum);
        return;
      case "raw": {
        const { text, lineNum } = token;
        // Outside a hunk, a `#` row is parked until the next token decides its fate.
        if (this.#pending === undefined && isSkippableCommentLine(text)) {
          this.#skippableComments.push({ text, lineNum });
          return;
        }
        this.#consumePendingSkippableComments();
        this.#handleRaw(text, lineNum);
        return;
      }
      case "op-block": {
        const { target, lineNum } = token;
        // Comments directly above a hunk header are dropped.
        this.#discardPendingSkippableComments();
        if (target.kind === "replace" || target.kind === "delete") {
          validateRangeOrder(target.range, lineNum);
        }
        this.#flushPending();
        this.#pending = { target, lineNum, payloads: [] };
        return;
      }
    }
  }

  /**
   * Finish a complete patch: replay parked comments, flush the pending hunk and check
   * that no line is deleted by two different hunks.
   *
   * @returns The accumulated edits and warnings.
   * @throws Error on an invalid trailing state or on overlapping deletes.
   */
  end(): { edits: Edit[]; warnings: string[] } {
    this.#consumePendingSkippableComments();
    this.#flushPending();
    this.#validateNoOverlappingDeletes();
    return { edits: this.#edits, warnings: this.#warnings };
  }

  /**
   * Finish a possibly incomplete patch. A pending hunk is flushed only when it already
   * has body rows or is a `delete` / `delete_block`; otherwise it is dropped silently.
   *
   * @returns The accumulated edits and warnings.
   * @throws Error on an invalid trailing state or on overlapping deletes.
   */
  endStreaming(): { edits: Edit[]; warnings: string[] } {
    this.#consumePendingSkippableComments();
    const pending = this.#pending;
    const kind = pending?.target.kind;
    const flushable =
      pending !== undefined && (pending.payloads.length > 0 || kind === "delete" || kind === "delete_block");
    if (flushable) this.#flushPending();
    else this.#pending = undefined;
    this.#validateNoOverlappingDeletes();
    return { edits: this.#edits, warnings: this.#warnings };
  }

  /** Return the executor to its freshly constructed state. */
  reset(): void {
    this.#terminated = false;
    this.#pending = undefined;
    this.#skippableComments = [];
    this.#editIndex = 0;
    this.#warnings = [];
    this.#edits = [];
  }

  /**
   * Ensure no anchor line is deleted by hunks that start on different patch lines.
   *
   * @throws Error naming the first such anchor line and the two earliest hunk lines.
   */
  #validateNoOverlappingDeletes(): void {
    // anchor line -> patch lines of the hunks that delete it
    const hunkLinesByAnchor = new Map<number, Set<number>>();
    for (const edit of this.#edits) {
      if (edit.kind !== "delete") continue;
      const anchorLine = edit.anchor.line;
      const hunkLines = hunkLinesByAnchor.get(anchorLine) ?? new Set<number>();
      hunkLines.add(edit.lineNum);
      hunkLinesByAnchor.set(anchorLine, hunkLines);
    }
    for (const [anchorLine, hunkLines] of hunkLinesByAnchor) {
      if (hunkLines.size < 2) continue;
      const [firstBlock, secondBlock] = [...hunkLines].sort((a, b) => a - b);
      throw new Error(
        `line ${secondBlock}: anchor line ${anchorLine} is already targeted by another hunk on line ${firstBlock}. ` +
          "Issue ONE hunk per range; payload is only the final desired content, never a before/after pair.",
      );
    }
  }

  /**
   * Append an explicitly marked payload row to the pending hunk.
   *
   * @throws Error when no hunk is pending or the pending hunk takes no body.
   */
  #handleLiteralPayload(text: string, lineNum: number): void {
    const pending = this.#pending;
    if (!pending) {
      throw new Error(
        `line ${lineNum}: payload line has no preceding hunk header. ` +
          `Got ${JSON.stringify(`${HL_PAYLOAD_REPLACE}${text}`)}.`,
      );
    }
    switch (pending.target.kind) {
      case "delete":
        throw new Error(`line ${lineNum}: ${DELETE_TAKES_NO_BODY}`);
      case "delete_block":
        throw new Error(`line ${lineNum}: ${DELETE_BLOCK_TAKES_NO_BODY}`);
    }
    pending.payloads.push({ kind: "literal", text, lineNum });
  }

  /**
   * Handle a row with no payload marker. Inside a hunk it is accepted as a body row (with
   * a one-time warning); outside a hunk only whitespace-only rows are tolerated.
   *
   * @throws Error for foreign-dialect rows, rows starting with `-`, body rows on a
   *   delete hunk, and non-blank rows outside any hunk.
   */
  #handleRaw(text: string, lineNum: number): void {
    const pending = this.#pending;
    const contamination = detectApplyPatchContamination(text, pending !== undefined);
    if (contamination !== null) throw new Error(`line ${lineNum}: ${contamination}`);
    // Whitespace-only rows are ignored whether or not a hunk is pending.
    if (text.trim().length === 0) return;
    if (!pending) {
      throw new Error(
        `line ${lineNum}: payload line has no preceding hunk header. ` +
          `Use \`replace N..M:\`, \`delete N..M\`, or \`insert before|after|head|tail:\` above the body. Got ${JSON.stringify(text)}.`,
      );
    }
    switch (pending.target.kind) {
      case "delete":
        throw new Error(`line ${lineNum}: ${DELETE_TAKES_NO_BODY}`);
      case "delete_block":
        throw new Error(`line ${lineNum}: ${DELETE_BLOCK_TAKES_NO_BODY}`);
    }
    if (text.trimStart().startsWith("-")) throw new Error(`line ${lineNum}: ${MINUS_ROW_REJECTED}`);
    if (!this.#warnings.includes(BARE_BODY_AUTO_PIPED_WARNING)) this.#warnings.push(BARE_BODY_AUTO_PIPED_WARNING);
    pending.payloads.push({ kind: "literal", text, lineNum });
  }

  /** Record one inserted row at a copy of `cursor`; `mode` is attached only when provided. */
  #pushInsert(cursor: Cursor, text: string, lineNum: number, mode?: "replacement"): void {
    const modeField = mode !== undefined ? { mode } : {};
    this.#edits.push({
      kind: "insert",
      cursor: cloneCursor(cursor),
      text,
      lineNum,
      index: this.#editIndex++,
      ...modeField,
    });
  }

  /** Record the deletion of a single anchor line (the anchor is shallow-copied). */
  #pushDelete(anchor: Anchor, lineNum: number): void {
    const index = this.#editIndex++;
    this.#edits.push({ kind: "delete", anchor: { ...anchor }, lineNum, index });
  }

  /** Record a block edit carrying the text of every payload row. */
  #pushBlock(anchor: Anchor, payloads: readonly PayloadRow[], lineNum: number): void {
    this.#edits.push({
      kind: "block",
      anchor: { ...anchor },
      payloads: payloads.map(({ text }) => text),
      lineNum,
      index: this.#editIndex++,
    });
  }

  /** Emit one insert per payload row, all at `cursor` and all attributed to the hunk header line. */
  #emitPayloadRows(cursor: Cursor, payloads: readonly PayloadRow[], lineNum: number, mode?: "replacement"): void {
    payloads.forEach(row => this.#pushInsert(cursor, row.text, lineNum, mode));
  }

  /** Emit one delete per line of `range`. */
  #deleteRange(range: ParsedRange, lineNum: number): void {
    for (const anchor of expandRange(range)) this.#pushDelete(anchor, lineNum);
  }

  /**
   * Turn the pending hunk, if any, into edits.
   *
   * @throws Error when a `block` or an insert-style hunk has no body rows.
   */
  #flushPending(): void {
    const pending = this.#pending;
    if (!pending) return;
    // Clear first so the hunk is not flushed again, even if emission throws.
    this.#pending = undefined;
    const { target, lineNum, payloads } = pending;
    const hasBody = payloads.length > 0;

    switch (target.kind) {
      case "delete":
        this.#deleteRange(target.range, lineNum);
        return;
      case "delete_block":
        // A block edit with no payloads resolves to a pure block deletion.
        this.#pushBlock(target.anchor, [], lineNum);
        return;
      case "block":
        if (!hasBody) throw new Error(`line ${lineNum}: ${EMPTY_BLOCK}`);
        this.#pushBlock(target.anchor, payloads, lineNum);
        return;
      case "replace": {
        // New rows go in before the first replaced line; a body-less replace is a plain delete.
        if (hasBody) {
          const cursor: Cursor = { kind: "before_anchor", anchor: { ...target.range.start } };
          this.#emitPayloadRows(cursor, payloads, lineNum, "replacement");
        }
        this.#deleteRange(target.range, lineNum);
        return;
      }
    }

    // Everything left is an insert-style hunk, which must carry a body.
    if (!hasBody) throw new Error(`line ${lineNum}: ${EMPTY_INSERT}`);
    switch (target.kind) {
      case "insert_before":
        this.#emitPayloadRows({ kind: "before_anchor", anchor: { ...target.anchor } }, payloads, lineNum);
        return;
      case "insert_after":
        this.#emitPayloadRows({ kind: "after_anchor", anchor: { ...target.anchor } }, payloads, lineNum);
        return;
      default: {
        const cursor: Cursor = target.kind === "bof" ? { kind: "bof" } : { kind: "eof" };
        this.#emitPayloadRows(cursor, payloads, lineNum);
      }
    }
  }
}
306
+
307
/**
 * Feed the tokenizer's end-of-input tokens to the executor, then finalize it.
 *
 * @param executor Executor that has already received the tokens produced so far.
 * @param tokenizer Tokenizer whose `end()` output is still to be consumed.
 * @returns The result of `executor.end()`.
 */
function drain(executor: Executor, tokenizer: Tokenizer): { edits: Edit[]; warnings: string[] } {
  const trailingTokens = tokenizer.end();
  for (const trailing of trailingTokens) {
    executor.feed(trailing);
  }
  return executor.end();
}
311
+
312
/**
 * Parse a complete patch into edits.
 *
 * @param diff Full patch text.
 * @returns The parsed edits together with any warnings raised along the way.
 * @throws Error when the executor rejects the patch.
 */
export function parsePatch(diff: string): { edits: Edit[]; warnings: string[] } {
  const tokenizer = new Tokenizer();
  const executor = new Executor();
  const tokens = tokenizer.feed(diff);
  for (const current of tokens) {
    executor.feed(current);
  }
  return drain(executor, tokenizer);
}
318
+
319
/**
 * Parse patch text with the executor's streaming finalizer (`endStreaming`), which drops
 * a trailing hunk that has no body yet instead of rejecting it.
 *
 * @param diff Patch text received so far.
 * @returns The parsed edits together with any warnings raised along the way.
 * @throws Error when the executor rejects the text.
 */
export function parsePatchStreaming(diff: string): { edits: Edit[]; warnings: string[] } {
  const tokenizer = new Tokenizer();
  const executor = new Executor();
  const push = (token: Token): void => executor.feed(token);
  for (const token of tokenizer.feed(diff)) push(token);
  for (const token of tokenizer.end()) push(token);
  return executor.endStreaming();
}