@mammothb/pi-hashline 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,394 @@
1
+ /**
2
+ * Line classifier for hashline diff text.
3
+ * Converts raw text lines into typed tokens consumed by the parser.
4
+ */
5
+
6
+ import {
7
+ HL_BLOCK_KEYWORD,
8
+ HL_DELETE_KEYWORD,
9
+ HL_FILE_HASH_LENGTH,
10
+ HL_FILE_HASH_SEP,
11
+ HL_FILE_PREFIX,
12
+ HL_HEADER_COLON,
13
+ HL_INSERT_AFTER,
14
+ HL_INSERT_BEFORE,
15
+ HL_INSERT_HEAD,
16
+ HL_INSERT_KEYWORD,
17
+ HL_INSERT_TAIL,
18
+ HL_PAYLOAD_REPLACE,
19
+ HL_REPLACE_KEYWORD,
20
+ } from "./format";
21
+ import type { Anchor, Cursor, ParsedRange } from "./types";
22
+
23
+ // ─── BlockTarget (the target of a hunk header) ───────────────────────
24
+
25
/**
 * Target of a hunk header, as produced by the hunk-header parser in this file.
 *
 * Variants and the header form each one comes from:
 *   - "replace"       — `replace N..M:` (line range)
 *   - "block"         — `replace block N:` (single anchor line)
 *   - "delete"        — `delete N` or `delete N..M` (a single line is stored
 *                       as a range whose start and end are the same line)
 *   - "delete_block"  — `delete block N`
 *   - "insert_before" — `insert before N:`
 *   - "insert_after"  — `insert after N:`
 *   - "bof"           — `insert head:`
 *   - "eof"           — `insert tail:`
 *
 * The keyword spellings shown above follow the comments in this file; the
 * actual strings are the HL_* constants imported from "./format".
 */
+ export type BlockTarget =
26
+ | { kind: "replace"; range: ParsedRange }
27
+ | { kind: "block"; anchor: Anchor }
28
+ | { kind: "delete"; range: ParsedRange }
29
+ | { kind: "delete_block"; anchor: Anchor }
30
+ | { kind: "insert_before"; anchor: Anchor }
31
+ | { kind: "insert_after"; anchor: Anchor }
32
+ | { kind: "bof" }
33
+ | { kind: "eof" };
34
+
35
+ // ─── Token types ─────────────────────────────────────────────────────
36
+
37
/**
 * Fields shared by every token. `lineNum` is whatever the caller passed to the
 * classifier: `Tokenizer.tokenizeAll` passes 1-based input line numbers, while
 * `Tokenizer.tokenize` defaults it to 0.
 */
+ interface TokenBase {
38
+ lineNum: number;
39
+ }
40
+
41
/**
 * One classified input line. Kinds, in the order the classifier tests them:
 *   - "blank"              — line is empty or whitespace-only
 *   - "envelope-begin"     — line (trailing whitespace removed) equals ENVELOPE_BEGIN
 *   - "envelope-end"       — ... equals ENVELOPE_END
 *   - "envelope-separator" — ... equals ENVELOPE_SEPARATOR
 *   - "abort"              — ... equals ABORT_MARKER
 *   - "header"             — file section header; `fileHash` is present only
 *                            when the header carried a valid hash suffix
 *   - "op-block"           — hunk header; `target` describes the operation
 *   - "payload-literal"    — line starts with HL_PAYLOAD_REPLACE; `text` is the
 *                            remainder of the line after that prefix
 *   - "raw"                — anything else; `text` is the whole line
 */
+ export type Token =
42
+ | (TokenBase & { kind: "blank" })
43
+ | (TokenBase & { kind: "envelope-begin" })
44
+ | (TokenBase & { kind: "envelope-end" })
45
+ | (TokenBase & { kind: "envelope-separator" })
46
+ | (TokenBase & { kind: "abort" })
47
+ | (TokenBase & { kind: "header"; path: string; fileHash?: string })
48
+ | (TokenBase & { kind: "op-block"; target: BlockTarget })
49
+ | (TokenBase & { kind: "payload-literal"; text: string })
50
+ | (TokenBase & { kind: "raw"; text: string });
51
+
52
+ // ─── Envelope / abort markers ────────────────────────────────────────
53
+
54
+ const ENVELOPE_BEGIN = "<<<";
55
+ const ENVELOPE_END = ">>>";
56
+ const ENVELOPE_SEPARATOR = "---";
57
+ const ABORT_MARKER = "...";
58
+
59
+ // ─── Line-number scanner ─────────────────────────────────────────────
60
+
61
/** A positive decimal integer with no leading zero and nothing else. */
const LINE_RE = /^[1-9]\d*$/;

/**
 * Convert the textual operand of a header into a line number.
 *
 * @param raw     Operand text; surrounding whitespace is ignored.
 * @param lineNum Line of the diff being parsed, used only in the error message.
 * @returns The operand as an integer.
 * @throws Error when the trimmed operand is not a positive integer
 *         (zero, leading zeros, signs, and trailing text are all rejected).
 */
function parseLineNumber(raw: string, lineNum: number): number {
  const candidate = raw.trim();
  if (LINE_RE.test(candidate)) {
    return Number.parseInt(candidate, 10);
  }
  // Report the untrimmed input so the caller sees exactly what was written.
  throw new Error(
    `line ${lineNum}: expected a positive line number; got ${JSON.stringify(raw)}.`,
  );
}
72
+
73
+ // ─── Range scanner ───────────────────────────────────────────────────
74
+
75
/**
 * Parse an inclusive `start<sep>end` line range.
 *
 * Accepted separators: `..`, `...`, `-`, `--`, en-dash, em-dash, each
 * optionally surrounded by whitespace. Both endpoints must be positive
 * integers without leading zeros, and the text must contain nothing else
 * (no leading or trailing whitespace).
 *
 * @returns The parsed range, or `null` when `raw` is not a range. The
 *          endpoints are not compared, so `start` may exceed `end`.
 */
function parseRange(raw: string): ParsedRange | null {
  const parts = /^([1-9]\d*)\s*(?:\.\.|\.\.\.|--?|[–—])\s*([1-9]\d*)$/.exec(raw);
  const first = parts?.[1];
  const last = parts?.[2];
  if (first === undefined || last === undefined) {
    return null;
  }
  return {
    start: { line: Number.parseInt(first, 10) },
    end: { line: Number.parseInt(last, 10) },
  };
}
87
+
88
+ // ─── Keyword scanning ────────────────────────────────────────────────
89
+
90
/**
 * Match `keyword` at offset `pos` of `line` as a whole word.
 *
 * The keyword counts as matched only when it is followed by a space, a tab,
 * the first character of HL_HEADER_COLON, or the end of the line.
 *
 * @returns The offset just past the keyword, or `null` when it does not match.
 */
function scanKeyword(
  line: string,
  pos: number,
  keyword: string,
): number | null {
  if (!line.startsWith(keyword, pos)) {
    return null;
  }
  const end = pos + keyword.length;
  if (end >= line.length) {
    // Keyword runs to the end of the line: nothing follows, so it is a match.
    return end;
  }
  const following = line.charCodeAt(end);
  const isDelimiter =
    following === 0x20 || // space
    following === 0x09 || // tab
    following === HL_HEADER_COLON.charCodeAt(0);
  return isDelimiter ? end : null;
}
113
+
114
+ // ─── Hunk header parser ──────────────────────────────────────────────
115
+
116
/** Remove one trailing header colon from `text`, if it has one. */
function stripHeaderColon(text: string): string {
  return text.endsWith(HL_HEADER_COLON)
    ? text.slice(0, text.length - HL_HEADER_COLON.length)
    : text;
}

/** Parse what follows the replace keyword: `block N[:]` or `N..M:`. */
function parseReplaceTarget(rest: string, lineNum: number): BlockTarget | null {
  if (rest.startsWith(HL_BLOCK_KEYWORD)) {
    // NOTE(review): unlike `replace N..M:`, the block form is accepted with or
    // without the trailing colon. Preserved as-is; confirm this is intended.
    const operand = stripHeaderColon(
      rest.slice(HL_BLOCK_KEYWORD.length).trimStart(),
    );
    return { kind: "block", anchor: { line: parseLineNumber(operand, lineNum) } };
  }

  const rangeText = stripHeaderColon(rest).trim();
  if (rangeText.length === 0) {
    return null;
  }
  const range = parseRange(rangeText);
  // A range header is only valid when it is terminated by the colon.
  if (range === null || !rest.endsWith(HL_HEADER_COLON)) {
    return null;
  }
  return { kind: "replace", range };
}

/** Parse what follows the delete keyword: `block N`, `N`, or `N..M` (no colon). */
function parseDeleteTarget(rest: string, lineNum: number): BlockTarget | null {
  if (rest.startsWith(HL_BLOCK_KEYWORD)) {
    const operand = rest.slice(HL_BLOCK_KEYWORD.length).trimStart();
    // parseLineNumber accepts nothing but a bare positive integer, so a
    // trailing colon or any other text after the number makes it throw here.
    return {
      kind: "delete_block",
      anchor: { line: parseLineNumber(operand, lineNum) },
    };
  }

  // `delete` takes no colon; a line ending in one is not a delete header.
  if (rest.endsWith(HL_HEADER_COLON) || rest.length === 0) {
    return null;
  }
  const range = parseRange(rest);
  if (range !== null) {
    return { kind: "delete", range };
  }
  // Single line: represent it as a one-line range.
  const line = parseLineNumber(rest, lineNum);
  return { kind: "delete", range: { start: { line }, end: { line } } };
}

/** Parse what follows the insert keyword: `before N:`, `after N:`, `head:`, `tail:`. */
function parseInsertTarget(rest: string, lineNum: number): BlockTarget | null {
  const anchored: ReadonlyArray<
    readonly [string, "insert_before" | "insert_after"]
  > = [
    [HL_INSERT_BEFORE, "insert_before"],
    [HL_INSERT_AFTER, "insert_after"],
  ];
  for (const [keyword, kind] of anchored) {
    if (!rest.startsWith(keyword)) {
      continue;
    }
    const operand = stripHeaderColon(rest.slice(keyword.length).trimStart());
    // The operand is validated before the colon check, so a malformed number
    // throws even when the colon is also missing.
    const line = parseLineNumber(operand, lineNum);
    if (!rest.endsWith(HL_HEADER_COLON)) {
      return null;
    }
    return { kind, anchor: { line } };
  }

  if (rest.startsWith(HL_INSERT_HEAD)) {
    const tail = rest.slice(HL_INSERT_HEAD.length).trimStart();
    return tail === HL_HEADER_COLON ? { kind: "bof" } : null;
  }
  if (rest.startsWith(HL_INSERT_TAIL)) {
    const tail = rest.slice(HL_INSERT_TAIL.length).trimStart();
    return tail === HL_HEADER_COLON ? { kind: "eof" } : null;
  }
  return null;
}

/**
 * Parse one line as a hunk (operation) header.
 *
 * The line must start (after optional whitespace) with the replace, delete,
 * or insert keyword, followed by a space, tab, colon, or end of line.
 * Whitespace on both ends of the line is ignored, so a header with trailing
 * spaces after its colon is treated the same as one without.
 *
 * @param line    The raw line.
 * @param lineNum Line number used in error messages.
 * @returns The parsed target, or `null` when the line is not a well-formed
 *          header.
 * @throws Error (from `parseLineNumber`) when a keyword matched and a
 *         single-line operand was expected but is not a positive integer,
 *         e.g. `delete foo` or `insert before x:`.
 */
function parseHunkHeader(line: string, lineNum: number): BlockTarget | null {
  // Trim both ends: trailing whitespace must not defeat the colon checks.
  const trimmed = line.trim();

  const afterReplace = scanKeyword(trimmed, 0, HL_REPLACE_KEYWORD);
  if (afterReplace !== null) {
    return parseReplaceTarget(trimmed.slice(afterReplace).trimStart(), lineNum);
  }

  const afterDelete = scanKeyword(trimmed, 0, HL_DELETE_KEYWORD);
  if (afterDelete !== null) {
    return parseDeleteTarget(trimmed.slice(afterDelete).trimStart(), lineNum);
  }

  const afterInsert = scanKeyword(trimmed, 0, HL_INSERT_KEYWORD);
  if (afterInsert !== null) {
    return parseInsertTarget(trimmed.slice(afterInsert).trimStart(), lineNum);
  }

  return null;
}
237
+
238
+ // ─── Header parser ───────────────────────────────────────────────────
239
+
240
/**
 * Parse a file section header: HL_FILE_PREFIX, then a path, then optionally
 * HL_FILE_HASH_SEP and a hex hash of exactly HL_FILE_HASH_LENGTH characters.
 *
 * The last separator in the line is the only one considered. When the text
 * after it is not a valid hash, the separator is treated as part of the path.
 *
 * @param line The raw line; trailing whitespace is ignored.
 * @returns The path plus the upper-cased hash when one is present, or `null`
 *          when the line lacks the prefix or the path is empty.
 */
function parseHashlineHeader(
  line: string,
): { path: string; fileHash?: string } | null {
  const trimmed = line.trimEnd();
  if (!trimmed.startsWith(HL_FILE_PREFIX)) {
    return null;
  }
  const body = trimmed.slice(HL_FILE_PREFIX.length);
  if (body.length === 0) {
    return null;
  }

  const sepIdx = body.lastIndexOf(HL_FILE_HASH_SEP);
  if (sepIdx >= 0) {
    // Skip the whole separator rather than assuming it is one character long.
    const candidate = body.slice(sepIdx + HL_FILE_HASH_SEP.length);
    // The length comes from the shared constant; the regex only checks that
    // every character is a hex digit, so the two can never disagree.
    if (
      candidate.length === HL_FILE_HASH_LENGTH &&
      /^[0-9A-Fa-f]+$/.test(candidate)
    ) {
      const path = body.slice(0, sepIdx);
      return path.length === 0
        ? null
        : { path, fileHash: candidate.toUpperCase() };
    }
  }

  // No separator, or it was not followed by a valid hash: all of it is path.
  return { path: body };
}
275
+
276
+ // ─── Line classifier ─────────────────────────────────────────────────
277
+
278
/**
 * Classify a single line into a token.
 *
 * Checks run in a fixed order and the first match wins: blank line, envelope
 * and abort markers, file section header, hunk header, payload literal, and
 * finally raw text.
 *
 * @param line    The line without its line terminator.
 * @param lineNum Line number stored on the token and used in error messages.
 * @throws Error when the line starts like a hunk header but its line-number
 *         operand is malformed (propagated from `parseHunkHeader`).
 */
function classifyLine(line: string, lineNum: number): Token {
  if (line.trim().length === 0) {
    return { kind: "blank", lineNum };
  }

  // Markers must match exactly, apart from trailing whitespace.
  const trimmed = line.trimEnd();
  switch (trimmed) {
    case ENVELOPE_BEGIN:
      return { kind: "envelope-begin", lineNum };
    case ENVELOPE_END:
      return { kind: "envelope-end", lineNum };
    case ENVELOPE_SEPARATOR:
      return { kind: "envelope-separator", lineNum };
    case ABORT_MARKER:
      return { kind: "abort", lineNum };
    default:
      break;
  }

  // File section header. A prefixed line that fails to parse falls through
  // to the remaining checks.
  if (trimmed.startsWith(HL_FILE_PREFIX)) {
    const header = parseHashlineHeader(line);
    if (header !== null) {
      // Only attach `fileHash` when present so the key is never `undefined`.
      return header.fileHash !== undefined
        ? {
            kind: "header",
            lineNum,
            path: header.path,
            fileHash: header.fileHash,
          }
        : { kind: "header", lineNum, path: header.path };
    }
  }

  const hunk = parseHunkHeader(line, lineNum);
  if (hunk !== null) {
    return { kind: "op-block", lineNum, target: hunk };
  }

  // Payload literal: drop the whole prefix, whatever its length.
  if (line.startsWith(HL_PAYLOAD_REPLACE)) {
    return {
      kind: "payload-literal",
      lineNum,
      text: line.slice(HL_PAYLOAD_REPLACE.length),
    };
  }

  // Bare text (auto-piped body row or noise).
  return { kind: "raw", lineNum, text: line };
}
328
+
329
+ // ─── Tokenizer class ─────────────────────────────────────────────────
330
+
331
/**
 * Line-by-line tokenizer for hashline diff text. The class holds no instance
 * state; every method is a thin wrapper over the module's classifiers.
 *
 * Usage:
 *   const tokenizer = new Tokenizer();
 *   const tokens = tokenizer.tokenizeAll(input);
 */
export class Tokenizer {
  /**
   * Tokenize a whole text, one token per `\n`-separated line. A trailing `\r`
   * is removed from each line first (CRLF input), and tokens carry 1-based
   * line numbers. Empty input yields a single blank token for line 1.
   */
  tokenizeAll(text: string): Token[] {
    if (text === "") {
      return [{ kind: "blank", lineNum: 1 }];
    }
    return text.split("\n").map((rawLine, idx) => {
      const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
      return classifyLine(line, idx + 1);
    });
  }

  /** Classify one line; `lineNum` defaults to 0 when the caller has none. */
  tokenize(line: string, lineNum = 0): Token {
    return classifyLine(line, lineNum);
  }

  /**
   * Whether `line` parses as an op (hunk) header. Note that this throws,
   * rather than returning `false`, when the line starts with an op keyword
   * but its line-number operand is malformed.
   */
  isOp(line: string): boolean {
    const target = parseHunkHeader(line, 0);
    return target !== null;
  }

  /** Whether `line` parses as a file section header. */
  isHeader(line: string): boolean {
    const header = parseHashlineHeader(line);
    return header !== null;
  }

  /** Whether `line`, ignoring trailing whitespace, is an envelope or abort marker. */
  isEnvelopeMarker(line: string): boolean {
    const markers = [
      ENVELOPE_BEGIN,
      ENVELOPE_END,
      ENVELOPE_SEPARATOR,
      ABORT_MARKER,
    ];
    return markers.includes(line.trimEnd());
  }
}
381
+
382
/**
 * Copy a cursor so the result owns its own anchor object.
 *
 * Anchored cursors (`before_anchor` / `after_anchor`) are rebuilt with a
 * fresh anchor. Cursors without an anchor are returned as the same object.
 */
export function cloneCursor(cursor: Cursor): Cursor {
  switch (cursor.kind) {
    case "before_anchor":
    case "after_anchor": {
      const { line } = cursor.anchor;
      return { kind: cursor.kind, anchor: { line } };
    }
    default:
      return cursor;
  }
}
392
+
393
+ // Re-export for convenience
394
+ export type { Anchor, Cursor, ParsedRange } from "./types";
package/src/types.ts ADDED
@@ -0,0 +1,109 @@
1
+ /**
2
+ * Pure data types shared across the hashline parser, applier, and patcher.
3
+ * Nothing in this file references a filesystem, agent runtime, or schema
4
+ * library — keep it that way.
5
+ */
6
+
7
+ /** A line-number anchor (1-indexed). */
8
+ export interface Anchor {
9
+ line: number;
10
+ }
11
+
12
+ /** Where an `insert` edit should land relative to existing content. */
13
+ export type Cursor =
14
+ | { kind: "bof" }
15
+ | { kind: "eof" }
16
+ | { kind: "before_anchor"; anchor: Anchor }
17
+ | { kind: "after_anchor"; anchor: Anchor };
18
+
19
+ /**
20
+ * A single low-level edit produced by the parser and consumed by the applier.
21
+ * Multi-line replacements decompose to one `insert` per replacement line plus
22
+ * one `delete` per consumed line. Replacement payloads are tagged so the
23
+ * applier can distinguish literal insertion from new content for a deleted
24
+ * line.
25
+ */
26
+ export type Edit =
27
+ | {
28
+ kind: "insert";
29
+ cursor: Cursor;
30
+ text: string;
31
+ lineNum: number;
32
+ index: number;
33
+ mode?: "replacement";
34
+ }
35
+ | {
36
+ kind: "delete";
37
+ anchor: Anchor;
38
+ lineNum: number;
39
+ index: number;
40
+ oldAssertion?: string;
41
+ }
42
+ | {
43
+ /**
44
+ * Deferred block edit (`replace block N:` / `delete block N`). The exact
45
+ * line span is unknown at parse time — it is computed by
46
+ * {@link resolveBlockEdits} once file text + path (→ language) are
47
+ * available, then expanded into concrete edits.
48
+ */
49
+ kind: "block";
50
+ anchor: Anchor;
51
+ payloads: string[];
52
+ lineNum: number;
53
+ index: number;
54
+ };
55
+
56
+ /** Result of applying a parsed set of edits to a text body. */
57
+ export interface ApplyResult {
58
+ /** Post-edit text body. */
59
+ text: string;
60
+ /** First line number (1-indexed) that changed, or `undefined` for a no-op apply. */
61
+ firstChangedLine?: number;
62
+ /** Diagnostic warnings collected by the parser, patcher, or recovery. */
63
+ warnings?: string[];
64
+ }
65
+
66
+ /** A parsed `[A..B]` line range. */
67
+ export interface ParsedRange {
68
+ start: Anchor;
69
+ end: Anchor;
70
+ }
71
+
72
+ /** Optional hints for {@link splitPatchInput}. */
73
+ export interface SplitOptions {
74
+ /** Resolves absolute paths inside hashline headers to cwd-relative form. */
75
+ cwd?: string;
76
+ /**
77
+ * Fallback path used when the input lacks a `¶PATH` header but contains
78
+ * recognizable hashline operations.
79
+ */
80
+ path?: string;
81
+ }
82
+
83
+ /**
84
+ * Resolved 1-indexed inclusive line span of a `replace block N:` target.
85
+ */
86
+ export interface BlockSpan {
87
+ /** First line of the block (1-indexed, inclusive). */
88
+ start: number;
89
+ /** Last line of the block (1-indexed, inclusive). */
90
+ end: number;
91
+ }
92
+
93
+ /** Request handed to a {@link BlockResolver} to resolve one `replace block N:` anchor. */
94
+ export interface BlockResolverRequest {
95
+ /** Target file path (used to infer language by extension). */
96
+ path: string;
97
+ /** Full text the block must be resolved against. */
98
+ text: string;
99
+ /** 1-indexed line the block must begin on. */
100
+ line: number;
101
+ }
102
+
103
+ /**
104
+ * Resolves a `replace block N:` anchor to the line span of the syntactic block
105
+ * that begins on line N. Returns `null` when no block can be resolved.
106
+ * Pure seam: the hashline core declares the contract; the host injects a
107
+ * tree-sitter-backed implementation.
108
+ */
109
+ export type BlockResolver = (request: BlockResolverRequest) => BlockSpan | null;
package/src/write.ts ADDED
@@ -0,0 +1,120 @@
1
+ /**
2
+ * Hashline write tool override.
3
+ *
4
+ * Overrides the built-in `write` tool to record content snapshots so the
5
+ * agent can immediately `edit` the newly created file with a hashline tag.
6
+ * The result includes a `¶PATH#TAG` header matching the read tool output
7
+ * format.
8
+ */
9
+
10
+ import { mkdir, writeFile } from "node:fs/promises";
11
+ import { dirname, isAbsolute, relative, resolve } from "node:path";
12
+ import type { ToolDefinition } from "@earendil-works/pi-coding-agent";
13
+ import { Type } from "typebox";
14
+
15
+ import {
16
+ computeFileHash,
17
+ formatHashlineHeader,
18
+ formatNumberedLines,
19
+ } from "./format";
20
+ import { normalizeToLF } from "./normalize";
21
+ import type { SnapshotStore } from "./snapshots";
22
+
23
+ // ─── Schema ──────────────────────────────────────────────────────────
24
+
25
+ const WriteSchema = Type.Object({
26
+ path: Type.String({
27
+ description: "Path to the file to write (relative or absolute)",
28
+ }),
29
+ content: Type.String({
30
+ description: "Content to write to the file",
31
+ }),
32
+ });
33
+
34
+ // ─── Details type ────────────────────────────────────────────────────
35
+
36
+ export interface WriteToolDetails {
37
+ /** Total lines written. */
38
+ totalLines: number;
39
+ /** Content hash of the written file. */
40
+ fileHash: string;
41
+ /** Hashline header for the new version. */
42
+ header: string;
43
+ }
44
+
45
+ // ─── Helpers ─────────────────────────────────────────────────────────
46
+
47
/**
 * Choose the path to show for a file: relative to `cwd` when the file lives
 * inside `cwd`, otherwise the absolute path.
 *
 * @param rawPath Path as supplied by the caller (relative or absolute).
 * @param cwd     Directory that relative paths are resolved against.
 * @returns `"."` when the path is `cwd` itself, a cwd-relative path for
 *          anything inside `cwd`, and the resolved absolute path otherwise.
 */
function resolveDisplayPath(rawPath: string, cwd: string): string {
  const resolved = resolve(cwd, rawPath);
  const rel = relative(cwd, resolved);

  // The path leaves cwd only when its FIRST SEGMENT is exactly "..". A plain
  // prefix test would also reject in-tree names such as "..hidden".
  const escapesCwd = /^\.\.(?:[\\/]|$)/.test(rel);

  // `relative` can return an absolute path (e.g. a different Windows drive).
  if (escapesCwd || isAbsolute(rel)) {
    return resolved;
  }
  return rel || ".";
}
59
+
60
+ // ─── Tool creator ────────────────────────────────────────────────────
61
+
62
/**
 * Build the `write` tool definition.
 *
 * On execution the tool creates missing parent directories, writes the
 * content to disk as given, then records an LF-normalized copy in `snapshots`
 * keyed by the absolute path. The text result is the hashline header followed
 * by the numbered lines of the normalized content.
 *
 * @param snapshots Store that receives the normalized content of each write.
 */
export function createWriteTool(
  snapshots: SnapshotStore,
): ToolDefinition<typeof WriteSchema, WriteToolDetails> {
  return {
    name: "write",
    label: "Write",
    description:
      "Create or overwrite a file. The result includes a ¶PATH#TAG header — " +
      "copy this tag when editing the file so the edit tool can validate " +
      "you're working against the current version.",
    promptSnippet:
      "Create or overwrite files — returns a ¶PATH#TAG header for immediate editing",
    promptGuidelines: [
      "Use write to create new files or completely overwrite existing ones. The result includes a ¶PATH#TAG header — use this tag to edit the file immediately without re-reading.",
    ],
    parameters: WriteSchema,

    async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
      const targetPath = resolve(ctx.cwd, params.path);
      const shownPath = resolveDisplayPath(params.path, ctx.cwd);

      // Disk gets the content exactly as supplied.
      await mkdir(dirname(targetPath), { recursive: true });
      await writeFile(targetPath, params.content, "utf-8");

      // Hash and snapshot use the LF-normalized text.
      const lfText = normalizeToLF(params.content);
      const fileHash = computeFileHash(lfText);
      snapshots.record(targetPath, lfText);
      const header = formatHashlineHeader(shownPath, fileHash);

      // A trailing newline leaves one empty final element; it is not a line.
      const lines = lfText.split("\n");
      if (lines[lines.length - 1] === "") {
        lines.pop();
      }

      const details: WriteToolDetails = {
        totalLines: lines.length,
        fileHash,
        header,
      };
      const body = formatNumberedLines(lines.join("\n"), 1);
      return {
        content: [{ type: "text", text: `${header}\n${body}` }],
        details,
      };
    },
  };
}