@mammothb/pi-hashline 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/read.ts ADDED
@@ -0,0 +1,239 @@
1
+ /**
2
+ * Hashline read tool override.
3
+ *
4
+ * Overrides the built-in `read` tool to emit `¶PATH#TAG` headers and
5
+ * record content snapshots. Text files get tagged; images delegate to the
6
+ * native read implementation.
7
+ */
8
+
9
+ import { constants } from "node:fs";
10
+ import { access, readFile } from "node:fs/promises";
11
+ import { extname, isAbsolute, relative, resolve } from "node:path";
12
+ import {
13
+ createReadToolDefinition,
14
+ type ToolDefinition,
15
+ } from "@earendil-works/pi-coding-agent";
16
+ import { Type } from "typebox";
17
+
18
+ import {
19
+ computeFileHash,
20
+ formatHashlineHeader,
21
+ formatNumberedLines,
22
+ } from "./format";
23
+ import type { SnapshotStore } from "./snapshots";
24
+
25
+ const DEFAULT_MAX_BYTES = 50 * 1024; // 50 KiB cap on the UTF-8 size of the text shown for a single read.
26
+ // Parameter schema for the `read` tool: a required path plus an optional line window.
27
+ const ReadSchema = Type.Object({
28
+ path: Type.String({
29
+ description: "Path to the file to read (relative or absolute)",
30
+ }), // Resolved against the tool context's cwd before use.
31
+ offset: Type.Optional(
32
+ Type.Number({
33
+ description: "Line number to start reading from (1-indexed)",
34
+ }),
35
+ ), // When omitted, reading starts at the first line.
36
+ limit: Type.Optional(
37
+ Type.Number({
38
+ description: "Maximum number of lines to read",
39
+ }),
40
+ ), // When omitted, reading continues through the last line.
41
+ });
42
+
43
+ /** Lower-case file extensions (leading dot included) whose reads are delegated to the native read tool instead of being tagged as text. The lookup is done on the lower-cased extension of the resolved path. Note that `.svg` is delegated too, even though SVG is a text format. */
44
+ const IMAGE_EXTENSIONS = new Set([
45
+ ".png",
46
+ ".jpg",
47
+ ".jpeg",
48
+ ".gif",
49
+ ".webp",
50
+ ".bmp",
51
+ ".ico",
52
+ ".tiff",
53
+ ".tif",
54
+ ".svg",
55
+ ]);
56
+
57
/**
 * Turn a user-supplied path into the path shown in hashline headers.
 *
 * The path is resolved against `cwd`. When the result lies inside `cwd`
 * (or is `cwd` itself) the cwd-relative form is returned, with `"."`
 * standing in for `cwd`; otherwise the absolute resolved path is returned.
 *
 * @param rawPath - Path as provided by the caller (relative or absolute).
 * @param cwd - Working directory used as the base for resolution.
 * @returns A cwd-relative display path, or the absolute path when the
 *   target is outside `cwd`.
 */
function resolvePath(rawPath: string, cwd: string): string {
  const resolved = resolve(cwd, rawPath);
  const rel = relative(cwd, resolved);

  // Only a leading ".." *segment* means the target escapes cwd. A plain
  // startsWith("..") would also reject in-tree names such as "..cache".
  const escapesCwd = /^\.\.(?:[\\/]|$)/.test(rel);

  // relative() can hand back an absolute path (e.g. different Windows
  // drives); that is also "outside cwd".
  if (escapesCwd || isAbsolute(rel)) {
    return resolved;
  }
  return rel || ".";
}
73
+
74
+ export interface ReadToolDetails { // Metadata returned in `details` for text reads; error results carry zero/empty values.
75
+ /** Number of lines in the whole file after CRLF→LF normalization, before offset/limit are applied. A trailing newline does not count as an extra line. */
76
+ totalLines: number;
77
+ /** UTF-8 byte length of the decoded file content, measured before line-ending normalization. */
78
+ totalBytes: number;
79
+ /** True when the selected text exceeded the output byte cap and was cut back. */
80
+ truncated: boolean;
81
+ /** Result of `computeFileHash` over the full LF-normalized content; empty string in error results. */
82
+ fileHash: string;
83
+ /** Hashline header built from the display path and `fileHash`; empty string in error results. */
84
+ header: string;
85
+ }
86
+
87
/**
 * Build the tool result for a failed read.
 *
 * `message` becomes the only text content item; every `details` field is
 * set to its zero/empty value because no file content was processed.
 */
function errorResult(message: string): {
  content: { type: "text"; text: string }[];
  details: ReadToolDetails;
} {
  const emptyDetails: ReadToolDetails = {
    totalLines: 0,
    totalBytes: 0,
    truncated: false,
    fileHash: "",
    header: "",
  };
  const content: { type: "text"; text: string }[] = [
    { type: "text", text: message },
  ];
  return { content, details: emptyDetails };
}
102
+
103
/**
 * Keep the longest run of leading whole lines whose "\n"-joined UTF-8 size
 * does not exceed `maxBytes`. Lines are never split, so multi-byte
 * characters and surrogate pairs always stay intact.
 */
function takeLinesWithinBytes(
  lines: readonly string[],
  maxBytes: number,
): string[] {
  const kept: string[] = [];
  let usedBytes = 0;
  for (const line of lines) {
    // Every line after the first also pays for the "\n" that joins it.
    const cost = Buffer.byteLength(line, "utf-8") + (kept.length > 0 ? 1 : 0);
    if (usedBytes + cost > maxBytes) {
      break;
    }
    kept.push(line);
    usedBytes += cost;
  }
  return kept;
}

/**
 * Create the hashline `read` tool definition.
 *
 * Paths with an image extension are handed to the native read tool
 * unchanged. Any other file is read as UTF-8 text, normalized to LF, hashed,
 * recorded in `snapshots` under its absolute path, and returned as a
 * hashline header followed by numbered lines.
 *
 * @param snapshots - Store that receives the full normalized text of every
 *   text file read.
 * @returns The tool definition registered under the name `read`.
 */
export function createReadTool(
  snapshots: SnapshotStore,
): ToolDefinition<typeof ReadSchema, ReadToolDetails> {
  // The native read tool is created lazily and reused. It is rebuilt if a
  // later call arrives with a different cwd, so delegation never resolves
  // paths against a stale directory.
  let cachedNative:
    | { cwd: string; tool: ToolDefinition<typeof ReadSchema, unknown> }
    | undefined;

  function nativeRead(ctx: {
    cwd: string;
  }): ToolDefinition<typeof ReadSchema, unknown> {
    if (cachedNative === undefined || cachedNative.cwd !== ctx.cwd) {
      cachedNative = {
        cwd: ctx.cwd,
        tool: createReadToolDefinition(ctx.cwd) as ToolDefinition<
          typeof ReadSchema,
          unknown
        >,
      };
    }
    return cachedNative.tool;
  }

  return {
    name: "read",
    label: "Read",
    description:
      "Read the contents of a file. Supports text files and images (jpg, png, gif, webp). " +
      "Every text file view includes a ¶PATH#TAG header — copy this tag when editing " +
      "the file so the edit tool can validate you're working against the current version.",
    promptSnippet:
      "Read file contents — every output includes a ¶PATH#TAG header required by the edit tool",
    promptGuidelines: [
      "Use read to inspect file content instead of cat or tail. Every text output starts with a ¶PATH#TAG header — copy the entire header (including the tag) into edit tool calls to validate you're editing the current file version.",
      "Use offset/limit to read large files in sections. Tags are per-file, not per-section — any section of a file carries the same tag.",
    ],
    parameters: ReadSchema,

    async execute(toolCallId, params, signal, onUpdate, ctx) {
      const { path: rawPath } = params;
      const absolutePath = resolve(ctx.cwd, rawPath);

      // Images and SVGs go to the native read tool. Its result is returned
      // as-is, so `details` has the native tool's shape rather than
      // ReadToolDetails, and no snapshot is recorded.
      const ext = extname(absolutePath).toLowerCase();
      if (IMAGE_EXTENSIONS.has(ext)) {
        return nativeRead(ctx).execute(
          toolCallId,
          params,
          signal,
          onUpdate,
          ctx,
        );
      }

      // Check readability up front so permission/missing-file problems get
      // their own message.
      try {
        await access(absolutePath, constants.R_OK);
      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : "unknown error";
        return errorResult(`Cannot read file: ${message}`);
      }

      // Text file — read fully, compute hash, record snapshot, format output.
      try {
        const rawContent = await readFile(absolutePath, "utf-8");
        const totalBytes = Buffer.byteLength(rawContent, "utf-8");

        // Normalize to LF for line counting and hash computation.
        const normalized = rawContent.replace(/\r\n/g, "\n");

        // A trailing newline yields a final empty element; it is a line
        // terminator, not a blank line, so drop it for display.
        const allLines = normalized.split("\n");
        if (allLines[allLines.length - 1] === "") {
          allLines.pop();
        }
        const totalLines = allLines.length;

        // Hash and snapshot always cover the full normalized content, so
        // every section of a file carries the same tag.
        const fileHash = computeFileHash(normalized);
        snapshots.record(absolutePath, normalized);

        const displayPath = resolvePath(rawPath, ctx.cwd);
        const header = formatHashlineHeader(displayPath, fileHash);

        // Sanitize the window: whole numbers only; an absent, non-finite or
        // non-positive value means "from the first line" / "no limit".
        // Unsanitized values would produce fractional line numbers or
        // from-the-end slices.
        const { offset, limit } = params;
        const startLine =
          typeof offset === "number" && Number.isFinite(offset)
            ? Math.max(0, Math.floor(offset) - 1)
            : 0;
        const lineCount =
          typeof limit === "number" && Number.isFinite(limit)
            ? Math.floor(limit)
            : 0;
        const endLine =
          lineCount > 0 ? startLine + lineCount : allLines.length;
        const selectedLines = allLines.slice(startLine, endLine);

        // Truncate at 50KB on whole-line boundaries.
        const displayLines = takeLinesWithinBytes(
          selectedLines,
          DEFAULT_MAX_BYTES,
        );
        const truncated = displayLines.length < selectedLines.length;

        // Number only real file lines; the truncation notice is appended
        // afterwards so it cannot be mistaken for file content.
        let output = `${header}\n${formatNumberedLines(displayLines.join("\n"), startLine + 1)}`;
        if (truncated) {
          output +=
            "\n\n[Output truncated at 50KB. Use offset/limit to read specific sections.]";
        }

        return {
          content: [{ type: "text", text: output }],
          details: {
            totalLines,
            totalBytes,
            truncated,
            fileHash,
            header,
          } satisfies ReadToolDetails,
        };
      } catch (err: unknown) {
        const message = err instanceof Error ? err.message : "unknown error";
        return errorResult(`Error reading file: ${message}`);
      }
    },
  };
}
@@ -0,0 +1,141 @@
1
+ /**
2
+ * Stale-tag recovery: attempts to salvage edits when the file's content
3
+ * hash no longer matches the tag the edit was authored against.
4
+ *
5
+ * Two strategies, tried in order:
6
+ *
7
+ * 1. **Structured-patch 3-way merge**: apply edits to cached snapshot,
8
+ * create a structured patch, then apply that patch to the live content.
9
+ * Handles most drift (unrelated lines changed, formatters, etc.).
10
+ *
11
+ * 2. **Session-chain replay**: when the structured merge refuses but the
12
+ * anchors still point at identical content, apply the edits directly to
13
+ * the live file. Less certain — the model must verify the result.
14
+ */
15
+
16
+ import { applyPatch, structuredPatch } from "diff";
17
+ import { applyEdits } from "./apply";
18
+ import {
19
+ RECOVERY_EXTERNAL_WARNING,
20
+ RECOVERY_SESSION_CHAIN_WARNING,
21
+ RECOVERY_SESSION_REPLAY_WARNING,
22
+ } from "./messages";
23
+ import type { SnapshotStore } from "./snapshots";
24
+ import type { Edit } from "./types";
25
+
26
+ // ─── Recovery result ─────────────────────────────────────────────────
27
+
28
+ export interface RecoveryResult { // Successful outcome of tryRecover; a failed recovery is reported as `null` instead.
29
+ /** Full file text after recovery: either the patch-merged content or the directly replayed content. Presumably LF-normalized because both inputs are expected to be — confirm against callers. */
30
+ text: string;
31
+ /** Human-readable warning describing what recovery did; one of the RECOVERY_* message constants, chosen by strategy and by whether the drift looks external. */
32
+ warning: string;
33
+ }
34
+
35
+ // ─── Helpers ─────────────────────────────────────────────────────────
36
+
37
/**
 * Check that every anchor line holds identical content, at the same line
 * number, in both the snapshot and the live text. If any anchor moved or
 * changed, direct replay is unsafe.
 *
 * @param snapshotText - LF-separated text the edits were authored against.
 * @param liveText - LF-separated current text of the file.
 * @param anchorLines - 1-indexed line numbers targeted by the edits.
 * @returns `true` when every anchor is a valid line number present in both
 *   texts with equal content (vacuously `true` for an empty list); `false`
 *   otherwise.
 */
function anchorsStillMatch(
  snapshotText: string,
  liveText: string,
  anchorLines: readonly number[],
): boolean {
  const snapshotLines = snapshotText.split("\n");
  const liveLines = liveText.split("\n");
  const sharedLength = Math.min(snapshotLines.length, liveLines.length);

  return anchorLines.every((line) => {
    const idx = line - 1;
    // Reject anchors that are not real line numbers. Without this guard a
    // 0, negative or fractional anchor indexes out of range in both arrays
    // and `undefined === undefined` would be reported as a match.
    if (!Number.isInteger(idx) || idx < 0 || idx >= sharedLength) {
      return false;
    }
    return snapshotLines[idx] === liveLines[idx];
  });
}
61
+
62
+ // ─── Recovery ────────────────────────────────────────────────────────
63
+
64
/**
 * Try to salvage edits whose tag no longer matches the live file.
 *
 * The snapshot named by the stale tag is looked up first. The edits are
 * applied to that snapshot, the snapshot → result difference is expressed as
 * a structured patch, and the patch is applied to the live text. If that
 * fails or changes nothing, the edits are applied directly to the live text,
 * but only when every anchor line is unchanged between snapshot and live.
 *
 * @param snapshots — the session's snapshot store
 * @param path — absolute file path
 * @param currentText — normalized LF content of the live file
 * @param fileHash — the stale tag from the edit's header
 * @param edits — the parsed edits to apply
 * @param anchorLines — anchor lines targeted by the edits (1-indexed, deduplicated)
 * @returns RecoveryResult on success, null if recovery is impossible
 */
export function tryRecover(
  snapshots: SnapshotStore,
  path: string,
  currentText: string,
  fileHash: string,
  edits: readonly Edit[],
  anchorLines: readonly number[],
): RecoveryResult | null {
  // No recorded version for this tag: it was never minted in this session.
  const snapshot = snapshots.byHash(path, fileHash);
  if (snapshot === null) {
    return null;
  }

  // The store's head is only consulted once a strategy has succeeded. If the
  // head is still the version the edit was authored against, the drift is
  // reported as external; otherwise the strategy's own warning is used.
  const warningFor = (sessionWarning: string): string => {
    const latest = snapshots.head(path);
    const driftIsExternal = latest !== null && latest.hash === snapshot.hash;
    return driftIsExternal ? RECOVERY_EXTERNAL_WARNING : sessionWarning;
  };

  // Strategy 1: replay on the snapshot, diff, and patch the live text.
  const intended = applyEdits(snapshot.text, edits).text;
  const patch = structuredPatch(
    path,
    path,
    snapshot.text,
    intended,
    undefined, // oldHeader
    undefined, // newHeader
    { context: 3 },
  );
  const merged = applyPatch(currentText, patch);
  if (merged !== false && merged !== currentText) {
    return {
      text: merged,
      warning: warningFor(RECOVERY_SESSION_CHAIN_WARNING),
    };
  }

  // Strategy 2: direct replay on the live text, allowed only when there are
  // anchors and each one still has identical content in snapshot and live.
  if (
    anchorLines.length === 0 ||
    !anchorsStillMatch(snapshot.text, currentText, anchorLines)
  ) {
    return null;
  }

  const replayed = applyEdits(currentText, edits).text;
  if (replayed === currentText) {
    // Replay produced no change — nothing to recover.
    return null;
  }
  return {
    text: replayed,
    warning: warningFor(RECOVERY_SESSION_REPLAY_WARNING),
  };
}
@@ -0,0 +1,166 @@
1
+ /**
2
+ * Per-session snapshot store used by recovery and the patcher to bind
3
+ * hashline section tags to the exact file content that minted them.
4
+ *
5
+ * A section tag is a content-derived hash of the *whole file* (see
6
+ * {@link computeFileHash}). Any read of byte-identical content mints the
7
+ * same tag, so reads of one file state fuse onto one anchor and a follow-up
8
+ * edit anchored at any line validates whenever the live file still hashes
9
+ * to it.
10
+ *
11
+ * Producers (typically `read` / `grep` / `write` tools) call
12
+ * {@link SnapshotStore.record} with the full normalized text they observed.
13
+ * The store hashes it, dedups against the per-path history, and returns the
14
+ * tag. Consumers (the patcher) resolve a stale tag back to the recorded
15
+ * full text via {@link SnapshotStore.byHash} and 3-way-merge the would-be
16
+ * edit onto the live content.
17
+ */
18
+
19
+ import { computeFileHash } from "./format";
20
+
21
+ /**
22
+ * One full-file version observed at a point in time. The tag the model sees
23
+ * is {@link Snapshot.hash}; recovery replays edits against
24
+ * {@link Snapshot.text}. All fields are readonly except `recordedAt`.
25
+ */
26
+ export interface Snapshot {
27
+ /** Path this version belongs to; the same string that was passed to the store's `record` call. */
28
+ readonly path: string;
29
+ /** Full normalized (LF, no BOM) file text as observed. NOTE(review): the read tool in this package only converts CRLF to LF before recording; BOM stripping is not visible there — confirm producers uphold the "no BOM" part. */
30
+ readonly text: string;
31
+ /** Content-derived tag for {@link Snapshot.text} (see {@link computeFileHash}). */
32
+ readonly hash: string;
33
+ /** Timestamp (ms since epoch) the version was recorded. Mutable: the in-memory store refreshes it when identical content is recorded again. */
34
+ recordedAt: number;
35
+ }
36
+
37
+ /**
38
+ * Storage seam for full-file version snapshots. The patcher calls
39
+ * {@link head} for the latest version of a path and {@link byHash} when it
40
+ * needs the specific historical version a section's stale tag names.
41
+ * Declared abstract so the backing storage can vary; lookups signal "not found" with `null`.
42
+ */
+ export abstract class SnapshotStore {
43
+ /** Most-recently recorded version for `path`, or `null` if none. */
44
+ abstract head(path: string): Snapshot | null;
45
+
46
+ /** Recorded version for `path` whose tag equals `hash`, or `null` when that tag is not in the path's history. */
47
+ abstract byHash(path: string, hash: string): Snapshot | null;
48
+
49
+ /** Record the full normalized text of `path` and return its content tag (the value later matched by {@link byHash}). */
50
+ abstract record(path: string, fullText: string): string;
51
+
52
+ /** Drop the version history for a single path. */
53
+ abstract invalidate(path: string): void;
54
+
55
+ /** Drop the version history of every path. */
56
+ abstract clear(): void;
57
+ }
58
+
59
+ const DEFAULT_MAX_PATHS = 30; // Path cap used when `maxPaths` is omitted.
60
+ const DEFAULT_MAX_VERSIONS_PER_PATH = 4; // Per-path version cap used when `maxVersionsPerPath` is omitted.
61
+ // Optional tuning knobs for InMemorySnapshotStore; every field falls back to its default when left out.
62
+ export interface InMemorySnapshotStoreOptions {
63
+ /** Maximum number of distinct paths tracked at once (default 30). When exceeded, the least-recently recorded path is evicted. */
64
+ maxPaths?: number;
65
+ /** Maximum full-file versions retained per path (default 4). Oldest dropped first. */
66
+ maxVersionsPerPath?: number;
67
+ }
68
+
69
/**
 * Turn a user-supplied cap into a usable one: the fallback when the value is
 * missing or NaN, otherwise a whole number of at least 1. `Infinity` is kept
 * and means "unbounded".
 */
function normalizeCap(value: number | undefined, fallback: number): number {
  if (value == null || Number.isNaN(value)) {
    return fallback;
  }
  return Math.max(1, Math.floor(value));
}

/**
 * In-memory {@link SnapshotStore} with a simple LRU eviction policy.
 * Per-path history is a short list of full-file versions, newest first
 * (oldest dropped first); path tracking is LRU-bounded so cold paths age out
 * automatically.
 *
 * Recording content whose hash is already in a path's history refreshes
 * recency, promotes that version to head and reuses the existing tag (read
 * fusion); recording new content puts a fresh version at the front of the
 * path history.
 *
 * Both caps are clamped to at least 1. A zero or negative cap would make the
 * store retain nothing, and a negative path cap would make eviction loop
 * forever.
 */
export class InMemorySnapshotStore extends SnapshotStore {
  // Map preserves insertion order — we use that for LRU eviction.
  // Each value is a list of snapshots, newest first.
  readonly #versions = new Map<string, Snapshot[]>();
  readonly #maxPaths: number;
  readonly #maxVersionsPerPath: number;

  /**
   * @param options - Optional caps; missing or NaN values use the defaults,
   *   and values below 1 are raised to 1.
   */
  constructor(options: InMemorySnapshotStoreOptions = {}) {
    super();
    this.#maxPaths = normalizeCap(options.maxPaths, DEFAULT_MAX_PATHS);
    this.#maxVersionsPerPath = normalizeCap(
      options.maxVersionsPerPath,
      DEFAULT_MAX_VERSIONS_PER_PATH,
    );
  }

  /** Most-recently recorded version for `path`, or `null` if none. */
  head(path: string): Snapshot | null {
    return this.#versions.get(path)?.[0] ?? null;
  }

  /** Recorded version for `path` whose tag equals `hash`, or `null`. */
  byHash(path: string, hash: string): Snapshot | null {
    const history = this.#versions.get(path);
    return history?.find((version) => version.hash === hash) ?? null;
  }

  /**
   * Record `fullText` as the current content of `path`.
   *
   * @returns The content tag (hash) of `fullText`.
   */
  record(path: string, fullText: string): string {
    const hash = computeFileHash(fullText);
    const history = this.#versions.get(path) ?? [];
    const existing = history.find((version) => version.hash === hash);

    let nextHistory: Snapshot[];
    if (existing) {
      // Same content state observed again: refresh its timestamp and promote
      // it to head (it is the current file content); the tag is reused.
      existing.recordedAt = Date.now();
      nextHistory = [
        existing,
        ...history.filter((version) => version !== existing),
      ];
    } else {
      // New content: prepend to history, cap versions per path.
      const snapshot: Snapshot = {
        path,
        text: fullText,
        hash,
        recordedAt: Date.now(),
      };
      nextHistory = [snapshot, ...history].slice(0, this.#maxVersionsPerPath);
    }

    // Refresh LRU recency: delete-then-set moves path to the end of the
    // Map's insertion order.
    this.#versions.delete(path);
    this.#versions.set(path, nextHistory);
    this.#evictPathsIfNeeded();
    return hash;
  }

  /** Drop the version history for a single path. */
  invalidate(path: string): void {
    this.#versions.delete(path);
  }

  /** Drop every version history. */
  clear(): void {
    this.#versions.clear();
  }

  /** Evict least-recently-used paths until the path cap is respected. */
  #evictPathsIfNeeded(): void {
    while (this.#versions.size > this.#maxPaths) {
      // Map keys iterate in insertion order — first key is LRU.
      const oldest = this.#versions.keys().next();
      if (oldest.done) {
        // Nothing left to evict; never spin on an empty map.
        break;
      }
      this.#versions.delete(oldest.value);
    }
  }
}