@oh-my-pi/hashline 15.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,163 @@
1
+ /**
2
+ * Recover from a stale section file-hash by replaying the would-be edit
3
+ * against a cached pre-edit snapshot of the file and 3-way-merging the
4
+ * result onto the current on-disk content.
5
+ *
6
+ * The patcher consults this when it sees a section hash that doesn't match
7
+ * the live file content. The recovery class is stateless apart from the
8
+ * {@link SnapshotStore} it queries; the snapshot store is the seam that
9
+ * lets you plug in your own caching strategy.
10
+ */
11
+ import * as Diff from "diff";
12
+ import { applyEdits } from "./apply";
13
+ import { computeFileHash } from "./format";
14
+ import { RECOVERY_EXTERNAL_WARNING, RECOVERY_SESSION_CHAIN_WARNING, RECOVERY_SESSION_REPLAY_WARNING } from "./messages";
15
+ import type { Snapshot, SnapshotStore } from "./snapshots";
16
+ import type { ApplyOptions, ApplyResult, Edit } from "./types";
17
+
18
// Hashline sections address exact lines, so a replayed hunk must never be
// accepted on approximately-matching context (e.g. a look-alike closing brace
// far from the intended spot). Zero fuzz keeps Diff.applyPatch strict: when
// the snapshot replay does not line up exactly, recovery refuses and the
// caller is expected to re-read the file.
const RECOVERY_FUZZ_FACTOR = 0;
22
+
23
/** Inputs for one {@link Recovery.tryRecover} attempt. */
export interface RecoveryArgs {
  /** Path whose snapshot history is looked up in the store. */
  path: string;
  /** Live file content that the recovered edit is merged onto. */
  currentText: string;
  /** The stale file hash; used to find the matching snapshot. */
  fileHash: string;
  /** Edits to replay. The array itself is never mutated (a copy is passed on). */
  edits: readonly Edit[];
  /** Options forwarded to `applyEdits`; treated as `{}` when omitted. */
  options?: ApplyOptions;
}
30
+
31
/** Outcome of a successful recovery. */
export interface RecoveryResult {
  /** File text after the recovered edit has been applied. */
  text: string;
  /**
   * 1-indexed number of the first line that changed, measured against the
   * live `currentText`; `undefined` when no line is reported.
   */
  firstChangedLine: number | undefined;
  /** Warnings gathered while recovering, including the user-facing recovery banner. */
  warnings: string[];
}
39
+
40
/**
 * Replay `edits` against the pre-edit snapshot text and carry the resulting
 * change over to the live text as a patch.
 *
 * @param previousText Snapshot text the edits were authored against.
 * @param currentText Live file content that receives the patch.
 * @param edits Edits to replay (copied before being handed to `applyEdits`).
 * @param options Options forwarded to `applyEdits`.
 * @param recoveryWarning Banner placed first in the warnings when a changed line is reported.
 * @returns The merged result, or `null` when the edits throw, are a no-op on
 *   the snapshot, the patch does not apply, or the merge leaves the live text
 *   unchanged.
 */
function applyEditsToSnapshot(
  previousText: string,
  currentText: string,
  edits: readonly Edit[],
  options: ApplyOptions,
  recoveryWarning: string,
): RecoveryResult | null {
  let replayed: ApplyResult;
  try {
    replayed = applyEdits(previousText, Array.from(edits), options);
  } catch {
    // Any failure while replaying means this snapshot cannot rescue the edit.
    return null;
  }
  if (replayed.text === previousText) return null;

  // Express the edit as a patch (3 context lines) and apply it to the live text.
  const delta = Diff.structuredPatch("file", "file", previousText, replayed.text, "", "", { context: 3 });
  const rebased = Diff.applyPatch(currentText, delta, { fuzzFactor: RECOVERY_FUZZ_FACTOR });
  if (typeof rebased !== "string") return null;
  if (rebased === currentText) return null;

  const firstChangedLine = findFirstChangedLine(currentText, rebased) ?? replayed.firstChangedLine;
  const warnings = [...(replayed.warnings ?? [])];
  if (firstChangedLine !== undefined) warnings.unshift(recoveryWarning);

  return { text: rebased, firstChangedLine, warnings };
}
65
+
66
/**
 * Session-chain fallback: apply `edits` straight onto the live text.
 *
 * This is attempted only when the snapshot and the live text have the same
 * number of lines, on the reasoning that earlier edits then did not shift
 * line numbers and every line number in `edits` still points at the same row.
 *
 * @returns The replayed result with the session-replay banner first, or
 *   `null` when line counts differ, `applyEdits` throws, or nothing changes.
 */
function replaySessionChainOnCurrent(
  previousText: string,
  currentText: string,
  edits: readonly Edit[],
  options: ApplyOptions,
): RecoveryResult | null {
  const countLines = (text: string): number => text.split("\n").length;
  if (countLines(previousText) !== countLines(currentText)) return null;

  let replayed: ApplyResult;
  try {
    replayed = applyEdits(currentText, Array.from(edits), options);
  } catch {
    return null;
  }
  if (replayed.text === currentText) return null;

  const inherited = replayed.warnings ?? [];
  return {
    text: replayed.text,
    firstChangedLine: replayed.firstChangedLine,
    warnings: [RECOVERY_SESSION_REPLAY_WARNING, ...inherited],
  };
}
89
+
90
/**
 * Rebuild a candidate pre-edit text by laying the cached lines of a sparse
 * snapshot over the live text.
 *
 * @param currentText Live file content used as the base.
 * @param snapshotLines 1-indexed line number → cached content.
 * @returns The base text with every cached line substituted in; the text is
 *   padded with empty lines when a cached line number lies past its end.
 */
function buildSparseOverlayText(currentText: string, snapshotLines: ReadonlyMap<number, string>): string {
  const rows = currentText.split("\n");

  // Find the highest cached line so the row list can be grown to reach it.
  let highestLine = 0;
  snapshotLines.forEach((_content, lineNum) => {
    if (lineNum > highestLine) highestLine = lineNum;
  });
  while (rows.length < highestLine) rows.push("");

  snapshotLines.forEach((content, lineNum) => {
    rows[lineNum - 1] = content;
  });
  return rows.join("\n");
}
102
+
103
/**
 * Locate the first line where two texts differ.
 *
 * @returns The 1-indexed line number of the first divergence between `a` and
 *   `b` (a missing line counts as different), or `undefined` when they are equal.
 */
function findFirstChangedLine(a: string, b: string): number | undefined {
  if (a === b) return undefined;
  const left = a.split("\n");
  const right = b.split("\n");
  const limit = Math.max(left.length, right.length);

  let row = 0;
  while (row < limit && left[row] === right[row]) row++;
  return row < limit ? row + 1 : undefined;
}
114
+
115
/** True when `snapshot` is the very same object as the store's `head` (reference identity). */
function isHeadSnapshot(head: Snapshot | null, snapshot: Snapshot): boolean {
  return snapshot === head;
}
118
+
119
/**
 * Stateless recovery driver over a {@link SnapshotStore}. Construct once and
 * call {@link Recovery.tryRecover} per stale-hash incident.
 *
 * The snapshot matching the stale hash decides which path is taken:
 *
 * - Snapshot has `fullText`:
 *   1. Apply the edits on the cached `fullText`, then merge the resulting
 *      patch onto the current text.
 *   2. (Session chain) If that fails and the snapshot is not the head, retry
 *      directly on the current text when line counts match — a previous
 *      in-session edit advanced the hash but did not shift line numbers.
 * - Snapshot is sparse (lines map only): overlay the cached lines on the
 *   current text, verify the rebuilt text hashes to the expected value, then
 *   merge as in step 1.
 */
export class Recovery {
  constructor(readonly store: SnapshotStore) {}

  /**
   * Attempt recovery. Returns `null` when no path forward is found — the
   * caller should then surface a {@link MismatchError}.
   *
   * @param args Path, live text, stale hash, edits and optional apply options.
   * @returns The recovered text with warnings, or `null`.
   */
  tryRecover(args: RecoveryArgs): RecoveryResult | null {
    const { path, currentText, fileHash, edits, options = {} } = args;
    const head = this.store.head(path);
    const snapshot = this.store.byHash(path, fileHash);
    if (!snapshot) return null;

    const isHead = isHeadSnapshot(head, snapshot);
    const recoveryWarning = isHead ? RECOVERY_EXTERNAL_WARNING : RECOVERY_SESSION_CHAIN_WARNING;
    const isSessionChain = !isHead;

    if (snapshot.fullText !== undefined) {
      // The full-text path never reads `snapshot.lines`, so a snapshot that
      // carries only `fullText` (which the store accepts) is still usable.
      const merged = applyEditsToSnapshot(snapshot.fullText, currentText, edits, options, recoveryWarning);
      if (merged !== null) return merged;
      // Session-chain fast-path: a prior in-session edit changed the same
      // line(s) the user is now re-targeting with the stale hash. When line
      // counts match, the edits' line numbers still resolve to the right
      // rows — replay onto the current text directly.
      if (isSessionChain) return replaySessionChainOnCurrent(snapshot.fullText, currentText, edits, options);
      return null;
    }

    // Sparse path: with no cached lines there is nothing to overlay.
    if (snapshot.lines.size === 0) return null;

    const overlayText = buildSparseOverlayText(currentText, snapshot.lines);
    // Only trust the reconstruction when it hashes to the stale hash.
    if (computeFileHash(overlayText) !== fileHash) return null;
    return applyEditsToSnapshot(overlayText, currentText, edits, options, recoveryWarning);
  }
}
@@ -0,0 +1,171 @@
1
+ /**
2
+ * Per-session snapshot store used by {@link Recovery} to rescue patches when
3
+ * a section's file hash has drifted (file changed externally or a prior
4
+ * in-session edit advanced the hash).
5
+ *
6
+ * Producers (typically a `read` tool) record snapshots as they observe file
7
+ * content. Consumers (the patcher) query for the snapshot whose hash matches
8
+ * a stale section, then 3-way-merge the would-be edit onto the live content.
9
+ *
10
+ * The abstract base class lets callers plug in whatever storage they like
11
+ * (LRU, persistent SQLite, etc.). {@link InMemorySnapshotStore} ships as a
12
+ * sensible default backed by `lru-cache` so paths age out automatically.
13
+ */
14
+ import { LRUCache } from "lru-cache/raw";
15
+
16
/**
 * A file as it was observed at one point in time. A full-file read records
 * `fullText`; a partial view (search matches, range reads) records only a
 * sparse set of `(lineNumber, content)` pairs in `lines`.
 */
export interface Snapshot {
  /** Exact observed content, keyed by 1-indexed line number. */
  readonly lines: Map<number, string>;
  /** Full normalized text; present when the read observed the whole file. */
  fullText?: string;
  /** 4-hex hash that accompanied the read, when known. */
  fileHash?: string;
  /** When the snapshot was recorded, in milliseconds since the epoch. */
  recordedAt: number;
}
32
+
33
/** Optional extras a producer may attach when recording a snapshot. */
export interface SnapshotMetadata {
  /** Full normalized text, for producers that observed the whole file. */
  fullText?: string;
  /** 4-hex hash that accompanied the read, when known. */
  fileHash?: string;
}
40
+
41
/**
 * Storage contract for file-content snapshots. The patcher asks {@link head}
 * for the newest snapshot of a path, and {@link byHash} for the particular
 * historical snapshot matching a section's stale hash.
 */
export abstract class SnapshotStore {
  /** Newest snapshot recorded for `path`; `null` when there is none. */
  abstract head(path: string): Snapshot | null;

  /** Newest snapshot for `path` whose `fileHash` equals the given `fileHash`; `null` when none matches. */
  abstract byHash(path: string, fileHash: string): Snapshot | null;

  /**
   * Record consecutive lines, e.g. the output of a `read` tool.
   * `startLine` is the 1-indexed number of the first entry in `lines`.
   */
  abstract recordContiguous(
    path: string,
    startLine: number,
    lines: readonly string[],
    metadata?: SnapshotMetadata,
  ): void;

  /** Record scattered `(lineNumber, content)` pairs, e.g. a `search` match with its context. */
  abstract recordSparse(path: string, entries: Iterable<readonly [number, string]>, metadata?: SnapshotMetadata): void;

  /** Forget the snapshot history of one path. */
  abstract invalidate(path: string): void;

  /** Forget the snapshot history of every path. */
  abstract clear(): void;
}
70
+
71
/** Default cap on the number of distinct paths tracked at once. */
const DEFAULT_MAX_PATHS = 30;
/** Default cap on the number of snapshots retained per path. */
const DEFAULT_MAX_SNAPSHOTS_PER_PATH = 4;
73
+
74
/**
 * Report whether any incoming line contradicts what is already recorded.
 *
 * A conflict is a line number present in `existing` whose stored content
 * differs from the incoming content; line numbers not yet recorded never
 * conflict.
 */
function hasConflict(
  existing: ReadonlyMap<number, string>,
  incoming: ReadonlyArray<readonly [number, string]>,
): boolean {
  return incoming.some(([lineNum, content]) => {
    const recorded = existing.get(lineNum);
    return recorded !== undefined && recorded !== content;
  });
}
84
+
85
/**
 * True only when both the existing snapshot and the incoming metadata carry
 * a file hash and the two hashes differ. A missing hash on either side is
 * never a conflict.
 */
function hasHashConflict(existing: Snapshot, metadata: SnapshotMetadata): boolean {
  const incomingHash = metadata.fileHash;
  const recordedHash = existing.fileHash;
  if (incomingHash === undefined || recordedHash === undefined) return false;
  return incomingHash !== recordedHash;
}
88
+
89
/**
 * Decide whether two snapshots describe the same file state.
 *
 * When both carry a file hash, the hashes alone decide. Otherwise, when both
 * carry full text, the texts decide. With neither pair available the
 * snapshots are treated as distinct.
 */
function isSameSnapshotIdentity(left: Snapshot, right: Snapshot): boolean {
  const bothHashed = left.fileHash !== undefined && right.fileHash !== undefined;
  if (bothHashed) return left.fileHash === right.fileHash;

  const bothFull = left.fullText !== undefined && right.fullText !== undefined;
  return bothFull && left.fullText === right.fullText;
}
94
+
95
/** Tuning knobs for {@link InMemorySnapshotStore}. */
export interface InMemorySnapshotStoreOptions {
  /** Upper bound on distinct paths tracked at once; LRU-evicted. Defaults to 30. */
  maxPaths?: number;
  /** Upper bound on snapshots kept per path; the oldest are dropped first. Defaults to 4. */
  maxSnapshotsPerPath?: number;
}
101
+
102
/**
 * In-memory {@link SnapshotStore} backed by `lru-cache`. Per-path snapshot
 * history is a short list, newest first (oldest dropped first); path tracking
 * is LRU-bounded so cold paths age out automatically.
 *
 * A new record merges into the head snapshot when its lines don't conflict
 * with the head's, the recorded `fileHash` (if any) still agrees, and the
 * recorded `fullText` (if any) is unchanged; otherwise a fresh snapshot is
 * pushed onto the front of the history list.
 */
export class InMemorySnapshotStore extends SnapshotStore {
  readonly #snapshots: LRUCache<string, Snapshot[]>;
  readonly #maxSnapshotsPerPath: number;

  /**
   * @param options Capacity limits; see {@link InMemorySnapshotStoreOptions}
   *   for the defaults used when a field is omitted.
   */
  constructor(options: InMemorySnapshotStoreOptions = {}) {
    super();
    this.#snapshots = new LRUCache<string, Snapshot[]>({ max: options.maxPaths ?? DEFAULT_MAX_PATHS });
    this.#maxSnapshotsPerPath = options.maxSnapshotsPerPath ?? DEFAULT_MAX_SNAPSHOTS_PER_PATH;
  }

  /** Newest snapshot for `path`, or `null` when none is stored. */
  head(path: string): Snapshot | null {
    return this.#snapshots.get(path)?.[0] ?? null;
  }

  /** Newest snapshot for `path` whose `fileHash` equals `fileHash`, or `null`. */
  byHash(path: string, fileHash: string): Snapshot | null {
    const history = this.#snapshots.get(path);
    // History is ordered newest-first, so the first match is the most recent.
    return history?.find(entry => entry.fileHash === fileHash) ?? null;
  }

  /**
   * Record consecutive lines starting at the 1-indexed `startLine`.
   * A call with no lines and no `fullText` is ignored.
   */
  recordContiguous(path: string, startLine: number, lines: readonly string[], metadata: SnapshotMetadata = {}): void {
    if (lines.length === 0 && metadata.fullText === undefined) return;
    const entries: Array<readonly [number, string]> = lines.map((line, idx) => [startLine + idx, line] as const);
    this.#record(path, entries, metadata);
  }

  /**
   * Record scattered `(lineNumber, content)` pairs.
   * A call with no entries and no `fullText` is ignored.
   */
  recordSparse(path: string, entries: Iterable<readonly [number, string]>, metadata: SnapshotMetadata = {}): void {
    const arr = Array.from(entries);
    if (arr.length === 0 && metadata.fullText === undefined) return;
    this.#record(path, arr, metadata);
  }

  /** Drop the snapshot history for a single path. */
  invalidate(path: string): void {
    this.#snapshots.delete(path);
  }

  /** Drop every snapshot history. */
  clear(): void {
    this.#snapshots.clear();
  }

  /**
   * Merge the record into the head snapshot when compatible, otherwise push
   * a new snapshot to the front of the path's history and trim the history
   * to the configured length.
   */
  #record(path: string, entries: ReadonlyArray<readonly [number, string]>, metadata: SnapshotMetadata): void {
    const history = this.#snapshots.get(path) ?? [];
    const head = history[0];
    const now = Date.now();

    // A different full text is a different file state even when no hash was
    // supplied and no recorded line conflicts (e.g. lines were only appended).
    // Merging would leave the head's old `fileHash` attached to the new text,
    // so `byHash` would hand back the wrong base for replay.
    const fullTextDiverges =
      head?.fullText !== undefined && metadata.fullText !== undefined && head.fullText !== metadata.fullText;

    if (head && !fullTextDiverges && !hasConflict(head.lines, entries) && !hasHashConflict(head, metadata)) {
      for (const [lineNum, content] of entries) head.lines.set(lineNum, content);
      if (metadata.fullText !== undefined) head.fullText = metadata.fullText;
      if (metadata.fileHash !== undefined) head.fileHash = metadata.fileHash;
      head.recordedAt = now;
      // `get` above already touched LRU recency for this key.
      return;
    }

    const nextSnapshot: Snapshot = {
      lines: new Map(entries),
      ...metadata,
      recordedAt: now,
    };
    // Remove older entries describing the same file state before prepending.
    const deduped = history.filter(entry => !isSameSnapshotIdentity(entry, nextSnapshot));
    this.#snapshots.set(path, [nextSnapshot, ...deduped].slice(0, this.#maxSnapshotsPerPath));
  }
}
package/src/stream.ts ADDED
@@ -0,0 +1,132 @@
1
+ /**
2
+ * Lazily format a stream of UTF-8 bytes into hashline-numbered lines, yielded
3
+ * as bounded text chunks. Used to send `read`-style file content to consumers
4
+ * without materializing the full file at once.
5
+ *
6
+ * Each yielded chunk is at most {@link StreamOptions.maxChunkLines} lines and
7
+ * at most {@link StreamOptions.maxChunkBytes} UTF-8 bytes (whichever fires
8
+ * first). A single line longer than the byte limit is not split; it is emitted whole as its own chunk.
9
+ */
10
+ import { formatNumberedLine } from "./format";
11
+ import type { StreamOptions } from "./types";
12
+
13
/** Stream options with every default filled in. */
interface ResolvedStreamOptions {
  /** Line number given to the first emitted line. */
  startLine: number;
  /** Line-count threshold at which a chunk is flushed. */
  maxChunkLines: number;
  /** UTF-8 byte threshold at which a chunk is flushed. */
  maxChunkBytes: number;
}
18
+
19
/**
 * Fill in defaults for any option left unset: lines start at 1, and chunks
 * are limited to 200 lines or 64 KiB.
 */
function resolveStreamOptions(options: StreamOptions): ResolvedStreamOptions {
  const startLine = options.startLine ?? 1;
  const maxChunkLines = options.maxChunkLines ?? 200;
  const maxChunkBytes = options.maxChunkBytes ?? 64 * 1024;
  return { startLine, maxChunkLines, maxChunkBytes };
}
26
+
27
/** Accumulates formatted lines and hands them back as bounded chunks. */
interface ChunkEmitter {
  /** Add one raw line; returns whatever chunks became complete as a result (possibly none). */
  pushLine: (line: string) => string[];
  /** Return the buffered lines as one chunk and reset the buffer; `undefined` when nothing is buffered. */
  flush: () => string | undefined;
}
31
+
32
/**
 * Build a stateful emitter that numbers incoming lines (starting at
 * `options.startLine`), buffers them, and releases the buffer as a
 * newline-joined chunk whenever a line or byte limit is reached.
 */
function createChunkEmitter(options: ResolvedStreamOptions): ChunkEmitter {
  let nextLineNumber = options.startLine;
  let buffered: string[] = [];
  let bufferedBytes = 0;

  /** Join and return the buffered lines, resetting the buffer. */
  function flush(): string | undefined {
    if (buffered.length === 0) return undefined;
    const chunk = buffered.join("\n");
    buffered = [];
    bufferedBytes = 0;
    return chunk;
  }

  /** Format and buffer one line, returning any chunks completed by it. */
  function pushLine(line: string): string[] {
    const formatted = formatNumberedLine(nextLineNumber, line);
    nextLineNumber += 1;

    const lineBytes = Buffer.byteLength(formatted, "utf-8");
    const emitted: string[] = [];
    const drain = (): void => {
      const chunk = flush();
      if (chunk) emitted.push(chunk);
    };

    // If appending (plus one separator byte) would break a limit, release
    // what is already buffered first. An empty buffer is never flushed here,
    // so an oversized line still gets through on its own.
    if (buffered.length > 0) {
      const tooManyLines = buffered.length >= options.maxChunkLines;
      const tooManyBytes = bufferedBytes + 1 + lineBytes > options.maxChunkBytes;
      if (tooManyLines || tooManyBytes) drain();
    }

    const separatorBytes = buffered.length === 0 ? 0 : 1;
    buffered.push(formatted);
    bufferedBytes += separatorBytes + lineBytes;

    // Release immediately once the chunk has reached either limit.
    if (buffered.length >= options.maxChunkLines || bufferedBytes >= options.maxChunkBytes) drain();
    return emitted;
  }

  return { pushLine, flush };
}
72
+
73
/**
 * Duck-type check for a WHATWG `ReadableStream`: any non-null object that
 * exposes a callable `getReader`.
 */
function isReadableStream(value: unknown): value is ReadableStream<Uint8Array> {
  if (typeof value !== "object" || value === null) return false;
  if (!("getReader" in value)) return false;
  return typeof (value as { getReader?: unknown }).getReader === "function";
}
81
+
82
/**
 * Adapt a `ReadableStream` to an async generator of its chunks.
 *
 * A reader is acquired up front and its lock is released when iteration
 * ends, whether by exhaustion, early exit, or an error. Falsy chunk values
 * are skipped.
 */
async function* bytesFromReadableStream(stream: ReadableStream<Uint8Array>): AsyncGenerator<Uint8Array> {
  const reader = stream.getReader();
  try {
    for (let result = await reader.read(); !result.done; result = await reader.read()) {
      if (result.value) yield result.value;
    }
  } finally {
    reader.releaseLock();
  }
}
94
+
95
/**
 * Decode a UTF-8 byte source into lines, number each line, and yield the
 * result as bounded text chunks.
 *
 * Lines are split on `\n`; a single trailing `\r` is stripped from each line.
 * Text after the last newline is emitted as a final line, and a source that
 * produces no text at all still yields one empty numbered line.
 *
 * @param source A `ReadableStream` or any async iterable of byte chunks.
 * @param options Start line and chunk limits; defaults are applied by `resolveStreamOptions`.
 * @returns An async generator of formatted chunks.
 */
export async function* streamHashLines(
  source: ReadableStream<Uint8Array> | AsyncIterable<Uint8Array>,
  options: StreamOptions = {},
): AsyncGenerator<string> {
  const resolved = resolveStreamOptions(options);
  const decoder = new TextDecoder("utf-8");
  const chunks = isReadableStream(source) ? bytesFromReadableStream(source) : source;
  const emitter = createChunkEmitter(resolved);

  // Text received so far that is not yet terminated by a newline. Between
  // chunks it never contains "\n".
  let pending = "";
  let sawAnyLine = false;

  for await (const chunk of chunks) {
    // The carried-over text has no newline, so only the newly decoded part
    // needs scanning. This keeps a long line spread over many chunks linear
    // instead of rescanning it from the start on every chunk.
    const scanFrom = pending.length;
    pending += decoder.decode(chunk, { stream: true });

    let lineStart = 0;
    let nl = pending.indexOf("\n", scanFrom);
    while (nl !== -1) {
      const raw = pending.slice(lineStart, nl);
      const line = raw.endsWith("\r") ? raw.slice(0, -1) : raw;
      sawAnyLine = true;
      for (const out of emitter.pushLine(line)) yield out;
      lineStart = nl + 1;
      nl = pending.indexOf("\n", lineStart);
    }
    // Drop the consumed lines once per chunk rather than once per line.
    if (lineStart > 0) pending = pending.slice(lineStart);
  }

  // Flush any bytes the decoder was still holding for an incomplete sequence.
  pending += decoder.decode();
  if (pending.length > 0) {
    sawAnyLine = true;
    const tail = pending.endsWith("\r") ? pending.slice(0, -1) : pending;
    for (const out of emitter.pushLine(tail)) yield out;
  }
  if (!sawAnyLine) {
    // Empty input is still reported as a single empty line.
    for (const out of emitter.pushLine("")) yield out;
  }

  const last = emitter.flush();
  if (last) yield last;
}