@oh-my-pi/hashline 16.2.13 → 16.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/types/messages.d.ts +3 -1
- package/dist/types/recovery.d.ts +6 -2
- package/dist/types/snapshots.d.ts +30 -3
- package/package.json +1 -1
- package/src/messages.ts +5 -1
- package/src/patcher.ts +34 -12
- package/src/prompt.md +10 -2
- package/src/recovery.ts +240 -8
- package/src/snapshots.ts +59 -7
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [16.3.0] - 2026-07-02
|
|
6
|
+
|
|
7
|
+
### Changed
|
|
8
|
+
|
|
9
|
+
- Significantly improved performance on large files by optimizing stale-anchor remap validation.
|
|
10
|
+
|
|
11
|
+
### Fixed
|
|
12
|
+
|
|
13
|
+
- Fixed an issue where snapshot tag collisions could cause line-anchored edits to be incorrectly applied to unrelated content, improving recovery and edit-preview safety.
|
|
14
|
+
- Fixed tracking of edit anchors when earlier in-session insertions or deletions shift unchanged target lines.
|
|
15
|
+
- Fixed hashline edit guidance and parsing errors for Markdown list rows.
|
|
16
|
+
|
|
5
17
|
## [16.2.8] - 2026-06-30
|
|
6
18
|
|
|
7
19
|
### Fixed
|
package/dist/types/messages.d.ts
CHANGED
|
@@ -21,7 +21,7 @@ export declare const REPLACE_PAIR_COALESCED_WARNING = "Two hunks targeted the sa
|
|
|
21
21
|
/** Bare body rows auto-converted to literal `+` rows. */
|
|
22
22
|
export declare const BARE_BODY_AUTO_PIPED_WARNING = "Auto-prefixed bare body row(s) with `+`. Body rows must be `+TEXT` literal lines.";
|
|
23
23
|
/** Unified-diff-style `-` row in a hunk body. */
|
|
24
|
-
export declare const MINUS_ROW_REJECTED = "`-` rows are not valid; the range already names the lines being changed. For
|
|
24
|
+
export declare const MINUS_ROW_REJECTED = "`-` rows are not valid; the range already names the lines being changed. For Markdown bullets or other literal `-` lines, prefix the literal row with `+`: `+- item`.";
|
|
25
25
|
/** Replace hunk with no body. */
|
|
26
26
|
export declare const EMPTY_REPLACE = "`SWAP N.=M:` needs at least one `+TEXT` body row. To delete lines, use `DEL N.=M`.";
|
|
27
27
|
/** `replace_block N:` hunk with no body. */
|
|
@@ -87,6 +87,8 @@ export declare const RECOVERY_SESSION_CHAIN_WARNING = "Recovered from a stale fi
|
|
|
87
87
|
* the chain could still misplace an anchor — hence the verify hedge.
|
|
88
88
|
*/
|
|
89
89
|
export declare const RECOVERY_SESSION_REPLAY_WARNING = "Recovered by replaying your edits onto the current file content (a prior in-session edit changed the lines you re-targeted with a stale hash). Verify the diff matches your intent.";
|
|
90
|
+
/** `Recovery`: stale anchors were relocated to unchanged live lines after drift. */
|
|
91
|
+
export declare const RECOVERY_LINE_REMAP_WARNING = "Recovered by remapping stale line anchors to unchanged current lines (file changed since the tagged read). Verify the diff matches your intent.";
|
|
90
92
|
/**
|
|
91
93
|
* `insert head:`/`insert tail:` applied despite a stale snapshot tag.
|
|
92
94
|
* Head/tail position is content-independent, so drift is non-fatal: apply
|
package/dist/types/recovery.d.ts
CHANGED
|
@@ -17,11 +17,15 @@ export interface RecoveryResult {
|
|
|
17
17
|
/**
|
|
18
18
|
* Stateless recovery driver over a {@link SnapshotStore}. Construct once and
|
|
19
19
|
* call {@link Recovery.tryRecover} per stale-tag incident. The default
|
|
20
|
-
* implementation tries
|
|
20
|
+
* implementation tries three strategies in order:
|
|
21
21
|
*
|
|
22
22
|
* 1. Apply the edits on the full-file version the tag names, then 3-way-merge
|
|
23
23
|
* the resulting patch onto the live content (handles external writes).
|
|
24
|
-
* 2.
|
|
24
|
+
* 2. Remap every stale anchor through the unchanged-line diff from the tagged
|
|
25
|
+
* snapshot to the live text, then replay on live content. This handles a
|
|
26
|
+
* prior insertion/deletion before the target while refusing changed anchors
|
|
27
|
+
* and mixed offsets across the same edit range.
|
|
28
|
+
* 3. (Session chain) If the tagged version wasn't the head, replay the edits onto
|
|
25
29
|
* the live content directly when line counts match AND every edit's anchor
|
|
26
30
|
* line content is unchanged between version and current — a prior in-session
|
|
27
31
|
* edit advanced the tag and the model's anchors still name the same logical
|
|
@@ -23,14 +23,36 @@ export interface Snapshot {
|
|
|
23
23
|
}
|
|
24
24
|
/**
|
|
25
25
|
* Storage seam for full-file version snapshots. The patcher calls {@link head}
|
|
26
|
-
* for the latest version of a path and {@link
|
|
26
|
+
* for the latest version of a path and {@link byHashExact} when it needs the
|
|
27
27
|
* specific historical version a section's stale tag names.
|
|
28
28
|
*/
|
|
29
29
|
export declare abstract class SnapshotStore {
|
|
30
30
|
/** Most-recently recorded version for `path`, or `null` if none. */
|
|
31
31
|
abstract head(path: string): Snapshot | null;
|
|
32
|
-
/**
|
|
32
|
+
/**
|
|
33
|
+
* Recorded version for `path` whose tag equals `hash`, or `null`. When two
|
|
34
|
+
* distinct texts collide on the 16-bit tag, returns the most-recently
|
|
35
|
+
* recorded one; callers that treat the tag as content identity must use
|
|
36
|
+
* {@link byHashExact} (or verify {@link Snapshot.text} via {@link byContent}).
|
|
37
|
+
*/
|
|
33
38
|
abstract byHash(path: string, hash: string): Snapshot | null;
|
|
39
|
+
/**
|
|
40
|
+
* Collision-safe {@link byHash}: the single retained version for `path`
|
|
41
|
+
* whose tag equals `hash`, or `null` when none is retained OR when two or
|
|
42
|
+
* more distinct texts collide on the tag. In the collision case there is
|
|
43
|
+
* no way to know which retained text the model's line anchors were minted
|
|
44
|
+
* against, so consumers that replay anchors (recovery, previews) must
|
|
45
|
+
* refuse rather than pick one.
|
|
46
|
+
*/
|
|
47
|
+
abstract byHashExact(path: string, hash: string): Snapshot | null;
|
|
48
|
+
/**
|
|
49
|
+
* Recorded version for `path` whose {@link Snapshot.text} equals `fullText`,
|
|
50
|
+
* or `null`. Disambiguates hash collisions where two distinct file states
|
|
51
|
+
* share the same 4-hex tag: the patcher consults this before taking the
|
|
52
|
+
* no-drift path so a colliding live text is never accepted as the exact
|
|
53
|
+
* snapshot the model's line anchors were minted against.
|
|
54
|
+
*/
|
|
55
|
+
abstract byContent(path: string, fullText: string): Snapshot | null;
|
|
34
56
|
/**
|
|
35
57
|
* Every retained version whose tag equals `hash`, across all tracked
|
|
36
58
|
* paths. The patcher uses this to recover the intended file when a section
|
|
@@ -84,13 +106,18 @@ export interface InMemorySnapshotStoreOptions {
|
|
|
84
106
|
*
|
|
85
107
|
* Recording byte-identical content again refreshes recency and reuses the
|
|
86
108
|
* existing tag (read fusion); recording new content unshifts a fresh version
|
|
87
|
-
* onto the front of the path history.
|
|
109
|
+
* onto the front of the path history. Two distinct texts that collide on the
|
|
110
|
+
* short 4-hex tag are retained as separate versions so callers can still tell
|
|
111
|
+
* them apart via {@link Snapshot.text} — the tag is only a fast index, never
|
|
112
|
+
* the identity.
|
|
88
113
|
*/
|
|
89
114
|
export declare class InMemorySnapshotStore extends SnapshotStore {
|
|
90
115
|
#private;
|
|
91
116
|
constructor(options?: InMemorySnapshotStoreOptions);
|
|
92
117
|
head(path: string): Snapshot | null;
|
|
93
118
|
byHash(path: string, hash: string): Snapshot | null;
|
|
119
|
+
byHashExact(path: string, hash: string): Snapshot | null;
|
|
120
|
+
byContent(path: string, fullText: string): Snapshot | null;
|
|
94
121
|
findByHash(hash: string): Snapshot[];
|
|
95
122
|
record(path: string, fullText: string, seenLines?: Iterable<number>): string;
|
|
96
123
|
recordSeenLines(path: string, hash: string, lines: Iterable<number>): void;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/hashline",
|
|
4
|
-
"version": "16.
|
|
4
|
+
"version": "16.3.0",
|
|
5
5
|
"description": "Hashline: a compact, line-anchored patch language and applier. Pluggable FS/IO so it works over disk, in-memory, or any custom backend.",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
package/src/messages.ts
CHANGED
|
@@ -54,7 +54,7 @@ export const BARE_BODY_AUTO_PIPED_WARNING =
|
|
|
54
54
|
|
|
55
55
|
/** Unified-diff-style `-` row in a hunk body. */
|
|
56
56
|
export const MINUS_ROW_REJECTED =
|
|
57
|
-
"`-` rows are not valid; the range already names the lines being changed. For
|
|
57
|
+
"`-` rows are not valid; the range already names the lines being changed. For Markdown bullets or other literal `-` lines, prefix the literal row with `+`: `+- item`.";
|
|
58
58
|
|
|
59
59
|
/** Replace hunk with no body. */
|
|
60
60
|
export const EMPTY_REPLACE = `\`SWAP N${HL_RANGE_SEP}M:\` needs at least one \`+TEXT\` body row. To delete lines, use \`DEL N${HL_RANGE_SEP}M\`.`;
|
|
@@ -168,6 +168,10 @@ export const RECOVERY_SESSION_CHAIN_WARNING =
|
|
|
168
168
|
export const RECOVERY_SESSION_REPLAY_WARNING =
|
|
169
169
|
"Recovered by replaying your edits onto the current file content (a prior in-session edit changed the lines you re-targeted with a stale hash). Verify the diff matches your intent.";
|
|
170
170
|
|
|
171
|
+
/** `Recovery`: stale anchors were relocated to unchanged live lines after drift. */
|
|
172
|
+
export const RECOVERY_LINE_REMAP_WARNING =
|
|
173
|
+
"Recovered by remapping stale line anchors to unchanged current lines (file changed since the tagged read). Verify the diff matches your intent.";
|
|
174
|
+
|
|
171
175
|
/**
|
|
172
176
|
* `insert head:`/`insert tail:` applied despite a stale snapshot tag.
|
|
173
177
|
* Head/tail position is content-independent, so drift is non-fatal: apply
|
package/src/patcher.ts
CHANGED
|
@@ -38,7 +38,7 @@ import {
|
|
|
38
38
|
import { MismatchError } from "./mismatch";
|
|
39
39
|
import { detectLineEnding, type LineEnding, normalizeToLF, restoreLineEndings, stripBom } from "./normalize";
|
|
40
40
|
import { Recovery, type RecoveryResult } from "./recovery";
|
|
41
|
-
import type { SnapshotStore } from "./snapshots";
|
|
41
|
+
import type { Snapshot, SnapshotStore } from "./snapshots";
|
|
42
42
|
import type { ApplyResult, BlockResolution, BlockResolver, Edit, FileOp } from "./types";
|
|
43
43
|
|
|
44
44
|
export interface PatcherOptions {
|
|
@@ -480,14 +480,15 @@ export class Patcher {
|
|
|
480
480
|
|
|
481
481
|
/**
|
|
482
482
|
* Reject an anchored edit that references a line the read which minted
|
|
483
|
-
* `expected` never displayed.
|
|
484
|
-
*
|
|
485
|
-
*
|
|
486
|
-
*
|
|
487
|
-
* the tagged content
|
|
483
|
+
* `expected` never displayed. `matchedSnapshot` is the store version whose
|
|
484
|
+
* text equals the live normalized content — the exact snapshot the model
|
|
485
|
+
* anchored against. Absent means no provenance was recorded (the tag was
|
|
486
|
+
* externally minted or aged out), so the edit applies as before. Only runs
|
|
487
|
+
* on the no-drift path, where anchor line numbers index the tagged content
|
|
488
|
+
* 1:1.
|
|
488
489
|
*/
|
|
489
|
-
#assertSeenLines(section: PatchSection,
|
|
490
|
-
const seen =
|
|
490
|
+
#assertSeenLines(section: PatchSection, expected: string, matchedSnapshot: Snapshot | null): void {
|
|
491
|
+
const seen = matchedSnapshot?.seenLines;
|
|
491
492
|
if (!seen || seen.size === 0) return;
|
|
492
493
|
const unseen = section.collectAnchorLines().filter(line => !seen.has(line));
|
|
493
494
|
if (unseen.length === 0) return;
|
|
@@ -520,7 +521,23 @@ export class Patcher {
|
|
|
520
521
|
}): ApplyResult {
|
|
521
522
|
const { section, canonicalPath, exists, normalized, edits } = args;
|
|
522
523
|
const expected = exists ? section.fileHash : undefined;
|
|
523
|
-
|
|
524
|
+
// A 16-bit tag can collide across two different file states, so equality
|
|
525
|
+
// on `computeFileHash(normalized) === expected` alone is not enough to
|
|
526
|
+
// prove the live text IS the snapshot the tag names. Also require that,
|
|
527
|
+
// when a snapshot for `(path, expected)` is retained, exactly one stored
|
|
528
|
+
// version carries the tag and its full text matches the live text. If
|
|
529
|
+
// multiple versions share the tag, the header is ambiguous: there is no
|
|
530
|
+
// safe way to know which stored text the model's line anchors came from.
|
|
531
|
+
const storedSnapshotsForTag =
|
|
532
|
+
expected === undefined
|
|
533
|
+
? []
|
|
534
|
+
: this.snapshots.findByHash(expected).filter(snapshot => snapshot.path === canonicalPath);
|
|
535
|
+
const ambiguousStoredTag = storedSnapshotsForTag.length > 1;
|
|
536
|
+
const storedSnapshotForTag = expected === undefined ? null : this.snapshots.byHash(canonicalPath, expected);
|
|
537
|
+
const hashMatches = expected !== undefined && computeFileHash(normalized) === expected;
|
|
538
|
+
const matchedSnapshot = hashMatches ? this.snapshots.byContent(canonicalPath, normalized) : null;
|
|
539
|
+
const liveMatches =
|
|
540
|
+
hashMatches && !ambiguousStoredTag && (storedSnapshotForTag === null || matchedSnapshot !== null);
|
|
524
541
|
|
|
525
542
|
// Resolve `replace_block N:` edits to concrete ranges before recovery
|
|
526
543
|
// runs. Block anchors are expressed against the snapshot the section tag
|
|
@@ -535,8 +552,10 @@ export class Patcher {
|
|
|
535
552
|
const resolveWarnings: string[] = [];
|
|
536
553
|
let resolved: readonly Edit[] = edits;
|
|
537
554
|
if (hasBlockEdit(edits)) {
|
|
538
|
-
|
|
539
|
-
|
|
555
|
+
if (ambiguousStoredTag) {
|
|
556
|
+
throw this.#mismatchError(section, canonicalPath, normalized, expected ?? "", true);
|
|
557
|
+
}
|
|
558
|
+
const baseText = expected === undefined || liveMatches ? normalized : storedSnapshotForTag?.text;
|
|
540
559
|
if (baseText === undefined) {
|
|
541
560
|
throw this.#mismatchError(section, canonicalPath, normalized, expected ?? "", false);
|
|
542
561
|
}
|
|
@@ -559,7 +578,7 @@ export class Patcher {
|
|
|
559
578
|
// The line numbers in `edits` index the exact content the tag names.
|
|
560
579
|
// Reject any anchor the read never displayed: editing lines the model
|
|
561
580
|
// has not seen is the off-by-memory mistake that mangles files.
|
|
562
|
-
if (expected !== undefined) this.#assertSeenLines(section,
|
|
581
|
+
if (expected !== undefined) this.#assertSeenLines(section, expected, matchedSnapshot);
|
|
563
582
|
const result = applyEdits(normalized, resolved);
|
|
564
583
|
return withResolveWarnings(blockResolutions.length > 0 ? { ...result, blockResolutions } : result);
|
|
565
584
|
}
|
|
@@ -571,6 +590,9 @@ export class Patcher {
|
|
|
571
590
|
const result = applyEdits(normalized, resolved);
|
|
572
591
|
return withResolveWarnings({ ...result, warnings: [HEADTAIL_DRIFT_WARNING, ...(result.warnings ?? [])] });
|
|
573
592
|
}
|
|
593
|
+
if (ambiguousStoredTag) {
|
|
594
|
+
throw this.#mismatchError(section, canonicalPath, normalized, expected ?? "", true);
|
|
595
|
+
}
|
|
574
596
|
// File drifted: try to replay the edit against the version the tag
|
|
575
597
|
// names and 3-way-merge it onto the live content.
|
|
576
598
|
const recovered = this.recovery.tryRecover({
|
package/src/prompt.md
CHANGED
|
@@ -19,7 +19,7 @@ Single line: `SWAP N.=N:` / `DEL N`. The range is the ORIGINAL lines you touch;
|
|
|
19
19
|
</ops>
|
|
20
20
|
|
|
21
21
|
<body-rows>
|
|
22
|
-
Body rows appear only under a `:` header. Every body row is `+TEXT` — add a literal line `TEXT`, verbatim (leading whitespace kept); `+` alone adds a blank line. No other row kind. NEVER write `-old` or a bare/context line. To keep a line, leave it out of every range.
|
|
22
|
+
Body rows appear only under a `:` header. Every body row is `+TEXT` — add a literal line `TEXT`, verbatim (leading whitespace kept); `+` alone adds a blank line. No other row kind. NEVER write `-old` or a bare/context line. To keep a line, leave it out of every range. Literal lines starting with `-`/`+` still need the body prefix: Markdown `- item` → `+- item`, `+ item` → `++ item`.
|
|
23
23
|
</body-rows>
|
|
24
24
|
|
|
25
25
|
<rules>
|
|
@@ -103,6 +103,14 @@ INS.TAIL:
|
|
|
103
103
|
+greet("everyone")
|
|
104
104
|
```
|
|
105
105
|
|
|
106
|
+
Insert Markdown bullets — the leading `+` is the body-row marker; the file receives `- task`:
|
|
107
|
+
```
|
|
108
|
+
[PLAN.md#A1B2]
|
|
109
|
+
INS.POST 2:
|
|
110
|
+
+- task
|
|
111
|
+
+ - nested task
|
|
112
|
+
```
|
|
113
|
+
|
|
106
114
|
Replace the whole `greet` function block — `SWAP.BLK 1:` resolves lines 1–3 (the `def` header through `print(msg)`); line 4 is a separate statement and stays:
|
|
107
115
|
```
|
|
108
116
|
[greet.py#A1B2]
|
|
@@ -160,5 +168,5 @@ INS.POST 3:
|
|
|
160
168
|
If you remember nothing else:
|
|
161
169
|
1. RE-GROUND AFTER EVERY EDIT. Every apply mints a fresh `#TAG` and renumbers — take the next edit's numbers from the edit response or a fresh `read`. Stale tag or surprise? STOP, re-`read`.
|
|
162
170
|
2. RANGES ARE TIGHT. Cover only lines that change; a stale wide range shreds everything it spans. Whole construct → `SWAP.BLK N`.
|
|
163
|
-
3. THE BODY IS THE FINAL CONTENT.
|
|
171
|
+
3. THE BODY IS THE FINAL CONTENT. Every body row starts with `+`; Markdown bullets use `+- item`, not `- item`.
|
|
164
172
|
</critical>
|
package/src/recovery.ts
CHANGED
|
@@ -10,7 +10,12 @@
|
|
|
10
10
|
*/
|
|
11
11
|
import * as Diff from "diff";
|
|
12
12
|
import { applyEdits } from "./apply";
|
|
13
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
RECOVERY_EXTERNAL_WARNING,
|
|
15
|
+
RECOVERY_LINE_REMAP_WARNING,
|
|
16
|
+
RECOVERY_SESSION_CHAIN_WARNING,
|
|
17
|
+
RECOVERY_SESSION_REPLAY_WARNING,
|
|
18
|
+
} from "./messages";
|
|
14
19
|
import type { Snapshot, SnapshotStore } from "./snapshots";
|
|
15
20
|
import type { Anchor, ApplyResult, Edit } from "./types";
|
|
16
21
|
|
|
@@ -97,6 +102,219 @@ function verifyAnchorContent(previousText: string, currentText: string, edits: r
|
|
|
97
102
|
return true;
|
|
98
103
|
}
|
|
99
104
|
|
|
105
|
+
function buildLineMap(previousText: string, currentText: string): Map<number, number> {
|
|
106
|
+
const previousLines = previousText.split("\n");
|
|
107
|
+
const currentLines = currentText.split("\n");
|
|
108
|
+
const changes = Diff.diffArrays(previousLines, currentLines);
|
|
109
|
+
const map = new Map<number, number>();
|
|
110
|
+
let previousLine = 1;
|
|
111
|
+
let currentLine = 1;
|
|
112
|
+
|
|
113
|
+
for (const change of changes) {
|
|
114
|
+
const count = change.value.length;
|
|
115
|
+
if (change.added) {
|
|
116
|
+
currentLine += count;
|
|
117
|
+
continue;
|
|
118
|
+
}
|
|
119
|
+
if (change.removed) {
|
|
120
|
+
previousLine += count;
|
|
121
|
+
continue;
|
|
122
|
+
}
|
|
123
|
+
for (let offset = 0; offset < count; offset++) {
|
|
124
|
+
map.set(previousLine + offset, currentLine + offset);
|
|
125
|
+
}
|
|
126
|
+
previousLine += count;
|
|
127
|
+
currentLine += count;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
return map;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
/** Values appearing two or more times in `lines`, for O(1) duplicate checks. */
|
|
134
|
+
function collectDuplicatedValues(lines: readonly string[]): Set<string> {
|
|
135
|
+
const seen = new Set<string>();
|
|
136
|
+
const duplicated = new Set<string>();
|
|
137
|
+
for (const value of lines) {
|
|
138
|
+
if (seen.has(value)) duplicated.add(value);
|
|
139
|
+
else seen.add(value);
|
|
140
|
+
}
|
|
141
|
+
return duplicated;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
interface AnchorNeighbors {
|
|
145
|
+
/** Nearest non-anchor line immediately preceding the anchor's run (lower line number, `start - 1`), or `undefined` at the file edge. */
|
|
146
|
+
before: number | undefined;
|
|
147
|
+
/** Nearest non-anchor line immediately following the anchor's run (higher line number, `end + 1`), or `undefined` at the file edge. */
|
|
148
|
+
after: number | undefined;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
/**
|
|
152
|
+
* Nearest non-anchor context line on each side of every anchor, computed in
|
|
153
|
+
* one sweep over the sorted anchor set. Anchors in one contiguous run share
|
|
154
|
+
* both neighbors (the lines just outside the run), so this replaces the
|
|
155
|
+
* per-anchor directional walk across anchored ranges — O(anchors²) on a
|
|
156
|
+
* large block replacement — with one O(anchors log anchors) pass.
|
|
157
|
+
*/
|
|
158
|
+
function computeAnchorNeighbors(anchorLines: ReadonlySet<number>, lineCount: number): Map<number, AnchorNeighbors> {
|
|
159
|
+
const sorted = [...anchorLines].sort((a, b) => a - b);
|
|
160
|
+
const neighbors = new Map<number, AnchorNeighbors>();
|
|
161
|
+
for (let i = 0; i < sorted.length; ) {
|
|
162
|
+
let j = i;
|
|
163
|
+
while (j + 1 < sorted.length && sorted[j + 1] === sorted[j] + 1) j++;
|
|
164
|
+
const start = sorted[i];
|
|
165
|
+
const end = sorted[j];
|
|
166
|
+
const before = start - 1 >= 1 && start - 1 <= lineCount ? start - 1 : undefined;
|
|
167
|
+
const after = end + 1 <= lineCount ? end + 1 : undefined;
|
|
168
|
+
for (let k = i; k <= j; k++) neighbors.set(sorted[k], { before, after });
|
|
169
|
+
i = j + 1;
|
|
170
|
+
}
|
|
171
|
+
return neighbors;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
function validateDuplicateAnchorContext(
|
|
175
|
+
line: number,
|
|
176
|
+
mapped: number,
|
|
177
|
+
neighbors: AnchorNeighbors,
|
|
178
|
+
lineMap: ReadonlyMap<number, number>,
|
|
179
|
+
): boolean {
|
|
180
|
+
let checked = false;
|
|
181
|
+
const { before, after } = neighbors;
|
|
182
|
+
if (before !== undefined) {
|
|
183
|
+
checked = true;
|
|
184
|
+
if (lineMap.get(before) !== mapped - (line - before)) return false;
|
|
185
|
+
}
|
|
186
|
+
if (after !== undefined) {
|
|
187
|
+
checked = true;
|
|
188
|
+
if (lineMap.get(after) !== mapped + (after - line)) return false;
|
|
189
|
+
}
|
|
190
|
+
return checked;
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
function validateUniqueAnchorContext(
|
|
194
|
+
line: number,
|
|
195
|
+
mapped: number,
|
|
196
|
+
neighbors: AnchorNeighbors,
|
|
197
|
+
lineMap: ReadonlyMap<number, number>,
|
|
198
|
+
): boolean {
|
|
199
|
+
const offset = mapped - line;
|
|
200
|
+
const { before, after } = neighbors;
|
|
201
|
+
if (after !== undefined) return lineMap.get(after) === after + offset;
|
|
202
|
+
return before !== undefined && lineMap.get(before) === before + offset;
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
function validateRemappedAnchorContext(
|
|
206
|
+
previousText: string,
|
|
207
|
+
currentText: string,
|
|
208
|
+
lineMap: ReadonlyMap<number, number>,
|
|
209
|
+
edits: readonly Edit[],
|
|
210
|
+
): boolean {
|
|
211
|
+
const previousLines = previousText.split("\n");
|
|
212
|
+
const currentLines = currentText.split("\n");
|
|
213
|
+
const anchorLines = new Set(collectAnchorLines(edits));
|
|
214
|
+
// Precompute once per validation pass: which line values are duplicated,
|
|
215
|
+
// and each anchor's nearest non-anchor context. The per-anchor forms —
|
|
216
|
+
// indexOf/lastIndexOf full-file scans plus directional walks across
|
|
217
|
+
// anchored ranges — are O(anchors×lines) + O(anchors²) and blow up on
|
|
218
|
+
// large block replacements.
|
|
219
|
+
const duplicatedPrevious = collectDuplicatedValues(previousLines);
|
|
220
|
+
const duplicatedCurrent = collectDuplicatedValues(currentLines);
|
|
221
|
+
const anchorNeighbors = computeAnchorNeighbors(anchorLines, previousLines.length);
|
|
222
|
+
|
|
223
|
+
for (const [line, neighbors] of anchorNeighbors) {
|
|
224
|
+
const mapped = lineMap.get(line);
|
|
225
|
+
if (mapped === undefined) return false;
|
|
226
|
+
if (!duplicatedPrevious.has(previousLines[line - 1]) && !duplicatedCurrent.has(currentLines[mapped - 1])) {
|
|
227
|
+
if (!validateUniqueAnchorContext(line, mapped, neighbors, lineMap)) {
|
|
228
|
+
return false;
|
|
229
|
+
}
|
|
230
|
+
continue;
|
|
231
|
+
}
|
|
232
|
+
if (!validateDuplicateAnchorContext(line, mapped, neighbors, lineMap)) {
|
|
233
|
+
return false;
|
|
234
|
+
}
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
return true;
|
|
238
|
+
}
|
|
239
|
+
|
|
240
|
+
function remapEditsToCurrent(previousText: string, currentText: string, edits: readonly Edit[]): Edit[] | null {
|
|
241
|
+
const lineMap = buildLineMap(previousText, currentText);
|
|
242
|
+
if (!validateRemappedAnchorContext(previousText, currentText, lineMap, edits)) return null;
|
|
243
|
+
const offsets: number[] = [];
|
|
244
|
+
|
|
245
|
+
const mapLine = (line: number): number | null => {
|
|
246
|
+
const mapped = lineMap.get(line);
|
|
247
|
+
if (mapped === undefined) return null;
|
|
248
|
+
offsets.push(mapped - line);
|
|
249
|
+
return mapped;
|
|
250
|
+
};
|
|
251
|
+
|
|
252
|
+
const mapAnchor = (anchor: Anchor): Anchor | null => {
|
|
253
|
+
const line = mapLine(anchor.line);
|
|
254
|
+
return line === null ? null : { line };
|
|
255
|
+
};
|
|
256
|
+
|
|
257
|
+
const remapped: Edit[] = [];
|
|
258
|
+
for (const edit of edits) {
|
|
259
|
+
if (edit.kind === "delete") {
|
|
260
|
+
const anchor = mapAnchor(edit.anchor);
|
|
261
|
+
if (anchor === null) return null;
|
|
262
|
+
remapped.push({ ...edit, anchor });
|
|
263
|
+
continue;
|
|
264
|
+
}
|
|
265
|
+
if (edit.kind === "block") {
|
|
266
|
+
const anchor = mapAnchor(edit.anchor);
|
|
267
|
+
if (anchor === null) return null;
|
|
268
|
+
remapped.push({ ...edit, anchor });
|
|
269
|
+
continue;
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
let blockStart = edit.blockStart;
|
|
273
|
+
if (blockStart !== undefined) {
|
|
274
|
+
const mappedBlockStart = mapLine(blockStart);
|
|
275
|
+
if (mappedBlockStart === null) return null;
|
|
276
|
+
blockStart = mappedBlockStart;
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
const cursor = edit.cursor;
|
|
280
|
+
if (cursor.kind !== "before_anchor" && cursor.kind !== "after_anchor") {
|
|
281
|
+
remapped.push(blockStart === edit.blockStart ? edit : { ...edit, blockStart });
|
|
282
|
+
continue;
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
const anchor = mapAnchor(cursor.anchor);
|
|
286
|
+
if (anchor === null) return null;
|
|
287
|
+
remapped.push({ ...edit, cursor: { kind: cursor.kind, anchor }, blockStart });
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
if (offsets.length === 0) return null;
|
|
291
|
+
const firstOffset = offsets[0];
|
|
292
|
+
if (firstOffset === 0) return null;
|
|
293
|
+
if (!offsets.every(offset => offset === firstOffset)) return null;
|
|
294
|
+
return remapped;
|
|
295
|
+
}
|
|
296
|
+
|
|
297
|
+
function replayRemappedAnchorsOnCurrent(
|
|
298
|
+
previousText: string,
|
|
299
|
+
currentText: string,
|
|
300
|
+
edits: readonly Edit[],
|
|
301
|
+
): RecoveryResult | null {
|
|
302
|
+
const remapped = remapEditsToCurrent(previousText, currentText, edits);
|
|
303
|
+
if (remapped === null) return null;
|
|
304
|
+
let applied: ApplyResult;
|
|
305
|
+
try {
|
|
306
|
+
applied = applyEdits(currentText, remapped);
|
|
307
|
+
} catch {
|
|
308
|
+
return null;
|
|
309
|
+
}
|
|
310
|
+
if (applied.text === currentText) return null;
|
|
311
|
+
return {
|
|
312
|
+
text: applied.text,
|
|
313
|
+
firstChangedLine: applied.firstChangedLine,
|
|
314
|
+
warnings: [RECOVERY_LINE_REMAP_WARNING, ...(applied.warnings ?? [])],
|
|
315
|
+
};
|
|
316
|
+
}
|
|
317
|
+
|
|
100
318
|
function replaySessionChainOnCurrent(
|
|
101
319
|
previousText: string,
|
|
102
320
|
currentText: string,
|
|
@@ -150,11 +368,15 @@ function isHeadSnapshot(head: Snapshot | null, snapshot: Snapshot): boolean {
|
|
|
150
368
|
/**
|
|
151
369
|
* Stateless recovery driver over a {@link SnapshotStore}. Construct once and
|
|
152
370
|
* call {@link Recovery.tryRecover} per stale-tag incident. The default
|
|
153
|
-
* implementation tries
|
|
371
|
+
* implementation tries three strategies in order:
|
|
154
372
|
*
|
|
155
373
|
* 1. Apply the edits on the full-file version the tag names, then 3-way-merge
|
|
156
374
|
* the resulting patch onto the live content (handles external writes).
|
|
157
|
-
* 2.
|
|
375
|
+
* 2. Remap every stale anchor through the unchanged-line diff from the tagged
|
|
376
|
+
* snapshot to the live text, then replay on live content. This handles a
|
|
377
|
+
* prior insertion/deletion before the target while refusing changed anchors
|
|
378
|
+
* and mixed offsets across the same edit range.
|
|
379
|
+
* 3. (Session chain) If the tagged version wasn't the head, replay the edits onto
|
|
158
380
|
* the live content directly when line counts match AND every edit's anchor
|
|
159
381
|
* line content is unchanged between version and current — a prior in-session
|
|
160
382
|
* edit advanced the tag and the model's anchors still name the same logical
|
|
@@ -170,16 +392,26 @@ export class Recovery {
|
|
|
170
392
|
*/
|
|
171
393
|
tryRecover(args: RecoveryArgs): RecoveryResult | null {
|
|
172
394
|
const { path, currentText, fileHash, edits } = args;
|
|
173
|
-
|
|
395
|
+
// Collision-safe lookup: when two retained texts share the 16-bit tag
|
|
396
|
+
// there is no way to know which one the model's anchors were minted
|
|
397
|
+
// against — replaying against the wrong collider would land the edit
|
|
398
|
+
// on unrelated content. Refuse and let the caller reject (re-read).
|
|
399
|
+
const snapshot = this.store.byHashExact(path, fileHash);
|
|
174
400
|
if (!snapshot) return null;
|
|
175
401
|
const isHead = isHeadSnapshot(this.store.head(path), snapshot);
|
|
176
402
|
const recoveryWarning = isHead ? RECOVERY_EXTERNAL_WARNING : RECOVERY_SESSION_CHAIN_WARNING;
|
|
177
403
|
const merged = applyEditsToSnapshot(snapshot.text, currentText, edits, recoveryWarning);
|
|
178
404
|
if (merged !== null) return merged;
|
|
179
|
-
//
|
|
180
|
-
//
|
|
181
|
-
// anchor
|
|
182
|
-
//
|
|
405
|
+
// Line-shift fallback: the 3-way merge refused, but unchanged anchor
|
|
406
|
+
// lines may have moved because a prior edit inserted or deleted rows
|
|
407
|
+
// before them. Remap only when every anchor resolves through the diff
|
|
408
|
+
// with one consistent offset; otherwise the edit range was touched.
|
|
409
|
+
const remapped = replayRemappedAnchorsOnCurrent(snapshot.text, currentText, edits);
|
|
410
|
+
if (remapped !== null) return remapped;
|
|
411
|
+
// Session-chain fallback: replay onto current is gated by line-count
|
|
412
|
+
// equality AND anchor-content alignment — see
|
|
413
|
+
// `replaySessionChainOnCurrent` for why both guards together still
|
|
414
|
+
// don't fully prove correctness.
|
|
183
415
|
if (!isHead) return replaySessionChainOnCurrent(snapshot.text, currentText, edits);
|
|
184
416
|
return null;
|
|
185
417
|
}
|
package/src/snapshots.ts
CHANGED
|
@@ -10,9 +10,9 @@
|
|
|
10
10
|
* Producers (typically `read` / `search` / `write` tools) call
|
|
11
11
|
* {@link SnapshotStore.record} with the full normalized text they observed.
|
|
12
12
|
* The store hashes it, dedups against the per-path history, and returns the
|
|
13
|
-
* tag. Consumers (the patcher) resolve a stale tag back to the
|
|
14
|
-
* text via {@link SnapshotStore.
|
|
15
|
-
* the live content.
|
|
13
|
+
* tag. Consumers (recovery, the patcher) resolve a stale tag back to the
|
|
14
|
+
* recorded full text via {@link SnapshotStore.byHashExact} and 3-way-merge the
|
|
15
|
+
* would-be edit onto the live content.
|
|
16
16
|
*
|
|
17
17
|
* The abstract base class lets callers plug in whatever storage they like
|
|
18
18
|
* (LRU, persistent SQLite, etc.). {@link InMemorySnapshotStore} ships as a
|
|
@@ -49,16 +49,40 @@ export interface Snapshot {
|
|
|
49
49
|
|
|
50
50
|
/**
|
|
51
51
|
* Storage seam for full-file version snapshots. The patcher calls {@link head}
|
|
52
|
-
* for the latest version of a path and {@link
|
|
52
|
+
* for the latest version of a path and {@link byHashExact} when it needs the
|
|
53
53
|
* specific historical version a section's stale tag names.
|
|
54
54
|
*/
|
|
55
55
|
export abstract class SnapshotStore {
|
|
56
56
|
/** Most-recently recorded version for `path`, or `null` if none. */
|
|
57
57
|
abstract head(path: string): Snapshot | null;
|
|
58
58
|
|
|
59
|
-
/**
|
|
59
|
+
/**
|
|
60
|
+
* Recorded version for `path` whose tag equals `hash`, or `null`. When two
|
|
61
|
+
* distinct texts collide on the 16-bit tag, returns the most-recently
|
|
62
|
+
* recorded one; callers that treat the tag as content identity must use
|
|
63
|
+
* {@link byHashExact} (or verify {@link Snapshot.text} via {@link byContent}).
|
|
64
|
+
*/
|
|
60
65
|
abstract byHash(path: string, hash: string): Snapshot | null;
|
|
61
66
|
|
|
67
|
+
/**
|
|
68
|
+
* Collision-safe {@link byHash}: the single retained version for `path`
|
|
69
|
+
* whose tag equals `hash`, or `null` when none is retained OR when two or
|
|
70
|
+
* more distinct texts collide on the tag. In the collision case there is
|
|
71
|
+
* no way to know which retained text the model's line anchors were minted
|
|
72
|
+
* against, so consumers that replay anchors (recovery, previews) must
|
|
73
|
+
* refuse rather than pick one.
|
|
74
|
+
*/
|
|
75
|
+
abstract byHashExact(path: string, hash: string): Snapshot | null;
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* Recorded version for `path` whose {@link Snapshot.text} equals `fullText`,
|
|
79
|
+
* or `null`. Disambiguates hash collisions where two distinct file states
|
|
80
|
+
* share the same 4-hex tag: the patcher consults this before taking the
|
|
81
|
+
* no-drift path so a colliding live text is never accepted as the exact
|
|
82
|
+
* snapshot the model's line anchors were minted against.
|
|
83
|
+
*/
|
|
84
|
+
abstract byContent(path: string, fullText: string): Snapshot | null;
|
|
85
|
+
|
|
62
86
|
/**
|
|
63
87
|
* Every retained version whose tag equals `hash`, across all tracked
|
|
64
88
|
* paths. The patcher uses this to recover the intended file when a section
|
|
@@ -133,7 +157,10 @@ export interface InMemorySnapshotStoreOptions {
|
|
|
133
157
|
*
|
|
134
158
|
* Recording byte-identical content again refreshes recency and reuses the
|
|
135
159
|
* existing tag (read fusion); recording new content unshifts a fresh version
|
|
136
|
-
* onto the front of the path history.
|
|
160
|
+
* onto the front of the path history. Two distinct texts that collide on the
|
|
161
|
+
* short 4-hex tag are retained as separate versions so callers can still tell
|
|
162
|
+
* them apart via {@link Snapshot.text} — the tag is only a fast index, never
|
|
163
|
+
* the identity.
|
|
137
164
|
*/
|
|
138
165
|
export class InMemorySnapshotStore extends SnapshotStore {
|
|
139
166
|
readonly #versions: LRUCache<string, Snapshot[]>;
|
|
@@ -162,6 +189,25 @@ export class InMemorySnapshotStore extends SnapshotStore {
|
|
|
162
189
|
return history?.find(version => version.hash === hash) ?? null;
|
|
163
190
|
}
|
|
164
191
|
|
|
192
|
+
byHashExact(path: string, hash: string): Snapshot | null {
|
|
193
|
+
const history = this.#versions.get(path);
|
|
194
|
+
if (history === undefined) return null;
|
|
195
|
+
let match: Snapshot | null = null;
|
|
196
|
+
for (const version of history) {
|
|
197
|
+
if (version.hash !== hash) continue;
|
|
198
|
+
// Two retained versions with one tag are distinct texts by
|
|
199
|
+
// construction (record() dedups on full-text equality) — ambiguous.
|
|
200
|
+
if (match !== null) return null;
|
|
201
|
+
match = version;
|
|
202
|
+
}
|
|
203
|
+
return match;
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
byContent(path: string, fullText: string): Snapshot | null {
|
|
207
|
+
const history = this.#versions.get(path);
|
|
208
|
+
return history?.find(version => version.text === fullText) ?? null;
|
|
209
|
+
}
|
|
210
|
+
|
|
165
211
|
findByHash(hash: string): Snapshot[] {
|
|
166
212
|
const matches: Snapshot[] = [];
|
|
167
213
|
for (const history of this.#versions.values()) {
|
|
@@ -176,7 +222,13 @@ export class InMemorySnapshotStore extends SnapshotStore {
|
|
|
176
222
|
const hash = computeFileHash(fullText);
|
|
177
223
|
// `get` refreshes LRU recency for `path`.
|
|
178
224
|
const history = this.#versions.get(path) ?? [];
|
|
179
|
-
|
|
225
|
+
// Dedup requires full-text equality, not just tag equality: two distinct
|
|
226
|
+
// texts that happen to share the 4-hex tag are DIFFERENT snapshots — fusing
|
|
227
|
+
// them under one entry would corrupt seenLines (attaching lines from
|
|
228
|
+
// text B onto the stored text A) and let the patcher misresolve which
|
|
229
|
+
// snapshot the section tag names when it does 3-way merge or seen-line
|
|
230
|
+
// validation. See issue #4075.
|
|
231
|
+
const existing = history.find(version => version.hash === hash && version.text === fullText);
|
|
180
232
|
if (existing) {
|
|
181
233
|
// Same content state observed again: refresh recency and promote to
|
|
182
234
|
// head (it is the current file content), then reuse the tag. Union any
|