pi-hashline-edit-pro 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -8
- package/package.json +1 -1
- package/prompts/edit-snippet.md +1 -1
- package/prompts/edit.md +14 -14
- package/prompts/read-guidelines.md +2 -2
- package/prompts/read-snippet.md +1 -1
- package/prompts/read.md +8 -8
- package/src/edit-diff.ts +2 -2
- package/src/edit-response.ts +2 -2
- package/src/edit.ts +3 -3
- package/src/hashline/hash.ts +18 -9
- package/src/hashline/index.ts +7 -4
- package/src/hashline/parse.ts +14 -11
- package/src/hashline/resolve.ts +1 -1
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# pi-hashline-edit-pro
|
|
2
2
|
|
|
3
|
-
A [pi-coding-agent](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent) extension that replaces the built-in `read` and `edit` tools with a hash-anchored line-editing workflow. **Strict semantics** — no silent relocation, no autocorrection, no fuzzy fallback. **Higher-entropy anchors** — 4-character content hashes over a 64-character URL-safe base64 alphabet (24 bits / 16 777 216 buckets) so birthday-paradox collisions are effectively zero in any realistic file.
|
|
3
|
+
A [pi-coding-agent](https://github.com/badlogic/pi-mono/tree/main/packages/coding-agent) extension that replaces the built-in `read` and `edit` tools with a hash-anchored line-editing workflow. **Strict semantics** — no silent relocation, no autocorrection, no fuzzy fallback. **Higher-entropy anchors** — `#`-prefixed 4-character content hashes over a 64-character URL-safe base64 alphabet (24 bits / 16 777 216 buckets) so birthday-paradox collisions are effectively zero in any realistic file.
|
|
4
4
|
|
|
5
5
|
This is a fork of [pi-hashline-edit](https://github.com/RimuruW/pi-hashline-edit) by RimuruW. The strict-semantics policy is unchanged. This fork extends the upstream design in two compounding ways: a 4-character hash length and an occurrence-aware discriminator that makes identical content at different positions hash to different values.
|
|
6
6
|
|
|
@@ -29,7 +29,7 @@ pi install /path/to/pi-hashline-edit-pro
|
|
|
29
29
|
|
|
30
30
|
### `read` — tagged line output
|
|
31
31
|
|
|
32
|
-
Text files are returned with a
|
|
32
|
+
Text files are returned with a `#HASH:content` prefix on every line. The line number is no longer part of the wire format — only the `#`-prefixed 4-character hash followed by the line content. Example output for the source below; the hashes are the real xxHash-derived values for the file content shown:
|
|
33
33
|
|
|
34
34
|
```js
|
|
35
35
|
function hello() {
|
|
@@ -40,12 +40,12 @@ function hello() {
|
|
|
40
40
|
would be returned as:
|
|
41
41
|
|
|
42
42
|
```text
|
|
43
|
-
0qH3:function hello() {
|
|
44
|
-
szJr: console.log("world");
|
|
45
|
-
_zlP:}
|
|
43
|
+
#0qH3:function hello() {
|
|
44
|
+
#szJr: console.log("world");
|
|
45
|
+
#_zlP:}
|
|
46
46
|
```
|
|
47
47
|
|
|
48
|
-
- `HASH` — 4-character content hash from the URL-safe base64 alphabet `A-Za-z0-9-_
|
|
48
|
+
- `HASH` — `#`-prefixed 4-character content hash from the URL-safe base64 alphabet `A-Za-z0-9-_` (e.g. `#aB3x`).
|
|
49
49
|
|
|
50
50
|
Optional parameters:
|
|
51
51
|
|
|
@@ -105,7 +105,7 @@ The post-edit diff (with `+`/`-` markers and new `HASH:content` anchors) is expo
|
|
|
105
105
|
|
|
106
106
|
## Hashing
|
|
107
107
|
|
|
108
|
-
Hashes are computed with [xxhashjs](https://github.com/pierrec/js-xxhash) (xxHash32), then mapped to a 4-character string from the URL-safe base64 alphabet `A-Za-z0-9-_` — 64 distinct characters, 6 bits per position, **24 bits of entropy per anchor**.
|
|
108
|
+
Hashes are computed with [xxhashjs](https://github.com/pierrec/js-xxhash) (xxHash32), then mapped to a `#`-prefixed 4-character string from the URL-safe base64 alphabet `A-Za-z0-9-_` — 64 distinct characters, 6 bits per position, **24 bits of entropy per anchor**.
|
|
109
109
|
|
|
110
110
|
The alphabet is sized for an LLM consumer. The model tokenizes — it doesn't squint at pixel glyphs — so the human-readability heuristics used by smaller hand-curated alphabets (no G/L/I/O because they look like digits, no vowels so the hash doesn't accidentally spell a word, no hex digits so it can't be confused with `0xFF`) don't apply. The full 64 chars give maximum entropy per character, with case and digits included.
|
|
111
111
|
|
|
@@ -120,7 +120,7 @@ The runtime always precomputes the full per-line hash array for a file via `comp
|
|
|
120
120
|
|
|
121
121
|
### Trade-off: the bare-prefix detector
|
|
122
122
|
|
|
123
|
-
With
|
|
123
|
+
With the `#` prefix format, the bare-prefix detector regex `^\s*#[A-Za-z0-9_-]{4}:` is highly specific — it only matches lines starting with `#` followed by exactly 4 base64 chars and `:`. This eliminates false positives from common code patterns like `init:`, `data:`, `else:`, etc. that plagued the old 4-char-only detector. The detector rejects edit lines matching this pattern with `[E_BARE_HASH_PREFIX]` to prevent the model from accidentally pasting hash anchors into file content.
|
|
124
124
|
|
|
125
125
|
## Development
|
|
126
126
|
|
package/package.json
CHANGED
package/prompts/edit-snippet.md
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
Edit a text file via
|
|
1
|
+
Edit a text file via #HASH anchors from read
|
package/prompts/edit.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
Patch a text file using
|
|
1
|
+
Patch a text file using `#HASH` anchors copied verbatim from `read`.
|
|
2
2
|
|
|
3
3
|
Put all operations on one file in a single `edit` call. Stack every region into the `edits` array, even when they are far apart. Anchors within one call must all come from the same pre-edit read; the runtime applies them atomically against that one snapshot, so you do not adjust anchors for line-number shifts between edits in the same call.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Anchors are `#` + 4 characters (e.g. `#aB3x`), alphabet `A-Za-z0-9-_`. The wire format for `start`/`end`/`pos` is the anchor only — no line number, no trailing content, no line content.
|
|
6
6
|
|
|
7
7
|
Ops:
|
|
8
8
|
- `replace` — replace the inclusive range `start`..`end`. Both anchors are required. Single line: `start = end`. To delete a range, use `lines: []`. Do NOT use the `pos` field on `replace`; use `start`.
|
|
@@ -14,45 +14,45 @@ Examples:
|
|
|
14
14
|
1. Single line replace:
|
|
15
15
|
```json
|
|
16
16
|
{ "path": "src/main.ts", "edits": [
|
|
17
|
-
{ "op": "replace", "start": "MQXV", "end": "MQXV", "lines": ["const x = 1;"] }
|
|
17
|
+
{ "op": "replace", "start": "#MQXV", "end": "#MQXV", "lines": ["const x = 1;"] }
|
|
18
18
|
] }
|
|
19
19
|
```
|
|
20
20
|
|
|
21
21
|
2. Range replace (3 lines → 3 new lines):
|
|
22
22
|
```json
|
|
23
23
|
{ "path": "src/main.ts", "edits": [
|
|
24
|
-
{ "op": "replace", "start": "ZPMQ", "end": "VRWS", "lines": [
|
|
24
|
+
{ "op": "replace", "start": "#ZPMQ", "end": "#VRWS", "lines": [
|
|
25
25
|
"function greet(name) {",
|
|
26
26
|
" return `Hello, ${name}`;",
|
|
27
27
|
"}"
|
|
28
|
-
|
|
28
|
+
] }
|
|
29
29
|
] }
|
|
30
30
|
```
|
|
31
31
|
|
|
32
32
|
3. Multiple regions in one call (delete two non-adjacent ranges, insert before a third anchor):
|
|
33
33
|
```json
|
|
34
34
|
{ "path": "src/server.ts", "edits": [
|
|
35
|
-
{ "op": "replace", "start": "aB3x", "end": "xY7q", "lines": [] },
|
|
36
|
-
{ "op": "replace", "start": "MQXV", "end": "ZPMQ", "lines": [] },
|
|
37
|
-
{ "op": "prepend", "pos": "VRWS", "lines": ["// inserted before VRWS"] }
|
|
35
|
+
{ "op": "replace", "start": "#aB3x", "end": "#xY7q", "lines": [] },
|
|
36
|
+
{ "op": "replace", "start": "#MQXV", "end": "#ZPMQ", "lines": [] },
|
|
37
|
+
{ "op": "prepend", "pos": "#VRWS", "lines": ["// inserted before VRWS"] }
|
|
38
38
|
] }
|
|
39
39
|
```
|
|
40
40
|
|
|
41
41
|
Rules:
|
|
42
|
-
- `replace` requires both `start` and `end`. A single-line replace is `start=X, end=X`. To replace more than one line, set `end` to a different line's
|
|
43
|
-
- `start`, `end`, `pos` are
|
|
44
|
-
- `lines` is literal file content. No
|
|
42
|
+
- `replace` requires both `start` and `end`. A single-line replace is `start=X, end=X`. To replace more than one line, set `end` to a different line's anchor.
|
|
43
|
+
- `start`, `end`, `pos` are HASH anchors only (e.g. `#aB3x`). Other forms are rejected with `[E_BAD_REF]`.
|
|
44
|
+
- `lines` is literal file content. No `#HASH:` prefix, no leading `+`/`-` (those are read/diff metadata, not file content). Lines starting with `#` + 4 base64 chars + `:` are checked; if detected, the edit is rejected with `[E_BARE_HASH_PREFIX]`. For `.py` files, this becomes a `[W_BARE_HASH_PREFIX]` warning instead (Python comments like `#TODO:` or `#NOTE:` trigger the detector).
|
|
45
45
|
- Copy anchors from the most recent `read` of the file. Do not guess or construct them.
|
|
46
46
|
- All edits in one call must be non-conflicting. The runtime rejects with `[E_EDIT_CONFLICT]` if: two `replace` ranges overlap; two `append`/`prepend` target the same insertion boundary (e.g. two EOF appends on a newline-terminated file); or an `append`/`prepend` falls inside a `replace` range in the same call. Fix: merge into one, use different boundaries, or split into a follow-up `edit` call.
|
|
47
47
|
- If `lines` matches the current content byte-for-byte, the edit is classified as `Classification: noop` (file unchanged, not an error).
|
|
48
48
|
|
|
49
|
-
On success (`changed` mode, default), the response text contains an `--- Anchors ---` block with fresh
|
|
49
|
+
On success (`changed` mode, default), the response text contains an `--- Anchors ---` block with fresh `#HASH:content` for the changed region (2 lines of context, capped at ~12 lines / 50 KB). Use those for nearby follow-up edits instead of re-reading. If the response says `Anchors omitted; use read for subsequent edits`, the region was too large — call `read` again. For distant follow-ups, or on any error, call `read` again. `full` and `ranges` modes put previews in `details`; the model only needs what's in the text.
|
|
50
50
|
|
|
51
|
-
Errors are text starting with a bracketed code (e.g. `[E_BAD_SHAPE]`, `[E_STALE_ANCHOR]`, `[E_BAD_OP]`, `[E_INVALID_PATCH]`, `[E_LEGACY_SHAPE]`, `[E_EDIT_CONFLICT]`, `[E_BAD_REF]`, `[E_AMBIGUOUS_ANCHOR]`, `[E_BARE_HASH_PREFIX]`, `[E_WOULD_EMPTY]`). The message tells you what to retry; stale-anchor errors include `>>> HASH:content` lines, ready to copy.
|
|
51
|
+
Errors are text starting with a bracketed code (e.g. `[E_BAD_SHAPE]`, `[E_STALE_ANCHOR]`, `[E_BAD_OP]`, `[E_INVALID_PATCH]`, `[E_LEGACY_SHAPE]`, `[E_EDIT_CONFLICT]`, `[E_BAD_REF]`, `[E_AMBIGUOUS_ANCHOR]`, `[E_BARE_HASH_PREFIX]`, `[E_WOULD_EMPTY]`). The message tells you what to retry; stale-anchor errors include `>>> #HASH:content` lines, ready to copy.
|
|
52
52
|
|
|
53
53
|
The legacy `oldText`/`newText` shape (top-level or as `op: "replace_text"`) is rejected with `[E_LEGACY_SHAPE]`. Use hash-anchored edits instead.
|
|
54
54
|
|
|
55
55
|
Auto-read after write:
|
|
56
|
-
- After a successful `write`, the result includes a `--- Auto-read (hashline anchors) ---` block with HASH:content for the written file.
|
|
56
|
+
- After a successful `write`, the result includes a `--- Auto-read (hashline anchors) ---` block with `#HASH:content` for the written file.
|
|
57
57
|
- Use those anchors directly for `edit` calls without a separate `read`.
|
|
58
58
|
- This enables a seamless write → edit workflow with no extra tool calls.
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
- Use read before edit when you do not have current HASH anchors for the file.
|
|
2
|
-
- Copy exactly the
|
|
3
|
-
- A HASH
|
|
2
|
+
- Copy exactly the HASH (the `#` + 4 characters before the `:`); never include the `:` or line content in `start`/`end`/`pos`.
|
|
3
|
+
- A HASH always starts with `#`; the body may contain `-` as a normal alphabet character.
|
package/prompts/read-snippet.md
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
Read a text file with HASH:content anchors for edit (copy the HASH
|
|
1
|
+
Read a text file with #HASH:content anchors for edit (copy the #HASH into `start`/`end`/`pos`)
|
package/prompts/read.md
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
Read a text file. Each line is returned as
|
|
1
|
+
Read a text file. Each line is returned as `#HASH:content`. The HASH starts with `#` followed by 4 base64 characters before the first `:`; the content after is the line verbatim. Pass the HASH (e.g. `#aB3x`) into `edit`'s `start`/`end` (for `replace`) or `pos` (for `append`/`prepend`) — never include the line content.
|
|
2
2
|
|
|
3
3
|
HASH shape:
|
|
4
|
-
-
|
|
4
|
+
- 5 characters total: `#` prefix + 4 characters from the URL-safe base64 alphabet `A-Za-z0-9-_` (e.g. `#aB3x`, `#4yN-`, `#-qkl`).
|
|
5
5
|
- The line number is not part of the wire format. Anchor by HASH, never by reading a line number off the rendered output.
|
|
6
6
|
|
|
7
7
|
HASH → edit:
|
|
8
|
-
- Copy
|
|
9
|
-
- Do not include the `:`, the line content, or surrounding whitespace. The wire format for `start`/`end`/`pos` is the
|
|
8
|
+
- Copy the full 5-character HASH (including the `#` prefix). Use that HASH as `start` or `end` (for `replace`) or `pos` (for `append`/`prepend`) in the next `edit` call.
|
|
9
|
+
- Do not include the `:`, the line content, or surrounding whitespace. The wire format for `start`/`end`/`pos` is the HASH only.
|
|
10
10
|
|
|
11
11
|
Pagination:
|
|
12
12
|
- Large files return a truncated preview with a `nextOffset` line. Call `read` again with `offset=nextOffset` to continue.
|
|
@@ -14,15 +14,15 @@ Pagination:
|
|
|
14
14
|
- Empty files return an advisory suggesting `prepend`/`append` instead of a synthetic anchor.
|
|
15
15
|
|
|
16
16
|
Error recovery:
|
|
17
|
-
- `[E_STALE_ANCHOR]` — the file changed since your last read. The error includes fresh `>>> HASH:content` lines; copy the HASH portion (4 chars before `:`) and retry.
|
|
18
|
-
- `[E_BAD_REF]` — malformed HASH. Re-read and try again with a valid
|
|
17
|
+
- `[E_STALE_ANCHOR]` — the file changed since your last read. The error includes fresh `>>> #HASH:content` lines; copy the HASH portion (the `#` + 4 chars before `:`) and retry.
|
|
18
|
+
- `[E_BAD_REF]` — malformed HASH. Re-read and try again with a valid HASH anchor (e.g. `#aB3x`).
|
|
19
19
|
|
|
20
20
|
File kinds:
|
|
21
|
-
- Text files are returned as
|
|
21
|
+
- Text files are returned as `#HASH:content` lines.
|
|
22
22
|
- Images (JPEG, PNG, GIF, WebP) are returned as visual attachments; the HASH-line protocol does not apply.
|
|
23
23
|
- Binary files and directories are rejected with a descriptive error.
|
|
24
24
|
|
|
25
25
|
Auto-read after write:
|
|
26
|
-
- After a successful `write`, the result includes a `--- Auto-read (hashline anchors) ---` block with HASH:content for the written file.
|
|
26
|
+
- After a successful `write`, the result includes a `--- Auto-read (hashline anchors) ---` block with `#HASH:content` for the written file.
|
|
27
27
|
- Use those anchors directly for `edit` calls without a separate `read`.
|
|
28
28
|
- The auto-read output follows the same format and rules as `read` output.
|
package/src/edit-diff.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as Diff from "diff";
|
|
2
2
|
import {
|
|
3
3
|
computeLineHashes,
|
|
4
|
-
|
|
4
|
+
ANCHOR_LENGTH,
|
|
5
5
|
} from "./hashline";
|
|
6
6
|
|
|
7
7
|
// ─── Line ending normalization ──────────────────────────────────────────
|
|
@@ -41,7 +41,7 @@ function formatDiffPreviewLine(
|
|
|
41
41
|
// Removed lines have no hash, but they still need column alignment with
|
|
42
42
|
// the hash-prefixed lines (` #HASH:`, `+#HASH:`). Pad with `ANCHOR_LENGTH`
|
|
43
43
|
// spaces so the `:` lines up in the same column.
|
|
44
|
-
return `${prefix}${" ".repeat(
|
|
44
|
+
return `${prefix}${" ".repeat(ANCHOR_LENGTH)}:${line}`;
|
|
45
45
|
}
|
|
46
46
|
return `${prefix}${hash}:${line}`;
|
|
47
47
|
}
|
package/src/edit-response.ts
CHANGED
|
@@ -194,7 +194,7 @@ function truncateOutlineEntry(text: string, max = 88): string {
|
|
|
194
194
|
function collectOutlineEntries(previewText: string): string[] {
|
|
195
195
|
const structural: string[] = [];
|
|
196
196
|
for (const line of previewText.split("\n")) {
|
|
197
|
-
const match = line.match(/^\s
|
|
197
|
+
const match = line.match(/^\s*#([A-Za-z0-9_\-]{4}):(.*)$/);
|
|
198
198
|
if (!match) continue;
|
|
199
199
|
const content = match[2]!.trim();
|
|
200
200
|
if (content.length === 0) continue;
|
|
@@ -250,7 +250,7 @@ function formatRequestedRangePreviews(
|
|
|
250
250
|
},
|
|
251
251
|
precomputedHashes,
|
|
252
252
|
);
|
|
253
|
-
const hasReturnedLines = /^\s
|
|
253
|
+
const hasReturnedLines = /^\s*#[A-Za-z0-9_\-]{4}:/m.test(preview.text);
|
|
254
254
|
const actualEnd = hasReturnedLines
|
|
255
255
|
? preview.nextOffset !== undefined
|
|
256
256
|
? preview.nextOffset - 1
|
package/src/edit.ts
CHANGED
|
@@ -92,19 +92,19 @@ const hashlineEditItemSchema = Type.Object(
|
|
|
92
92
|
start: Type.Optional(
|
|
93
93
|
Type.String({
|
|
94
94
|
description:
|
|
95
|
-
"required range-start anchor for op \"replace\" (
|
|
95
|
+
"required range-start anchor for op \"replace\" (hash anchor like \"#aB3x\" copied from read output); no content may follow the anchor",
|
|
96
96
|
}),
|
|
97
97
|
),
|
|
98
98
|
end: Type.Optional(
|
|
99
99
|
Type.String({
|
|
100
100
|
description:
|
|
101
|
-
"required range-end anchor for op \"replace\" (
|
|
101
|
+
"required range-end anchor for op \"replace\" (hash anchor like \"#aB3x\"). To replace a single line, set start = end = the line's anchor",
|
|
102
102
|
}),
|
|
103
103
|
),
|
|
104
104
|
pos: Type.Optional(
|
|
105
105
|
Type.String({
|
|
106
106
|
description:
|
|
107
|
-
"anchor for op \"append\" or \"prepend\" (
|
|
107
|
+
"anchor for op \"append\" or \"prepend\" (hash anchor like \"#aB3x\"). Omit for file-boundary insertion (EOF/BOF).",
|
|
108
108
|
}),
|
|
109
109
|
),
|
|
110
110
|
lines: Type.Optional(hashlineEditLinesSchema),
|
package/src/hashline/hash.ts
CHANGED
|
@@ -23,6 +23,13 @@ import * as XXH from "xxhashjs";
|
|
|
23
23
|
*/
|
|
24
24
|
export const HASH_LENGTH = 4;
|
|
25
25
|
|
|
26
|
+
/** Prefix marker for hash anchors. Every anchor starts with `#` so the hash */
|
|
27
|
+
/** format is `#` + HASH_LENGTH base64 chars (e.g. `#aB3x`, `#4yN-`). */
|
|
28
|
+
export const HASH_PREFIX = "#";
|
|
29
|
+
|
|
30
|
+
/** Total wire-format length of an anchor: prefix + hash body. */
|
|
31
|
+
export const ANCHOR_LENGTH = HASH_PREFIX.length + HASH_LENGTH;
|
|
32
|
+
|
|
26
33
|
/**
|
|
27
34
|
* URL-safe base64 alphabet: A–Z, a–z, 0–9, `-`, `_`. 64 distinct chars
|
|
28
35
|
* giving 6 bits per hash character. No exclusions, no human-readability
|
|
@@ -40,7 +47,7 @@ const HASH_ALPHABET_MASK = (1 << HASH_ALPHABET_BITS) - 1;
|
|
|
40
47
|
// silently swallows the literal `-`). The `_` is always literal.
|
|
41
48
|
const HASH_ALPHABET_REGEX_SAFE = HASH_ALPHABET.replace(/-/g, "\\-");
|
|
42
49
|
const HASH_ALPHABET_RE = new RegExp(`^[${HASH_ALPHABET_REGEX_SAFE}]+$`);
|
|
43
|
-
export const HASH_CHARS_CLASS =
|
|
50
|
+
export const HASH_CHARS_CLASS = `${HASH_PREFIX}[${HASH_ALPHABET_REGEX_SAFE}]{${HASH_LENGTH}}`;
|
|
44
51
|
|
|
45
52
|
/**
|
|
46
53
|
* Encode the top `HASH_LENGTH * 6` bits of a 32-bit hash value as a
|
|
@@ -68,7 +75,7 @@ function hashToString(h: number): string {
|
|
|
68
75
|
HASH_ALPHABET_MASK
|
|
69
76
|
]!;
|
|
70
77
|
}
|
|
71
|
-
return out;
|
|
78
|
+
return HASH_PREFIX + out;
|
|
72
79
|
}
|
|
73
80
|
|
|
74
81
|
/**
|
|
@@ -85,16 +92,16 @@ export const HASHLINE_PREFIX_PLUS_RE = new RegExp(
|
|
|
85
92
|
export const DIFF_MINUS_RE = /^-\s*\d+\s{4}/;
|
|
86
93
|
|
|
87
94
|
/**
|
|
88
|
-
* Bare hashline prefix: a HASH_LENGTH-char hash followed by ":" with
|
|
89
|
-
* "LINE#" part (e.g. "KKZ:### heading", "TPN:text", "TJZ:"). Capture
|
|
90
|
-
* group 1 is the
|
|
95
|
+
* Bare hashline prefix: a `#` + HASH_LENGTH-char hash followed by ":" with
|
|
96
|
+
* no "LINE#" part (e.g. "#KKZq:### heading", "#TPNx:text", "#TJZa:"). Capture
|
|
97
|
+
* group 1 is the full anchor (including `#` prefix).
|
|
91
98
|
*
|
|
92
99
|
* This is the partial-hash failure mode from issue #24: the model copies a
|
|
93
100
|
* hash it saw in `read` output into the line content but drops the rest
|
|
94
|
-
* of the rendered `HASH:content` form. The
|
|
95
|
-
*
|
|
96
|
-
*
|
|
97
|
-
*
|
|
101
|
+
* of the rendered `#HASH:content` form. The anchor (prefix + HASH_LENGTH chars
|
|
102
|
+
* + ":") is matched by this regex, then `assertNoBareHashPrefixLines` rejects
|
|
103
|
+
* the edit with `[E_BARE_HASH_PREFIX]` so the model gets actionable feedback
|
|
104
|
+
* instead of a silent correctness bug.
|
|
98
105
|
*/
|
|
99
106
|
export const HASHLINE_BARE_PREFIX_RE = new RegExp(`^\\s*(${HASH_CHARS_CLASS}):`);
|
|
100
107
|
|
|
@@ -182,7 +189,9 @@ export function computeLineHash(idx: number, line: string): string {
|
|
|
182
189
|
|
|
183
190
|
/** Exported for tests and for downstream tools that want to mirror the format. */
|
|
184
191
|
export const HASH_FORMAT = {
|
|
192
|
+
prefix: HASH_PREFIX,
|
|
185
193
|
length: HASH_LENGTH,
|
|
194
|
+
anchorLength: ANCHOR_LENGTH,
|
|
186
195
|
bitsPerChar: HASH_ALPHABET_BITS,
|
|
187
196
|
alphabet: HASH_ALPHABET,
|
|
188
197
|
};
|
package/src/hashline/index.ts
CHANGED
|
@@ -6,15 +6,16 @@
|
|
|
6
6
|
*
|
|
7
7
|
* This fork preserves the strict semantics of the original (no silent
|
|
8
8
|
* relocation, no autocorrection heuristics, no fuzzy fallback) and uses a
|
|
9
|
-
*
|
|
9
|
+
* `#`-prefixed hash over a 64-character URL-safe base64 alphabet, giving
|
|
10
10
|
* 24 bits of entropy (16 777 216 buckets) per anchor. Birthday-paradox
|
|
11
11
|
* collisions become effectively zero for any realistic file size. The
|
|
12
12
|
* alphabet is sized for an LLM consumer, not a human reader — the model
|
|
13
13
|
* tokenizes, it does not squint at pixel glyphs.
|
|
14
14
|
*
|
|
15
|
-
* Anchor format:
|
|
16
|
-
* part of the wire format, and no content may follow the
|
|
17
|
-
* model never has to type a line number; the runtime resolves each
|
|
15
|
+
* Anchor format: `#` prefix + 4 base64 chars (e.g. `#aB3x`). The line number
|
|
16
|
+
* is no longer part of the wire format, and no content may follow the anchor
|
|
17
|
+
* either. The model never has to type a line number; the runtime resolves each
|
|
18
|
+
* anchor to
|
|
18
19
|
* a line via the file's precomputed hash array.
|
|
19
20
|
*
|
|
20
21
|
* On a hash collision (two different lines happen to have the same hash
|
|
@@ -30,6 +31,8 @@
|
|
|
30
31
|
export {
|
|
31
32
|
// Hash computation
|
|
32
33
|
HASH_LENGTH,
|
|
34
|
+
HASH_PREFIX,
|
|
35
|
+
ANCHOR_LENGTH,
|
|
33
36
|
HASH_FORMAT,
|
|
34
37
|
HASH_CHARS_CLASS,
|
|
35
38
|
HASHLINE_PREFIX_RE,
|
package/src/hashline/parse.ts
CHANGED
|
@@ -6,7 +6,8 @@
|
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import {
|
|
9
|
-
|
|
9
|
+
ANCHOR_LENGTH,
|
|
10
|
+
HASH_PREFIX,
|
|
10
11
|
HASH_ALPHABET_RE,
|
|
11
12
|
HASH_CHARS_CLASS,
|
|
12
13
|
HASHLINE_PREFIX_PLUS_RE,
|
|
@@ -28,30 +29,32 @@ function diagnoseHashRef(ref: string): string {
|
|
|
28
29
|
const trimmed = ref.trim();
|
|
29
30
|
|
|
30
31
|
if (!trimmed.length) {
|
|
31
|
-
return `[E_BAD_REF] Invalid anchor. Expected a
|
|
32
|
+
return `[E_BAD_REF] Invalid anchor. Expected a hash anchor like "#aB3x" (prefix "#" + 4 base64 chars).`;
|
|
32
33
|
}
|
|
33
34
|
|
|
34
35
|
// Detect the legacy "LINE#HASH" form (5#aB3x, 12#MQ, etc.) so we can
|
|
35
36
|
// give a clear error pointing at the new format.
|
|
36
37
|
if (/^\d+\s*#/.test(trimmed)) {
|
|
37
|
-
return `[E_BAD_REF] Invalid anchor. Use the hash alone (e.g. "aB3x") — no line numbers or trailing content.`;
|
|
38
|
+
return `[E_BAD_REF] Invalid anchor. Use the hash alone (e.g. "#aB3x") — no line numbers or trailing content.`;
|
|
38
39
|
}
|
|
39
40
|
|
|
40
|
-
return `[E_BAD_REF] Invalid anchor "${trimmed}". Expected a
|
|
41
|
+
return `[E_BAD_REF] Invalid anchor "${trimmed}". Expected a hash anchor like "#aB3x".`;
|
|
41
42
|
}
|
|
42
43
|
|
|
43
44
|
function parseAnchorRef(ref: string): Anchor {
|
|
44
45
|
const trimmed = ref.trim();
|
|
45
46
|
|
|
46
|
-
// Strict: the wire format is
|
|
47
|
+
// Strict: the wire format is `#` + 4-character hash from the URL-safe base64
|
|
47
48
|
// alphabet (A-Za-z0-9-_), copied verbatim from `read` output. The first
|
|
48
|
-
// character can be `-` (a valid alphabet char), so
|
|
49
|
-
// taken literally. No other form is tolerated: `+`/`-`/`>>>`
|
|
50
|
-
// diff contexts or stale-anchor retry blocks are rejected. The
|
|
51
|
-
// copy just the 4
|
|
49
|
+
// character of the hash body can be `-` (a valid alphabet char), so an anchor
|
|
50
|
+
// like `#-qkl` is taken literally. No other form is tolerated: `+`/`-`/`>>>`
|
|
51
|
+
// markers from diff contexts or stale-anchor retry blocks are rejected. The
|
|
52
|
+
// model must copy just the anchor (prefix + 4 chars) with no surrounding
|
|
53
|
+
// characters.
|
|
52
54
|
if (
|
|
53
|
-
trimmed.length ===
|
|
54
|
-
|
|
55
|
+
trimmed.length === ANCHOR_LENGTH &&
|
|
56
|
+
trimmed.startsWith(HASH_PREFIX) &&
|
|
57
|
+
HASH_ALPHABET_RE.test(trimmed.slice(HASH_PREFIX.length))
|
|
55
58
|
) {
|
|
56
59
|
return { hash: trimmed };
|
|
57
60
|
}
|
package/src/hashline/resolve.ts
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
import { throwIfAborted } from "../runtime";
|
|
10
|
-
import {
|
|
10
|
+
import { HASHLINE_BARE_PREFIX_RE } from "./hash";
|
|
11
11
|
import { parseHashRef, hashlineParseText, type Anchor } from "./parse";
|
|
12
12
|
|
|
13
13
|
// ─── Types ──────────────────────────────────────────────────────────────
|