hash-edit 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hash_edit-0.1.0/LICENSE +21 -0
- hash_edit-0.1.0/PKG-INFO +88 -0
- hash_edit-0.1.0/README.md +69 -0
- hash_edit-0.1.0/pyproject.toml +55 -0
- hash_edit-0.1.0/src/hash_edit/__init__.py +47 -0
- hash_edit-0.1.0/src/hash_edit/core.py +532 -0
- hash_edit-0.1.0/src/hash_edit/errors.py +43 -0
- hash_edit-0.1.0/src/hash_edit/py.typed +0 -0
hash_edit-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Jan Siml
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
hash_edit-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hash-edit
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Line-hash anchored read/edit/write for AI coding agents — windowed reads, strict version checks, atomic writes
|
|
5
|
+
Keywords: llm,ai,agent,file-editing,hashline
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Project-URL: Homepage, https://github.com/jansiml/hash-edit
|
|
17
|
+
Project-URL: Repository, https://github.com/jansiml/hash-edit
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# hash-edit (Python)
|
|
21
|
+
|
|
22
|
+
Line-hash anchored read/edit/write for AI coding agents — windowed reads, strict version checks, atomic writes.
|
|
23
|
+
|
|
24
|
+
## Install
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install hash-edit
|
|
28
|
+
# or
|
|
29
|
+
uv add hash-edit
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## Quick start
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from hash_edit import HashEditHarness
|
|
36
|
+
|
|
37
|
+
h = HashEditHarness("/path/to/project")
|
|
38
|
+
|
|
39
|
+
# 1. Read — returns rendered lines with hashes and a full-file version digest
|
|
40
|
+
result = h.read("src/app.py")
|
|
41
|
+
# result["version"] == "4a3f..."
|
|
42
|
+
# result["lines"] == ["1:ab|import os", "2:cd|", "3:ef|def main(): ..."]
|
|
43
|
+
# result["total_lines"] == 3
|
|
44
|
+
|
|
45
|
+
# 2. Edit — anchored ops verified against expected_version
|
|
46
|
+
h.edit(
|
|
47
|
+
"src/app.py",
|
|
48
|
+
[
|
|
49
|
+
{"op": "replace", "line": 3, "hash": "ef", "lines": ["def main():", " pass"]},
|
|
50
|
+
],
|
|
51
|
+
expected_version=result["version"],
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
# 3. Write — safely create or overwrite; expected_version required when overwriting
|
|
55
|
+
h.write("src/new_file.py", "# new content\n")
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Windowed reads
|
|
59
|
+
|
|
60
|
+
For large files, pass `start_line` / `end_line` to read only the lines you need (saves 55–87% tokens):
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
result = h.read("src/app.py", start_line=100, end_line=150)
|
|
64
|
+
# result["lines"] — 51 rendered lines, numbered 100–150
|
|
65
|
+
# result["total_lines"] — full file length (e.g. 1 200)
|
|
66
|
+
# result["version"] — full-file digest, same as a whole-file read
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
The `version` is always the full-file blake2s digest, so a windowed read still gives you a valid `expected_version` to pass to `edit()`.
|
|
70
|
+
|
|
71
|
+
## Error classes
|
|
72
|
+
|
|
73
|
+
| Error | When it is raised |
|
|
74
|
+
|---|---|
|
|
75
|
+
| `VersionConflictError` | The file changed between your last `read()` and the current `edit()` or `write()` |
|
|
76
|
+
| `AnchorMismatchError` | A `hash` field does not match the current line; the error message includes updated rendered context so the agent can retry immediately |
|
|
77
|
+
| `InvalidOperationError` | The edit payload is structurally invalid (missing field, out-of-range line, no-op replace, etc.) |
|
|
78
|
+
| `PathEscapeError` | The requested path resolves outside the configured root directory |
|
|
79
|
+
| `MixedNewlineError` | The file mixes newline styles (`\n`, `\r\n`, `\r`) and is rejected before mutation |
|
|
80
|
+
| `FileEncodingError` | The file cannot be decoded with the configured encoding (UTF-8 by default) |
|
|
81
|
+
|
|
82
|
+
All six inherit from `HashEditError`.
|
|
83
|
+
|
|
84
|
+
## Further reading
|
|
85
|
+
|
|
86
|
+
- [Root README](https://github.com/jansiml/hash-edit#readme) — project overview, TypeScript package, and benchmarks
|
|
87
|
+
- [`docs/spec.md`](https://github.com/jansiml/hash-edit/blob/main/docs/spec.md) — full protocol specification
|
|
88
|
+
- [`docs/INTERFACE_DESIGN.md`](https://github.com/jansiml/hash-edit/blob/main/docs/INTERFACE_DESIGN.md) — design rationale
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# hash-edit (Python)
|
|
2
|
+
|
|
3
|
+
Line-hash anchored read/edit/write for AI coding agents — windowed reads, strict version checks, atomic writes.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install hash-edit
|
|
9
|
+
# or
|
|
10
|
+
uv add hash-edit
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Quick start
|
|
14
|
+
|
|
15
|
+
```python
|
|
16
|
+
from hash_edit import HashEditHarness
|
|
17
|
+
|
|
18
|
+
h = HashEditHarness("/path/to/project")
|
|
19
|
+
|
|
20
|
+
# 1. Read — returns rendered lines with hashes and a full-file version digest
|
|
21
|
+
result = h.read("src/app.py")
|
|
22
|
+
# result["version"] == "4a3f..."
|
|
23
|
+
# result["lines"] == ["1:ab|import os", "2:cd|", "3:ef|def main(): ..."]
|
|
24
|
+
# result["total_lines"] == 3
|
|
25
|
+
|
|
26
|
+
# 2. Edit — anchored ops verified against expected_version
|
|
27
|
+
h.edit(
|
|
28
|
+
"src/app.py",
|
|
29
|
+
[
|
|
30
|
+
{"op": "replace", "line": 3, "hash": "ef", "lines": ["def main():", " pass"]},
|
|
31
|
+
],
|
|
32
|
+
expected_version=result["version"],
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
# 3. Write — safely create or overwrite; expected_version required when overwriting
|
|
36
|
+
h.write("src/new_file.py", "# new content\n")
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Windowed reads
|
|
40
|
+
|
|
41
|
+
For large files, pass `start_line` / `end_line` to read only the lines you need (saves 55–87% tokens):
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
result = h.read("src/app.py", start_line=100, end_line=150)
|
|
45
|
+
# result["lines"] — 51 rendered lines, numbered 100–150
|
|
46
|
+
# result["total_lines"] — full file length (e.g. 1 200)
|
|
47
|
+
# result["version"] — full-file digest, same as a whole-file read
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
The `version` is always the full-file blake2s digest, so a windowed read still gives you a valid `expected_version` to pass to `edit()`.
|
|
51
|
+
|
|
52
|
+
## Error classes
|
|
53
|
+
|
|
54
|
+
| Error | When it is raised |
|
|
55
|
+
|---|---|
|
|
56
|
+
| `VersionConflictError` | The file changed between your last `read()` and the current `edit()` or `write()` |
|
|
57
|
+
| `AnchorMismatchError` | A `hash` field does not match the current line; the error message includes updated rendered context so the agent can retry immediately |
|
|
58
|
+
| `InvalidOperationError` | The edit payload is structurally invalid (missing field, out-of-range line, no-op replace, etc.) |
|
|
59
|
+
| `PathEscapeError` | The requested path resolves outside the configured root directory |
|
|
60
|
+
| `MixedNewlineError` | The file mixes newline styles (`\n`, `\r\n`, `\r`) and is rejected before mutation |
|
|
61
|
+
| `FileEncodingError` | The file cannot be decoded with the configured encoding (UTF-8 by default) |
|
|
62
|
+
|
|
63
|
+
All six inherit from `HashEditError`.
|
|
64
|
+
|
|
65
|
+
## Further reading
|
|
66
|
+
|
|
67
|
+
- [Root README](https://github.com/jansiml/hash-edit#readme) — project overview, TypeScript package, and benchmarks
|
|
68
|
+
- [`docs/spec.md`](https://github.com/jansiml/hash-edit/blob/main/docs/spec.md) — full protocol specification
|
|
69
|
+
- [`docs/INTERFACE_DESIGN.md`](https://github.com/jansiml/hash-edit/blob/main/docs/INTERFACE_DESIGN.md) — design rationale
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["uv_build>=0.10.9,<0.11.0"]
|
|
3
|
+
build-backend = "uv_build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "hash-edit"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Line-hash anchored read/edit/write for AI coding agents — windowed reads, strict version checks, atomic writes"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = ["LICENSE"]
|
|
13
|
+
dependencies = []
|
|
14
|
+
keywords = ["llm", "ai", "agent", "file-editing", "hashline"]
|
|
15
|
+
classifiers = [
|
|
16
|
+
"Development Status :: 3 - Alpha",
|
|
17
|
+
"Intended Audience :: Developers",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Programming Language :: Python :: 3.13",
|
|
22
|
+
"Topic :: Software Development :: Libraries",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[project.urls]
|
|
26
|
+
Homepage = "https://github.com/jansiml/hash-edit"
|
|
27
|
+
Repository = "https://github.com/jansiml/hash-edit"
|
|
28
|
+
|
|
29
|
+
[dependency-groups]
|
|
30
|
+
dev = [
|
|
31
|
+
"pytest>=8.4.0",
|
|
32
|
+
"ruff>=0.12.0",
|
|
33
|
+
"ty>=0.0.1a6",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[tool.pytest.ini_options]
|
|
37
|
+
testpaths = ["tests"]
|
|
38
|
+
pythonpath = ["src"]
|
|
39
|
+
|
|
40
|
+
[tool.ruff]
|
|
41
|
+
line-length = 88
|
|
42
|
+
target-version = "py311"
|
|
43
|
+
|
|
44
|
+
[tool.ruff.lint]
|
|
45
|
+
select = ["E", "F", "I", "N", "W", "UP"]
|
|
46
|
+
ignore = ["E501"]
|
|
47
|
+
|
|
48
|
+
[tool.ruff.lint.isort]
|
|
49
|
+
known-first-party = ["hash_edit"]
|
|
50
|
+
|
|
51
|
+
[tool.ruff.format]
|
|
52
|
+
quote-style = "double"
|
|
53
|
+
|
|
54
|
+
[tool.ty.environment]
|
|
55
|
+
python-version = "3.11"
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from .core import (
|
|
2
|
+
DeleteOp,
|
|
3
|
+
EditOp,
|
|
4
|
+
EditResult,
|
|
5
|
+
HashEditHarness,
|
|
6
|
+
InsertAfterOp,
|
|
7
|
+
InsertBeforeOp,
|
|
8
|
+
ReadResult,
|
|
9
|
+
ReplaceOp,
|
|
10
|
+
WriteResult,
|
|
11
|
+
compute_line_hash,
|
|
12
|
+
render_line,
|
|
13
|
+
render_lines,
|
|
14
|
+
strip_render_prefixes,
|
|
15
|
+
)
|
|
16
|
+
from .errors import (
|
|
17
|
+
AnchorMismatchError,
|
|
18
|
+
FileEncodingError,
|
|
19
|
+
HashEditError,
|
|
20
|
+
InvalidOperationError,
|
|
21
|
+
MixedNewlineError,
|
|
22
|
+
PathEscapeError,
|
|
23
|
+
VersionConflictError,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"AnchorMismatchError",
|
|
28
|
+
"DeleteOp",
|
|
29
|
+
"EditOp",
|
|
30
|
+
"EditResult",
|
|
31
|
+
"FileEncodingError",
|
|
32
|
+
"HashEditError",
|
|
33
|
+
"HashEditHarness",
|
|
34
|
+
"InsertAfterOp",
|
|
35
|
+
"InsertBeforeOp",
|
|
36
|
+
"InvalidOperationError",
|
|
37
|
+
"MixedNewlineError",
|
|
38
|
+
"PathEscapeError",
|
|
39
|
+
"ReadResult",
|
|
40
|
+
"ReplaceOp",
|
|
41
|
+
"VersionConflictError",
|
|
42
|
+
"WriteResult",
|
|
43
|
+
"compute_line_hash",
|
|
44
|
+
"render_line",
|
|
45
|
+
"render_lines",
|
|
46
|
+
"strip_render_prefixes",
|
|
47
|
+
]
|
|
@@ -0,0 +1,532 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
import stat
|
|
7
|
+
import tempfile
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Literal, NotRequired, TypeAlias, TypedDict, cast
|
|
11
|
+
|
|
12
|
+
from .errors import (
|
|
13
|
+
AnchorMismatchError,
|
|
14
|
+
FileEncodingError,
|
|
15
|
+
InvalidOperationError,
|
|
16
|
+
MixedNewlineError,
|
|
17
|
+
PathEscapeError,
|
|
18
|
+
VersionConflictError,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
# Text encoding used for reading and writing unless the harness is configured otherwise.
DEFAULT_ENCODING = "utf-8"
# Number of hex characters kept from each per-line digest.
DEFAULT_HASH_LENGTH = 2
# Byte sequence of the UTF-8 byte-order mark.
UTF8_BOM = b"\xef\xbb\xbf"
# Matches one newline sequence; "\r\n" is listed first so it is consumed as a unit.
NEWLINE_RE = re.compile(r"\r\n|\n|\r")
# Matches a rendered "<line>:<hash>|" prefix, optionally preceded by whitespace.
PREFIX_RE = re.compile(r"^\s*\d+:[0-9a-f]{2,}\|")

# Newline convention of a text; "none" means no newline sequence was found.
NewlineStyle = Literal["lf", "crlf", "cr", "none"]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ReplaceOp(TypedDict):
    """Edit operation that replaces one line, or an inclusive range of lines."""

    op: Literal["replace"]
    # 1-based number of the first (or only) line to replace.
    line: int
    # Hash the caller expects for `line`.
    hash: str
    # Replacement text, one entry per line.
    lines: list[str]
    # 1-based number of the last line of the range; omitted for a single line.
    end_line: NotRequired[int]
    # Hash the caller expects for `end_line`.
    end_hash: NotRequired[str]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class InsertBeforeOp(TypedDict):
    """Edit operation that inserts new lines directly before an anchored line."""

    op: Literal["insert_before"]
    # 1-based number of the anchor line.
    line: int
    # Hash the caller expects for the anchor line.
    hash: str
    # Text to insert, one entry per line.
    lines: list[str]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class InsertAfterOp(TypedDict):
    """Edit operation that inserts new lines directly after an anchored line."""

    op: Literal["insert_after"]
    # 1-based number of the anchor line.
    line: int
    # Hash the caller expects for the anchor line.
    hash: str
    # Text to insert, one entry per line.
    lines: list[str]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class DeleteOp(TypedDict):
    """Edit operation that deletes one line, or an inclusive range of lines."""

    op: Literal["delete"]
    # 1-based number of the first (or only) line to delete.
    line: int
    # Hash the caller expects for `line`.
    hash: str
    # 1-based number of the last line of the range; omitted for a single line.
    end_line: NotRequired[int]
    # Hash the caller expects for `end_line`.
    end_hash: NotRequired[str]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# Any one of the four operation payloads accepted by HashEditHarness.edit().
EditOp: TypeAlias = ReplaceOp | InsertBeforeOp | InsertAfterOp | DeleteOp
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class ReadResult(TypedDict):
    """Shape of the dictionary returned by HashEditHarness.read()."""

    # File path expressed relative to the harness root.
    path: str
    # Hex digest of the complete file contents (not just the returned window).
    version: str
    # Encoding used to decode the file.
    encoding: str
    # True when the file starts with a UTF-8 byte-order mark.
    bom: bool
    # Newline convention detected in the file.
    newline: NewlineStyle
    # True when the file ends with its newline sequence.
    has_final_newline: bool
    # Number of hex characters in each rendered line hash.
    hash_length: int
    # Number of lines in the whole file.
    total_lines: int
    # First and last line of the returned window (1-based, inclusive);
    # both are 0 when the file has no lines.
    start_line: int
    end_line: int
    # Rendered lines in the form "<line>:<hash>|<text>".
    lines: list[str]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class EditResult(TypedDict):
    """Shape of the dictionary returned by HashEditHarness.edit()."""

    # File path expressed relative to the harness root.
    path: str
    # Full-file digest before and after the edit was written.
    version_before: str
    version_after: str
    # Number of operations in the request.
    applied: int
    # Smallest line number touched by any operation, in pre-edit numbering.
    first_changed_line: int | None
    # Encoding, BOM flag and newline metadata carried over from the file as read.
    encoding: str
    bom: bool
    newline: NewlineStyle
    has_final_newline: bool
    # Number of lines in the file after the edit.
    total_lines: int
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class WriteResult(TypedDict):
    """Shape of the dictionary returned by HashEditHarness.write()."""

    # File path expressed relative to the harness root.
    path: str
    # Digest of the bytes that were written.
    version_after: str
    # True when the file did not exist before the call.
    created: bool
    # Encoding used to encode the text.
    encoding: str
    # True when a UTF-8 byte-order mark was written in front of the text.
    bom: bool
    # Newline convention used when joining the lines.
    newline: NewlineStyle
    # True when the supplied text ended with its newline sequence.
    has_final_newline: bool
    # Number of lines in the supplied text.
    total_lines: int
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@dataclass(frozen=True)
class _Snapshot:
    """Immutable record of a file's decoded content and its format metadata."""

    # File path expressed relative to the harness root.
    path: str
    # Hex digest of the raw file bytes.
    version: str
    # Encoding used to decode the bytes.
    encoding: str
    # True when the raw bytes started with a UTF-8 byte-order mark.
    bom: bool
    # Newline convention detected in the decoded text.
    newline: NewlineStyle
    # True when the text ended with its newline sequence.
    has_final_newline: bool
    # File content split into lines, without newline sequences.
    lines: list[str]

    @property
    def total_lines(self) -> int:
        """Return the number of lines held by this snapshot."""
        return len(self.lines)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _newline_separator(style: NewlineStyle) -> str:
    """Return the character sequence that corresponds to a newline style.

    Any style other than "crlf" or "cr" (including "lf" and "none") maps
    to a plain line feed.
    """
    separators = {"lf": "\n", "crlf": "\r\n", "cr": "\r"}
    return separators.get(style, "\n")
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _detect_newline_style(text: str) -> tuple[NewlineStyle, bool, list[str]]:
    """Split text into lines and report which newline convention it uses.

    Returns:
        A ``(style, has_final_newline, lines)`` tuple. ``lines`` never
        contain newline sequences, and a trailing newline does not produce
        an extra empty line.

    Raises:
        MixedNewlineError: If more than one kind of newline sequence occurs.
    """
    found = NEWLINE_RE.findall(text)
    kinds = set(found)
    if len(kinds) > 1:
        raise MixedNewlineError(f"Mixed newline styles detected: {sorted(kinds)}")
    if not found:
        # No newline at all: the text is a single line, or nothing when empty.
        return ("none", False, [text] if text != "" else [])

    separator = found[0]
    style_by_separator: dict[str, NewlineStyle] = {
        "\n": "lf",
        "\r\n": "crlf",
        "\r": "cr",
    }
    ends_with_newline = text.endswith(separator)
    parts = text.split(separator)
    if ends_with_newline:
        # split() leaves an empty string after the final separator; drop it.
        parts = parts[:-1]
    return style_by_separator[separator], ends_with_newline, parts
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _join_text(lines: list[str], newline: NewlineStyle, has_final_newline: bool) -> str:
    """Assemble lines back into one string using the given newline style.

    An empty ``lines`` list always yields the empty string, regardless of
    ``has_final_newline``.
    """
    if len(lines) == 0:
        return ""
    glue = _newline_separator(newline)
    body = glue.join(lines)
    return body + glue if has_final_newline else body
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _version_for_bytes(data: bytes) -> str:
    """Return the 16-byte blake2s digest of ``data`` as 32 hex characters."""
    hasher = hashlib.blake2s(digest_size=16)
    hasher.update(data)
    return hasher.hexdigest()
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def compute_line_hash(
    line_number: int, text: str, hash_length: int = DEFAULT_HASH_LENGTH
) -> str:
    """Return the short hex hash used to anchor a line.

    Carriage returns are removed and trailing whitespace is stripped before
    hashing. When nothing alphanumeric remains, the line number is mixed
    into the hashed material, so the hash of such a line also depends on
    its position.

    Raises:
        ValueError: If ``hash_length`` is smaller than 2.
    """
    if hash_length < 2:
        raise ValueError("hash_length must be >= 2")
    normalized = text.replace("\r", "").rstrip()
    if any(map(str.isalnum, normalized)):
        material = normalized
    else:
        material = f"{line_number}\0{normalized}"
    digest = hashlib.blake2s(material.encode(DEFAULT_ENCODING), digest_size=16)
    return digest.hexdigest()[:hash_length]
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def render_line(
    line_number: int, text: str, hash_length: int = DEFAULT_HASH_LENGTH
) -> str:
    """Format one line as ``<line_number>:<hash>|<text>``."""
    anchor = compute_line_hash(line_number, text, hash_length)
    return f"{line_number}:{anchor}|{text}"
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def render_lines(
    lines: list[str], start_line: int = 1, hash_length: int = DEFAULT_HASH_LENGTH
) -> list[str]:
    """Render consecutive lines, numbering the first one ``start_line``."""
    rendered: list[str] = []
    for number, content in enumerate(lines, start=start_line):
        rendered.append(render_line(number, content, hash_length))
    return rendered
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def strip_render_prefixes(lines: list[str]) -> list[str]:
    """Remove ``<line>:<hash>|`` prefixes when every non-empty line has one.

    If any non-empty line lacks the prefix, or all lines are empty, a plain
    copy of the input is returned instead.
    """
    candidates = [entry for entry in lines if entry != ""]
    if not candidates:
        return list(lines)
    for entry in candidates:
        if not PREFIX_RE.match(entry):
            # One unprefixed line means the caller sent raw text; leave it all alone.
            return list(lines)
    return [PREFIX_RE.sub("", entry, count=1) for entry in lines]
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _message_for_mismatches(
    path: str,
    lines: list[str],
    hash_length: int,
    mismatches: list[dict[str, str | int]],
) -> str:
    """Build the text of an anchor-mismatch error with fresh rendered context.

    Each mismatched line is shown with up to two lines of context on either
    side, clipped to the file. Mismatched lines are marked with ``>>>`` and
    gaps between non-adjacent context groups are marked with ``...``.
    """
    # NOTE(review): runs of spaces inside the string literals below may have
    # been collapsed in the copy of the file this was written from - confirm
    # the exact padding against the repository.
    header = f"Anchors are stale in {path}. Re-read the file and use the updated rendered lines below."
    flagged = {int(entry["line"]) for entry in mismatches}

    shown: set[int] = set()
    for number in flagged:
        window_start = max(1, number - 2)
        window_end = min(len(lines), number + 2)
        shown.update(range(window_start, window_end + 1))
    if not shown:
        return header

    output = [header, ""]
    last_shown: int | None = None
    for number in sorted(shown):
        if last_shown is not None and number > last_shown + 1:
            output.append(" ...")
        marker = ">>> " if number in flagged else " "
        output.append(marker + render_line(number, lines[number - 1], hash_length))
        last_shown = number
    return "\n".join(output)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
class HashEditHarness:
    """Read, edit and write text files under a root directory using line-hash anchors.

    Every file is identified by a version digest of its full contents.
    ``edit()`` and overwriting ``write()`` calls must present the version the
    caller last saw, and edits must additionally present the hash of every
    line they anchor to.
    """

    # Order in which operations that sort to the same line are applied
    # (lower value first).
    _OP_PRECEDENCE = {
        "replace": 0,
        "delete": 0,
        "insert_after": 1,
        "insert_before": 2,
    }

    def __init__(
        self,
        root: str | Path = ".",
        *,
        default_encoding: str = DEFAULT_ENCODING,
        hash_length: int = DEFAULT_HASH_LENGTH,
    ) -> None:
        """Create a harness confined to ``root``.

        Args:
            root: Directory that all paths are resolved against.
            default_encoding: Encoding used to decode and encode files.
            hash_length: Number of hex characters in each line hash.
        """
        self._root = Path(root).resolve()
        self._default_encoding = default_encoding
        self._hash_length = hash_length

    def _resolve_path(self, path: str) -> Path:
        """Resolve ``path`` against the root and reject anything outside it.

        Raises:
            PathEscapeError: If the resolved path is not inside the root.
        """
        candidate = Path(path)
        if not candidate.is_absolute():
            candidate = self._root / candidate
        resolved = candidate.resolve()
        try:
            resolved.relative_to(self._root)
        except ValueError as error:
            raise PathEscapeError(f"{path} escapes root {self._root}") from error
        return resolved

    def _read_snapshot(self, path: Path) -> _Snapshot:
        """Read ``path`` and return its lines together with format metadata.

        The version digest covers the raw bytes, including any BOM.

        Raises:
            FileEncodingError: If the bytes cannot be decoded.
            MixedNewlineError: If the file mixes newline styles.
        """
        raw = path.read_bytes()
        version = _version_for_bytes(raw)
        bom = raw.startswith(UTF8_BOM)
        payload = raw[len(UTF8_BOM) :] if bom else raw
        try:
            text = payload.decode(self._default_encoding, errors="strict")
        except UnicodeDecodeError as error:
            raise FileEncodingError(
                f"{path.name} is not valid {self._default_encoding}"
            ) from error
        newline, has_final_newline, lines = _detect_newline_style(text)
        return _Snapshot(
            path=str(path.relative_to(self._root)),
            version=version,
            encoding=self._default_encoding,
            bom=bom,
            newline=newline,
            has_final_newline=has_final_newline,
            lines=lines,
        )

    def _write_snapshot(
        self,
        path: Path,
        *,
        lines: list[str],
        bom: bool,
        newline: NewlineStyle,
        has_final_newline: bool,
    ) -> str:
        """Encode ``lines`` and replace ``path`` with the result.

        The bytes are written to a temporary file in the target directory,
        which is then moved over ``path`` with ``os.replace``. Missing parent
        directories are created. When the target already exists, its
        permission bits are copied onto the replacement.

        Returns:
            The version digest of the bytes that were written.
        """
        text = _join_text(lines, newline, has_final_newline)
        payload = text.encode(self._default_encoding)
        if bom:
            payload = UTF8_BOM + payload

        temporary_path: str | None = None
        existing_mode = None
        path.parent.mkdir(parents=True, exist_ok=True)
        if path.exists():
            existing_mode = stat.S_IMODE(path.stat().st_mode)

        try:
            handle, temporary_path = tempfile.mkstemp(
                prefix=f".{path.name}.", dir=path.parent
            )
            with os.fdopen(handle, "wb") as file_handle:
                file_handle.write(payload)
            if existing_mode is not None:
                os.chmod(temporary_path, existing_mode)
            os.replace(temporary_path, path)
        finally:
            # After a successful replace the temporary name no longer exists,
            # so this only cleans up when something failed part-way.
            if temporary_path and os.path.exists(temporary_path):
                os.unlink(temporary_path)

        return _version_for_bytes(payload)

    def read(
        self, path: str, *, start_line: int | None = None, end_line: int | None = None
    ) -> ReadResult:
        """Return rendered lines of a file plus its full-file version.

        Args:
            path: File to read, relative to the root or absolute inside it.
            start_line: First line to return (1-based); defaults to line 1.
            end_line: Last line to return (inclusive); defaults to the last line.

        Returns:
            A ``ReadResult``. For a file without lines the window arguments are
            ignored and ``start_line``/``end_line`` are reported as 0.

        Raises:
            InvalidOperationError: If the requested window is empty, reversed,
                or extends outside the file.
        """
        resolved = self._resolve_path(path)
        snapshot = self._read_snapshot(resolved)

        if snapshot.total_lines == 0:
            first = 0
            last = 0
            rendered: list[str] = []
        else:
            first = 1 if start_line is None else start_line
            last = snapshot.total_lines if end_line is None else end_line
            if first < 1 or last < first or last > snapshot.total_lines:
                raise InvalidOperationError(
                    f"Invalid read window for {snapshot.path}: start_line={first}, end_line={last}, total_lines={snapshot.total_lines}"
                )
            rendered = render_lines(
                snapshot.lines[first - 1 : last], first, self._hash_length
            )

        return {
            "path": snapshot.path,
            "version": snapshot.version,
            "encoding": snapshot.encoding,
            "bom": snapshot.bom,
            "newline": snapshot.newline,
            "has_final_newline": snapshot.has_final_newline,
            "hash_length": self._hash_length,
            "total_lines": snapshot.total_lines,
            "start_line": first,
            "end_line": last,
            "lines": rendered,
        }

    def edit(
        self, path: str, ops: EditOp | list[EditOp], *, expected_version: str
    ) -> EditResult:
        """Apply one or more anchored operations to a file and write the result.

        Args:
            path: File to edit, relative to the root or absolute inside it.
            ops: A single operation or a list of operations.
            expected_version: Version digest returned by the caller's last read.

        Returns:
            An ``EditResult`` describing the file before and after the edit.

        Raises:
            VersionConflictError: If the file's current version differs from
                ``expected_version``.
            InvalidOperationError: If no operation is given, an operation has an
                unknown ``op``, lacks a required field, has a non-integer line
                number, refers to a line outside the file, has a reversed range,
                lacks ``end_hash`` for a range, or replaces lines with identical
                content.
            AnchorMismatchError: If any supplied hash differs from the hash of
                the current line.
        """
        resolved = self._resolve_path(path)
        snapshot = self._read_snapshot(resolved)
        if expected_version != snapshot.version:
            raise VersionConflictError(
                snapshot.path, expected_version, snapshot.version
            )

        operations = [ops] if isinstance(ops, dict) else list(ops)
        if not operations:
            raise InvalidOperationError("edit() requires at least one operation")

        lines = list(snapshot.lines)
        mismatches: list[dict[str, str | int]] = []

        def require_field(operation: EditOp, field: str) -> object:
            # Report a malformed payload as InvalidOperationError instead of
            # letting a bare KeyError/TypeError escape to the caller.
            payload = cast("dict[str, object]", operation)
            if not isinstance(payload, dict) or field not in payload:
                raise InvalidOperationError(
                    f"Operation is missing required field '{field}'"
                )
            return payload[field]

        def as_line_number(value: object, field: str) -> int:
            try:
                return int(cast("int", value))
            except (TypeError, ValueError) as error:
                raise InvalidOperationError(
                    f"{field} must be an integer, got {value!r}"
                ) from error

        def validate_anchor(line_number: int, expected_hash: str) -> None:
            if line_number < 1 or line_number > len(lines):
                raise InvalidOperationError(
                    f"Line {line_number} is out of range for {snapshot.path} (total_lines={len(lines)})"
                )
            actual_hash = compute_line_hash(
                line_number, lines[line_number - 1], self._hash_length
            )
            if actual_hash != expected_hash:
                mismatches.append(
                    {
                        "line": line_number,
                        "expected": expected_hash,
                        "actual": actual_hash,
                    }
                )

        # Validation pass: nothing is modified until every operation has been
        # checked, so all stale anchors can be reported together.
        for operation in operations:
            op_name = require_field(operation, "op")
            if not isinstance(op_name, str) or op_name not in self._OP_PRECEDENCE:
                raise InvalidOperationError(f"Unsupported op {op_name}")
            if op_name != "delete":
                require_field(operation, "lines")
            start = as_line_number(require_field(operation, "line"), "line")
            validate_anchor(start, str(require_field(operation, "hash")))
            if "end_line" in operation:
                range_op = cast("ReplaceOp | DeleteOp", operation)
                end_line = as_line_number(range_op["end_line"], "end_line")
                end_hash = str(range_op.get("end_hash", ""))
                if not end_hash:
                    raise InvalidOperationError("Range edits require end_hash")
                validate_anchor(end_line, end_hash)
                if start > end_line:
                    raise InvalidOperationError("end_line must be >= line")

        if mismatches:
            raise AnchorMismatchError(
                _message_for_mismatches(
                    snapshot.path, lines, self._hash_length, mismatches
                ),
                mismatches=mismatches,
            )

        def sort_key(operation: EditOp) -> tuple[int, int]:
            range_op = cast("ReplaceOp | DeleteOp", operation)
            anchor = int(range_op.get("end_line", operation["line"]))
            return (-anchor, self._OP_PRECEDENCE[operation["op"]])

        # Operations are applied from the bottom of the file upwards so the
        # line numbers of operations further up stay valid.
        # NOTE(review): operations whose line ranges overlap are not rejected;
        # their combined result depends on this ordering - confirm the intended
        # behavior against the protocol specification.
        changed_lines: list[int] = []
        for operation in sorted(operations, key=sort_key):
            line_number = int(operation["line"])
            op_name = operation["op"]
            if op_name == "replace":
                replace_op = cast("ReplaceOp", operation)
                replacement = strip_render_prefixes(list(replace_op["lines"]))
                end_line = int(replace_op.get("end_line", line_number))
                current = lines[line_number - 1 : end_line]
                if current == replacement:
                    raise InvalidOperationError(
                        f"No changes made to {snapshot.path}. Replacement for {line_number}:{replace_op['hash']} is identical."
                    )
                lines[line_number - 1 : end_line] = replacement
                changed_lines.append(line_number)
            elif op_name == "delete":
                delete_op = cast("DeleteOp", operation)
                end_line = int(delete_op.get("end_line", line_number))
                del lines[line_number - 1 : end_line]
                changed_lines.append(line_number)
            elif op_name == "insert_before":
                insert_before_op = cast("InsertBeforeOp", operation)
                inserted = strip_render_prefixes(list(insert_before_op["lines"]))
                if not inserted:
                    # An empty payload inserts a single blank line.
                    inserted = [""]
                lines[line_number - 1 : line_number - 1] = inserted
                changed_lines.append(line_number)
            elif op_name == "insert_after":
                insert_after_op = cast("InsertAfterOp", operation)
                inserted = strip_render_prefixes(list(insert_after_op["lines"]))
                if not inserted:
                    # An empty payload inserts a single blank line.
                    inserted = [""]
                lines[line_number:line_number] = inserted
                changed_lines.append(line_number + 1)
            else:
                # Unreachable after the validation pass; kept as a safeguard.
                raise InvalidOperationError(f"Unsupported op {op_name}")

        first_changed_line: int | None = min(changed_lines) if changed_lines else None

        version_after = self._write_snapshot(
            resolved,
            lines=lines,
            bom=snapshot.bom,
            newline=snapshot.newline,
            has_final_newline=snapshot.has_final_newline,
        )
        return {
            "path": snapshot.path,
            "version_before": snapshot.version,
            "version_after": version_after,
            "applied": len(operations),
            "first_changed_line": first_changed_line,
            "encoding": snapshot.encoding,
            "bom": snapshot.bom,
            "newline": snapshot.newline,
            "has_final_newline": snapshot.has_final_newline,
            "total_lines": len(lines),
        }

    def write(
        self, path: str, text: str, *, expected_version: str | None = None
    ) -> WriteResult:
        """Create a file, or overwrite an existing one after a version check.

        A new file is written without a BOM using the newline style of
        ``text``. An existing file keeps its BOM flag and, unless it had no
        newline at all, its newline style.

        Args:
            path: File to write, relative to the root or absolute inside it.
            text: Complete new content of the file.
            expected_version: Version digest of the existing file; compared
                only when the file already exists.

        Raises:
            VersionConflictError: If the file exists and ``expected_version``
                is missing or differs from the current version.
            MixedNewlineError: If ``text`` (or the existing file) mixes newline
                styles.
        """
        resolved = self._resolve_path(path)
        created = not resolved.exists()

        input_newline, input_has_final_newline, input_lines = _detect_newline_style(
            text
        )
        bom = False
        newline = input_newline
        if not created:
            snapshot = self._read_snapshot(resolved)
            if expected_version != snapshot.version:
                raise VersionConflictError(
                    snapshot.path, expected_version, snapshot.version
                )
            bom = snapshot.bom
            if snapshot.newline != "none":
                newline = snapshot.newline

        version_after = self._write_snapshot(
            resolved,
            lines=input_lines,
            bom=bom,
            newline=newline,
            has_final_newline=input_has_final_newline,
        )

        return {
            "path": str(resolved.relative_to(self._root)),
            "version_after": version_after,
            "created": created,
            "encoding": self._default_encoding,
            "bom": bom,
            "newline": newline,
            "has_final_newline": input_has_final_newline,
            "total_lines": len(input_lines),
        }
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class HashEditError(Exception):
    """Common base class for every error raised by hash-edit."""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class PathEscapeError(HashEditError):
    """Raised when a requested path resolves outside the configured root."""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MixedNewlineError(HashEditError):
    """Raised when text contains more than one newline style."""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class FileEncodingError(HashEditError):
    """Raised when a file cannot be decoded with the configured encoding."""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class InvalidOperationError(HashEditError):
    """Raised when an edit payload is invalid or would change nothing."""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class VersionConflictError(HashEditError):
    """Raised when a file's current version is not the one the caller expected.

    Attributes:
        path: Path of the file the conflict was detected on.
        expected: Version supplied by the caller, or ``None`` if none was given.
        actual: Version the file currently has.
    """

    def __init__(self, path: str, expected: str | None, actual: str) -> None:
        self.path = path
        self.expected = expected
        self.actual = actual
        # A missing expectation means the caller tried to overwrite blindly,
        # which gets its own, more actionable wording.
        super().__init__(
            f"{path} already exists. read() first and pass expected_version to overwrite safely."
            if expected is None
            else f"Version conflict for {path}: expected {expected}, actual {actual}. Re-read the file first."
        )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class AnchorMismatchError(HashEditError):
    """Raised when at least one anchored line no longer has the hash the caller supplied.

    Attributes:
        mismatches: One dictionary per stale anchor, as passed by the raiser.
    """

    def __init__(self, message: str, *, mismatches: list[dict[str, str | int]]) -> None:
        self.mismatches = mismatches
        super().__init__(message)
|
|
File without changes
|