harnesskit 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Alex Melges
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,169 @@
1
+ Metadata-Version: 2.4
2
+ Name: harnesskit
3
+ Version: 0.1.0
4
+ Summary: Fuzzy edit tool for LLM coding agents — never fail a str_replace again
5
+ Author-email: Alex Melges <alex@melges.dev>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/alexmelges/harnesskit
8
+ Project-URL: Repository, https://github.com/alexmelges/harnesskit
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Topic :: Software Development :: Libraries
13
+ Requires-Python: >=3.8
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Dynamic: license-file
17
+
18
+ # 🔧 HarnessKit
19
+
20
+ > **Fuzzy edit tool for LLM coding agents — never fail a `str_replace` again.**
21
+
22
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
23
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-green.svg)](https://python.org)
24
+ [![Zero Dependencies](https://img.shields.io/badge/dependencies-zero-brightgreen.svg)](#)
25
+
26
+ ---
27
+
28
+ ## The Problem
29
+
30
+ Every LLM coding agent has the same Achilles' heel: **edit application**.
31
+
32
+ When Claude, GPT, or any model tries to modify code, it generates an `old_text` → `new_text` pair. The tool then does an exact string match to find where to apply the change. And it fails. A lot.
33
+
34
+ - **Whitespace differences** — the model adds a space, drops a tab, or normalizes indentation
35
+ - **Minor hallucinations** — a variable name is slightly off, a comment is paraphrased
36
+ - **Format fragility** — diffs, patches, and line-number schemes all break in different ways
37
+
38
+ The result? Up to **50% edit failure rates** on non-native models. Every failed edit wastes a tool call, burns tokens on retries, and breaks agent flow.
39
+
40
+ ## The Solution
41
+
42
+ HarnessKit (`hk`) is a drop-in edit tool that **fuzzy-matches** the old text before replacing it. It uses a 4-stage matching cascade:
43
+
44
+ 1. **Exact match** — zero overhead when the model is precise
45
+ 2. **Normalized whitespace** — catches the most common failure mode
46
+ 3. **Sequence matching** — `difflib.SequenceMatcher` with configurable threshold (default 0.8)
47
+ 4. **Line-by-line fuzzy** — finds the best contiguous block match for heavily drifted edits
48
+
49
+ Every edit returns a **confidence score** and **match type**, so your agent knows exactly how the edit was resolved.
50
+
51
+ ## Quick Start
52
+
53
+ ```bash
54
+ pip install harnesskit
55
+ ```
56
+
57
+ Or just copy `hk.py` into your project — it's a single file, stdlib only.
58
+
59
+ ### CLI Usage
60
+
61
+ ```bash
62
+ # Direct arguments
63
+ hk apply --file app.py --old $'def hello():\n    print(\'hi\')' --new $'def hello():\n    print(\'hello world\')'
64
+
65
+ # JSON from stdin (perfect for tool_use integration)
66
+ echo '{"file": "app.py", "old_text": "def hello():", "new_text": "def greet():"}' | hk apply --stdin
67
+
68
+ # From a JSON file
69
+ hk apply --edit changes.json
70
+
71
+ # Dry run — see what would change without writing
72
+ hk apply --file app.py --old "..." --new "..." --dry-run
73
+ ```
74
+
75
+ ### JSON Edit Format
76
+
77
+ ```json
78
+ {
79
+ "file": "path/to/file.py",
80
+ "old_text": "def hello():\n print('hi')",
81
+ "new_text": "def hello():\n print('hello world')"
82
+ }
83
+ ```
84
+
85
+ Batch multiple edits:
86
+
87
+ ```json
88
+ {
89
+ "edits": [
90
+ {"file": "a.py", "old_text": "...", "new_text": "..."},
91
+ {"file": "b.py", "old_text": "...", "new_text": "..."}
92
+ ]
93
+ }
94
+ ```
95
+
96
+ ### Output
97
+
98
+ ```json
99
+ {
100
+ "status": "applied",
101
+ "file": "app.py",
102
+ "match_type": "whitespace",
103
+ "confidence": 0.95,
104
+ "matched_text": "def hello():\n print( 'hi' )"
105
+ }
106
+ ```
107
+
108
+ ### Exit Codes
109
+
110
+ | Code | Meaning |
111
+ |------|---------|
112
+ | `0` | Edit applied successfully |
113
+ | `1` | No match found, or an error occurred (e.g. file not found, invalid input) |
114
+ | `2` | Ambiguous — multiple matches |
115
+
116
+ ## Integration
117
+
118
+ HarnessKit is designed to slot into any agent framework as the edit backend:
119
+
120
+ ```python
121
+ import subprocess, json
122
+
123
+ def apply_edit(file, old_text, new_text):
124
+ result = subprocess.run(
125
+ ["hk", "apply", "--stdin"],
126
+ input=json.dumps({"file": file, "old_text": old_text, "new_text": new_text}),
127
+ capture_output=True, text=True
128
+ )
129
+ return json.loads(result.stdout)
130
+ ```
131
+
132
+ Or import directly:
133
+
134
+ ```python
135
+ from hk import apply_edit
136
+
137
+ result = apply_edit("app.py", old_text, new_text, threshold=0.8)
138
+ ```
139
+
140
+ ## Design Principles
141
+
142
+ - **Single file, stdlib only** — copy it, vendor it, pip install it. No dependency hell.
143
+ - **419 lines of Python** — small enough to audit in one sitting
144
+ - **Graceful degradation** — exact match when possible, fuzzy only when needed
145
+ - **Transparent** — every result tells you *how* it matched and *how confident* it is
146
+ - **Model-agnostic** — works with any LLM that can produce old/new text pairs
147
+
148
+ ## Configuration
149
+
150
+ | Flag | Default | Description |
151
+ |------|---------|-------------|
152
+ | `--threshold` | `0.8` | Minimum similarity score for fuzzy matching |
153
+ | `--dry-run` | `false` | Preview changes without writing to disk |
154
+
155
+ ## Development
156
+
157
+ ```bash
158
+ git clone https://github.com/alexmelges/harnesskit.git
159
+ cd harnesskit
160
+ python3 test_hk.py # 39 tests, stdlib unittest
161
+ ```
162
+
163
+ ## License
164
+
165
+ MIT — see [LICENSE](LICENSE).
166
+
167
+ ---
168
+
169
+ **Built for the agents that build everything else.**
@@ -0,0 +1,152 @@
1
+ # 🔧 HarnessKit
2
+
3
+ > **Fuzzy edit tool for LLM coding agents — never fail a `str_replace` again.**
4
+
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
6
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-green.svg)](https://python.org)
7
+ [![Zero Dependencies](https://img.shields.io/badge/dependencies-zero-brightgreen.svg)](#)
8
+
9
+ ---
10
+
11
+ ## The Problem
12
+
13
+ Every LLM coding agent has the same Achilles' heel: **edit application**.
14
+
15
+ When Claude, GPT, or any model tries to modify code, it generates an `old_text` → `new_text` pair. The tool then does an exact string match to find where to apply the change. And it fails. A lot.
16
+
17
+ - **Whitespace differences** — the model adds a space, drops a tab, or normalizes indentation
18
+ - **Minor hallucinations** — a variable name is slightly off, a comment is paraphrased
19
+ - **Format fragility** — diffs, patches, and line-number schemes all break in different ways
20
+
21
+ The result? Up to **50% edit failure rates** on non-native models. Every failed edit wastes a tool call, burns tokens on retries, and breaks agent flow.
22
+
23
+ ## The Solution
24
+
25
+ HarnessKit (`hk`) is a drop-in edit tool that **fuzzy-matches** the old text before replacing it. It uses a 4-stage matching cascade:
26
+
27
+ 1. **Exact match** — zero overhead when the model is precise
28
+ 2. **Normalized whitespace** — catches the most common failure mode
29
+ 3. **Sequence matching** — `difflib.SequenceMatcher` with configurable threshold (default 0.8)
30
+ 4. **Line-by-line fuzzy** — finds the best contiguous block match for heavily drifted edits
31
+
32
+ Every edit returns a **confidence score** and **match type**, so your agent knows exactly how the edit was resolved.
33
+
34
+ ## Quick Start
35
+
36
+ ```bash
37
+ pip install harnesskit
38
+ ```
39
+
40
+ Or just copy `hk.py` into your project — it's a single file, stdlib only.
41
+
42
+ ### CLI Usage
43
+
44
+ ```bash
45
+ # Direct arguments
46
+ hk apply --file app.py --old $'def hello():\n    print(\'hi\')' --new $'def hello():\n    print(\'hello world\')'
47
+
48
+ # JSON from stdin (perfect for tool_use integration)
49
+ echo '{"file": "app.py", "old_text": "def hello():", "new_text": "def greet():"}' | hk apply --stdin
50
+
51
+ # From a JSON file
52
+ hk apply --edit changes.json
53
+
54
+ # Dry run — see what would change without writing
55
+ hk apply --file app.py --old "..." --new "..." --dry-run
56
+ ```
57
+
58
+ ### JSON Edit Format
59
+
60
+ ```json
61
+ {
62
+ "file": "path/to/file.py",
63
+ "old_text": "def hello():\n print('hi')",
64
+ "new_text": "def hello():\n print('hello world')"
65
+ }
66
+ ```
67
+
68
+ Batch multiple edits:
69
+
70
+ ```json
71
+ {
72
+ "edits": [
73
+ {"file": "a.py", "old_text": "...", "new_text": "..."},
74
+ {"file": "b.py", "old_text": "...", "new_text": "..."}
75
+ ]
76
+ }
77
+ ```
78
+
79
+ ### Output
80
+
81
+ ```json
82
+ {
83
+ "status": "applied",
84
+ "file": "app.py",
85
+ "match_type": "whitespace",
86
+ "confidence": 0.95,
87
+ "matched_text": "def hello():\n print( 'hi' )"
88
+ }
89
+ ```
90
+
91
+ ### Exit Codes
92
+
93
+ | Code | Meaning |
94
+ |------|---------|
95
+ | `0` | Edit applied successfully |
96
+ | `1` | No match found, or an error occurred (e.g. file not found, invalid input) |
97
+ | `2` | Ambiguous — multiple matches |
98
+
99
+ ## Integration
100
+
101
+ HarnessKit is designed to slot into any agent framework as the edit backend:
102
+
103
+ ```python
104
+ import subprocess, json
105
+
106
+ def apply_edit(file, old_text, new_text):
107
+ result = subprocess.run(
108
+ ["hk", "apply", "--stdin"],
109
+ input=json.dumps({"file": file, "old_text": old_text, "new_text": new_text}),
110
+ capture_output=True, text=True
111
+ )
112
+ return json.loads(result.stdout)
113
+ ```
114
+
115
+ Or import directly:
116
+
117
+ ```python
118
+ from hk import apply_edit
119
+
120
+ result = apply_edit("app.py", old_text, new_text, threshold=0.8)
121
+ ```
122
+
123
+ ## Design Principles
124
+
125
+ - **Single file, stdlib only** — copy it, vendor it, pip install it. No dependency hell.
126
+ - **419 lines of Python** — small enough to audit in one sitting
127
+ - **Graceful degradation** — exact match when possible, fuzzy only when needed
128
+ - **Transparent** — every result tells you *how* it matched and *how confident* it is
129
+ - **Model-agnostic** — works with any LLM that can produce old/new text pairs
130
+
131
+ ## Configuration
132
+
133
+ | Flag | Default | Description |
134
+ |------|---------|-------------|
135
+ | `--threshold` | `0.8` | Minimum similarity score for fuzzy matching |
136
+ | `--dry-run` | `false` | Preview changes without writing to disk |
137
+
138
+ ## Development
139
+
140
+ ```bash
141
+ git clone https://github.com/alexmelges/harnesskit.git
142
+ cd harnesskit
143
+ python3 test_hk.py # 39 tests, stdlib unittest
144
+ ```
145
+
146
+ ## License
147
+
148
+ MIT — see [LICENSE](LICENSE).
149
+
150
+ ---
151
+
152
+ **Built for the agents that build everything else.**
@@ -0,0 +1,169 @@
1
+ Metadata-Version: 2.4
2
+ Name: harnesskit
3
+ Version: 0.1.0
4
+ Summary: Fuzzy edit tool for LLM coding agents — never fail a str_replace again
5
+ Author-email: Alex Melges <alex@melges.dev>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/alexmelges/harnesskit
8
+ Project-URL: Repository, https://github.com/alexmelges/harnesskit
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Topic :: Software Development :: Libraries
13
+ Requires-Python: >=3.8
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Dynamic: license-file
17
+
18
+ # 🔧 HarnessKit
19
+
20
+ > **Fuzzy edit tool for LLM coding agents — never fail a `str_replace` again.**
21
+
22
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
23
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-green.svg)](https://python.org)
24
+ [![Zero Dependencies](https://img.shields.io/badge/dependencies-zero-brightgreen.svg)](#)
25
+
26
+ ---
27
+
28
+ ## The Problem
29
+
30
+ Every LLM coding agent has the same Achilles' heel: **edit application**.
31
+
32
+ When Claude, GPT, or any model tries to modify code, it generates an `old_text` → `new_text` pair. The tool then does an exact string match to find where to apply the change. And it fails. A lot.
33
+
34
+ - **Whitespace differences** — the model adds a space, drops a tab, or normalizes indentation
35
+ - **Minor hallucinations** — a variable name is slightly off, a comment is paraphrased
36
+ - **Format fragility** — diffs, patches, and line-number schemes all break in different ways
37
+
38
+ The result? Up to **50% edit failure rates** on non-native models. Every failed edit wastes a tool call, burns tokens on retries, and breaks agent flow.
39
+
40
+ ## The Solution
41
+
42
+ HarnessKit (`hk`) is a drop-in edit tool that **fuzzy-matches** the old text before replacing it. It uses a 4-stage matching cascade:
43
+
44
+ 1. **Exact match** — zero overhead when the model is precise
45
+ 2. **Normalized whitespace** — catches the most common failure mode
46
+ 3. **Sequence matching** — `difflib.SequenceMatcher` with configurable threshold (default 0.8)
47
+ 4. **Line-by-line fuzzy** — finds the best contiguous block match for heavily drifted edits
48
+
49
+ Every edit returns a **confidence score** and **match type**, so your agent knows exactly how the edit was resolved.
50
+
51
+ ## Quick Start
52
+
53
+ ```bash
54
+ pip install harnesskit
55
+ ```
56
+
57
+ Or just copy `hk.py` into your project — it's a single file, stdlib only.
58
+
59
+ ### CLI Usage
60
+
61
+ ```bash
62
+ # Direct arguments
63
+ hk apply --file app.py --old $'def hello():\n    print(\'hi\')' --new $'def hello():\n    print(\'hello world\')'
64
+
65
+ # JSON from stdin (perfect for tool_use integration)
66
+ echo '{"file": "app.py", "old_text": "def hello():", "new_text": "def greet():"}' | hk apply --stdin
67
+
68
+ # From a JSON file
69
+ hk apply --edit changes.json
70
+
71
+ # Dry run — see what would change without writing
72
+ hk apply --file app.py --old "..." --new "..." --dry-run
73
+ ```
74
+
75
+ ### JSON Edit Format
76
+
77
+ ```json
78
+ {
79
+ "file": "path/to/file.py",
80
+ "old_text": "def hello():\n print('hi')",
81
+ "new_text": "def hello():\n print('hello world')"
82
+ }
83
+ ```
84
+
85
+ Batch multiple edits:
86
+
87
+ ```json
88
+ {
89
+ "edits": [
90
+ {"file": "a.py", "old_text": "...", "new_text": "..."},
91
+ {"file": "b.py", "old_text": "...", "new_text": "..."}
92
+ ]
93
+ }
94
+ ```
95
+
96
+ ### Output
97
+
98
+ ```json
99
+ {
100
+ "status": "applied",
101
+ "file": "app.py",
102
+ "match_type": "whitespace",
103
+ "confidence": 0.95,
104
+ "matched_text": "def hello():\n print( 'hi' )"
105
+ }
106
+ ```
107
+
108
+ ### Exit Codes
109
+
110
+ | Code | Meaning |
111
+ |------|---------|
112
+ | `0` | Edit applied successfully |
113
+ | `1` | No match found, or an error occurred (e.g. file not found, invalid input) |
114
+ | `2` | Ambiguous — multiple matches |
115
+
116
+ ## Integration
117
+
118
+ HarnessKit is designed to slot into any agent framework as the edit backend:
119
+
120
+ ```python
121
+ import subprocess, json
122
+
123
+ def apply_edit(file, old_text, new_text):
124
+ result = subprocess.run(
125
+ ["hk", "apply", "--stdin"],
126
+ input=json.dumps({"file": file, "old_text": old_text, "new_text": new_text}),
127
+ capture_output=True, text=True
128
+ )
129
+ return json.loads(result.stdout)
130
+ ```
131
+
132
+ Or import directly:
133
+
134
+ ```python
135
+ from hk import apply_edit
136
+
137
+ result = apply_edit("app.py", old_text, new_text, threshold=0.8)
138
+ ```
139
+
140
+ ## Design Principles
141
+
142
+ - **Single file, stdlib only** — copy it, vendor it, pip install it. No dependency hell.
143
+ - **419 lines of Python** — small enough to audit in one sitting
144
+ - **Graceful degradation** — exact match when possible, fuzzy only when needed
145
+ - **Transparent** — every result tells you *how* it matched and *how confident* it is
146
+ - **Model-agnostic** — works with any LLM that can produce old/new text pairs
147
+
148
+ ## Configuration
149
+
150
+ | Flag | Default | Description |
151
+ |------|---------|-------------|
152
+ | `--threshold` | `0.8` | Minimum similarity score for fuzzy matching |
153
+ | `--dry-run` | `false` | Preview changes without writing to disk |
154
+
155
+ ## Development
156
+
157
+ ```bash
158
+ git clone https://github.com/alexmelges/harnesskit.git
159
+ cd harnesskit
160
+ python3 test_hk.py # 39 tests, stdlib unittest
161
+ ```
162
+
163
+ ## License
164
+
165
+ MIT — see [LICENSE](LICENSE).
166
+
167
+ ---
168
+
169
+ **Built for the agents that build everything else.**
@@ -0,0 +1,9 @@
1
+ LICENSE
2
+ README.md
3
+ hk.py
4
+ pyproject.toml
5
+ harnesskit.egg-info/PKG-INFO
6
+ harnesskit.egg-info/SOURCES.txt
7
+ harnesskit.egg-info/dependency_links.txt
8
+ harnesskit.egg-info/entry_points.txt
9
+ harnesskit.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ hk = hk:main
harnesskit-0.1.0/hk.py ADDED
@@ -0,0 +1,419 @@
1
+ #!/usr/bin/env python3
2
+ """HarnessKit — Fuzzy edit tool for LLM coding agents.
3
+
4
+ A single-file CLI tool that applies edits to files using fuzzy matching.
5
+ Accepts a simple, model-agnostic edit format and robustly applies changes
6
+ even when the LLM's output isn't pixel-perfect.
7
+
8
+ Algorithm:
9
+ 1. Try exact match
10
+ 2. Fall back to normalized whitespace match
11
+ 3. Fall back to difflib.SequenceMatcher (configurable threshold)
12
+ 4. Fall back to line-by-line fuzzy match (best contiguous block)
13
+
14
+ Exit codes: 0=applied, 1=no match found or error, 2=ambiguous (multiple matches)
15
+ """
16
+
17
+ import argparse
18
+ import difflib
19
+ import json
20
+ import re
21
+ import sys
22
+ from dataclasses import dataclass
23
+ from typing import List, Optional, Tuple
24
+
25
+
26
@dataclass
class MatchResult:
    """One candidate location in the searched content that matches the old text."""

    # Character offset in the content where the matched span begins (inclusive).
    start: int
    # Character offset where the matched span ends (exclusive).
    end: int
    # The text reported for the span (for exact matches, the search text itself).
    matched_text: str
    # Strategy that produced the match: "exact", "whitespace", "fuzzy", "line_fuzzy".
    match_type: str
    # 1.0 for exact, 0.95 for whitespace, the rounded similarity score otherwise.
    confidence: float
33
+
34
+
35
@dataclass
class EditResult:
    """Outcome of attempting one edit; optional fields stay None when not applicable."""

    # One of "applied", "no_match", "ambiguous", "error".
    status: str
    # Path of the file the edit targeted.
    file: str
    # Strategy name of the match that was used (or of the first ambiguous match).
    match_type: Optional[str] = None
    # Confidence score reported for that match.
    confidence: Optional[float] = None
    # Text in the file that was replaced (set only when the edit was applied).
    matched_text: Optional[str] = None
    # Human-readable explanation for non-"applied" outcomes.
    error: Optional[str] = None
43
+
44
+
45
def normalize_whitespace(text: str) -> str:
    """Return *text* with every whitespace run reduced to one space and the ends trimmed."""
    # Splitting on whitespace drops leading/trailing runs and yields the
    # non-whitespace tokens; re-joining with a single space collapses the rest.
    tokens = text.split()
    return ' '.join(tokens)
48
+
49
+
50
def find_exact_matches(content: str, old_text: str) -> List[MatchResult]:
    """Return a MatchResult for each position where old_text occurs verbatim.

    The search resumes one character after each hit, so overlapping
    occurrences are all reported.
    """
    needle_len = len(old_text)
    found = []
    cursor = content.find(old_text)
    while cursor != -1:
        found.append(
            MatchResult(
                start=cursor,
                end=cursor + needle_len,
                matched_text=old_text,
                match_type="exact",
                confidence=1.0,
            )
        )
        # Advance by one (not by needle_len) to keep overlapping hits.
        cursor = content.find(old_text, cursor + 1)
    return found
67
+
68
+
69
+ def _strip_whitespace_with_map(text: str) -> Tuple[str, List[int]]:
70
+ """Strip all whitespace from text, returning (stripped, position_map).
71
+
72
+ position_map[i] = index in original text of the i-th non-ws char.
73
+ """
74
+ chars: List[str] = []
75
+ positions: List[int] = []
76
+ for i, ch in enumerate(text):
77
+ if not ch.isspace():
78
+ chars.append(ch)
79
+ positions.append(i)
80
+ return ''.join(chars), positions
81
+
82
+
83
def find_whitespace_matches(content: str, old_text: str) -> List[MatchResult]:
    """Locate spans of content equal to old_text once all whitespace is ignored.

    Both strings are reduced to their non-whitespace characters; every
    occurrence of the reduced old_text inside the reduced content is then
    translated back to offsets in the original content. The reported span
    runs from the first to the last non-whitespace character of the hit.
    """
    needle, _ = _strip_whitespace_with_map(old_text)
    if needle == "":
        # Nothing but whitespace to look for.
        return []

    haystack, origin = _strip_whitespace_with_map(content)
    needle_len = len(needle)

    hits = []
    pos = haystack.find(needle)
    while pos != -1:
        first = origin[pos]
        # Offset just past the last matched non-whitespace character.
        stop = origin[pos + needle_len - 1] + 1
        hits.append(
            MatchResult(
                start=first,
                end=stop,
                matched_text=content[first:stop],
                match_type="whitespace",
                confidence=0.95,
            )
        )
        pos = haystack.find(needle, pos + 1)
    return hits
115
+
116
+
117
def find_fuzzy_matches(
    content: str, old_text: str, threshold: float
) -> List[MatchResult]:
    """Score sliding windows of content against old_text with SequenceMatcher.

    Window lengths range from roughly 70% to 130% of len(old_text). Only
    windows whose ratio is strictly greater than *threshold* qualify. The
    result holds the highest-scoring window plus any later windows that tie
    with it exactly and do not overlap a window already kept.
    """
    if old_text.strip() == "":
        return []

    target_len = len(old_text)
    content_len = len(content)
    smallest = max(1, int(target_len * 0.7))
    largest = min(content_len, int(target_len * 1.3) + 1)

    leaders: List[MatchResult] = []
    top_ratio = threshold

    for size in range(smallest, largest + 1):
        for begin in range(content_len - size + 1):
            finish = begin + size
            window = content[begin:finish]
            score = difflib.SequenceMatcher(None, old_text, window).ratio()

            if score > top_ratio:
                # New best: discard everything collected so far.
                leaders = [
                    MatchResult(
                        start=begin,
                        end=finish,
                        matched_text=window,
                        match_type="fuzzy",
                        confidence=round(score, 4),
                    )
                ]
                top_ratio = score
                continue

            if score != top_ratio or not leaders:
                continue

            # Exact tie with the current best: keep it only if it is
            # disjoint from every window already kept.
            disjoint = all(
                begin >= kept.end or finish <= kept.start for kept in leaders
            )
            if disjoint:
                leaders.append(
                    MatchResult(
                        start=begin,
                        end=finish,
                        matched_text=window,
                        match_type="fuzzy",
                        confidence=round(score, 4),
                    )
                )

    return leaders
163
+
164
+
165
def find_line_fuzzy_matches(
    content: str, old_text: str, threshold: float
) -> List[MatchResult]:
    """Find the contiguous block(s) of content lines most similar to old_text.

    Every window of len(old_lines) consecutive content lines is scored as the
    average per-line SequenceMatcher ratio (line terminators included). Only
    windows scoring strictly above *threshold* qualify; the best-scoring
    window is returned together with any later windows that tie with it.

    When old_text does not end with a line terminator, the terminator of the
    last matched content line is left out of the reported span. Otherwise
    replacing the span with text that has no trailing newline would join the
    following line onto the replacement.

    Returns an empty list if either input has no lines or nothing qualifies.
    """
    content_lines = content.splitlines(keepends=True)
    old_lines = old_text.splitlines(keepends=True)

    if not old_lines or not content_lines:
        return []

    n_old = len(old_lines)
    # splitlines(keepends=True) never yields empty items, so [0] is safe.
    last_old = old_lines[-1]
    old_ends_with_eol = len(last_old.splitlines()[0]) != len(last_old)

    best_score = threshold
    best_matches: List[Tuple[int, int, float]] = []  # (start_line, end_line, score)

    for start_line in range(0, len(content_lines) - n_old + 1):
        block = content_lines[start_line:start_line + n_old]
        total = 0.0
        for ol, cl in zip(old_lines, block):
            total += difflib.SequenceMatcher(None, ol, cl).ratio()
        avg = total / n_old

        if avg > best_score:
            best_score = avg
            best_matches = [(start_line, start_line + n_old, avg)]
        elif avg == best_score and best_matches:
            best_matches.append((start_line, start_line + n_old, avg))

    results = []
    for start_line, end_line, score in best_matches:
        block = content_lines[start_line:end_line]
        matched_text = ''.join(block)
        if not old_ends_with_eol:
            # Keep the file's line terminator out of the replaced span.
            last_line = block[-1]
            eol_len = len(last_line) - len(last_line.splitlines()[0])
            if eol_len:
                matched_text = matched_text[:-eol_len]
        # Character offset of the block = total length of the lines before it.
        char_start = sum(len(line) for line in content_lines[:start_line])
        char_end = char_start + len(matched_text)
        results.append(MatchResult(
            start=char_start,
            end=char_end,
            matched_text=matched_text,
            match_type="line_fuzzy",
            confidence=round(score, 4),
        ))
    return results
208
+
209
+
210
def find_best_match(
    content: str, old_text: str, threshold: float = 0.8
) -> Optional[MatchResult]:
    """Run the matching strategies in order and return the first unique hit.

    Order: exact, whitespace-insensitive, SequenceMatcher windows, then
    line-by-line fuzzy. The first strategy that yields any candidates decides
    the outcome: a single candidate is returned, while several candidates
    raise AmbiguousMatchError. Returns None when no strategy finds anything.
    """
    # Deferred calls so later (more expensive) strategies only run if the
    # earlier ones found nothing.
    strategies = (
        lambda: find_exact_matches(content, old_text),
        lambda: find_whitespace_matches(content, old_text),
        lambda: find_fuzzy_matches(content, old_text, threshold),
        lambda: find_line_fuzzy_matches(content, old_text, threshold),
    )

    for run_strategy in strategies:
        candidates = run_strategy()
        if len(candidates) > 1:
            raise AmbiguousMatchError(candidates)
        if candidates:
            return candidates[0]

    return None
247
+
248
+
249
class AmbiguousMatchError(Exception):
    """Signals that a strategy produced more than one candidate match.

    The candidates are available on the ``matches`` attribute.
    """

    def __init__(self, matches: List[MatchResult]):
        message = "Found {} ambiguous matches".format(len(matches))
        super().__init__(message)
        self.matches = matches
255
+
256
+
257
def apply_edit(
    file_path: str,
    old_text: str,
    new_text: str,
    threshold: float = 0.8,
    dry_run: bool = False,
) -> EditResult:
    """Replace the best match of old_text in a file with new_text.

    Args:
        file_path: Path of the file to edit.
        old_text: Text to locate (see find_best_match for the strategies).
        new_text: Text that replaces the matched span.
        threshold: Score a fuzzy match must exceed.
        dry_run: When True, the match is computed but the file is not written.

    Returns:
        An EditResult whose status is "applied", "no_match", "ambiguous" or
        "error". Read/write failures and undecodable files are reported as
        "error" rather than raised.

    Matching runs on content with line endings normalized to "\\n", which is
    what a default text-mode read returns. On write the file's predominant
    convention is restored: CRLF if the file had more "\\r\\n" than bare
    "\\n" endings, otherwise "\\n". This keeps an edit from rewriting every
    line ending in the file.
    """
    try:
        # newline='' turns off newline translation so the original line
        # endings can be inspected.
        with open(file_path, 'r', newline='') as f:
            raw = f.read()
    except FileNotFoundError:
        return EditResult(
            status="error",
            file=file_path,
            error=f"File not found: {file_path}",
        )
    except (OSError, UnicodeDecodeError) as e:
        return EditResult(
            status="error",
            file=file_path,
            error=str(e),
        )

    crlf_count = raw.count('\r\n')
    bare_lf_count = raw.count('\n') - crlf_count
    uses_crlf = crlf_count > bare_lf_count
    # Same normalization universal-newlines mode applies on read.
    content = raw.replace('\r\n', '\n').replace('\r', '\n')

    try:
        match = find_best_match(content, old_text, threshold)
    except AmbiguousMatchError as e:
        return EditResult(
            status="ambiguous",
            file=file_path,
            match_type=e.matches[0].match_type,
            confidence=e.matches[0].confidence,
            error=f"Found {len(e.matches)} ambiguous matches",
        )

    if match is None:
        return EditResult(
            status="no_match",
            file=file_path,
            error="No match found above threshold",
        )

    new_content = content[:match.start] + new_text + content[match.end:]

    if not dry_run:
        if uses_crlf:
            # Collapse any CRLF brought in by new_text first so it is not
            # expanded to "\r\r\n".
            new_content = new_content.replace('\r\n', '\n').replace('\n', '\r\n')
        try:
            with open(file_path, 'w', newline='') as f:
                f.write(new_content)
        except (OSError, UnicodeEncodeError) as e:
            return EditResult(
                status="error",
                file=file_path,
                error=str(e),
            )

    return EditResult(
        status="applied",
        file=file_path,
        match_type=match.match_type,
        confidence=match.confidence,
        matched_text=match.matched_text,
    )
312
+
313
+
314
def result_to_dict(result: EditResult) -> dict:
    """Build a JSON-serializable dict from an EditResult.

    "status" and "file" are always present; the optional fields are included
    only when they are not None.
    """
    payload = {"status": result.status, "file": result.file}
    for key in ("match_type", "confidence", "matched_text", "error"):
        value = getattr(result, key)
        if value is not None:
            payload[key] = value
    return payload
326
+
327
+
328
def _normalize_edits(data) -> List[dict]:
    """Turn decoded JSON into a list of edit dicts, or raise ValueError."""
    if isinstance(data, dict):
        edits = data["edits"] if "edits" in data else [data]
    else:
        edits = data
    if not isinstance(edits, list) or not all(
        isinstance(item, dict) for item in edits
    ):
        raise ValueError(
            'Edit input must be a JSON object, a list of objects, '
            'or {"edits": [...]}'
        )
    return edits


def parse_edit_input(args) -> List[dict]:
    """Collect edit instructions from the parsed CLI arguments.

    Sources are checked in this order: ``--stdin`` (JSON on standard input),
    ``--edit`` (path to a JSON file), then ``--file``/``--old``/``--new``.

    JSON input may be a single edit object, an object with an "edits" list,
    or a bare list of edit objects.

    Returns:
        A list of edit dicts.

    Raises:
        ValueError: if no source was given or the JSON has the wrong shape
            (json.JSONDecodeError, a ValueError subclass, for invalid JSON).
        OSError: if the ``--edit`` file cannot be opened.
    """
    if args.stdin:
        return _normalize_edits(json.load(sys.stdin))

    if args.edit:
        with open(args.edit, 'r') as f:
            return _normalize_edits(json.load(f))

    if args.file and args.old is not None and args.new is not None:
        return [{"file": args.file, "old_text": args.old, "new_text": args.new}]

    raise ValueError(
        "Must provide --file/--old/--new, --edit <file>, or --stdin"
    )
349
+
350
+
351
def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point for ``hk``.

    Args:
        argv: Argument list to parse; None lets argparse use sys.argv.

    Returns:
        Process exit code: 0 if every edit applied, 1 if any edit had no
        match or an error (or the input could not be read), 2 if any edit
        was ambiguous. Also 1, after printing help, when the command is not
        ``apply``.

    Results are printed to stdout as JSON: a single object for one edit,
    a list otherwise.
    """
    parser = argparse.ArgumentParser(
        prog="hk",
        description="HarnessKit — Fuzzy edit tool for LLM coding agents",
    )
    sub = parser.add_subparsers(dest="command")

    apply_parser = sub.add_parser("apply", help="Apply edit(s) to file(s)")
    apply_parser.add_argument("--file", help="Target file path")
    apply_parser.add_argument("--old", help="Text to find")
    apply_parser.add_argument("--new", help="Replacement text")
    apply_parser.add_argument("--edit", help="JSON edit instruction file")
    apply_parser.add_argument(
        "--stdin", action="store_true", help="Read JSON from stdin"
    )
    apply_parser.add_argument(
        "--threshold",
        type=float,
        default=0.8,
        help="Fuzzy match threshold (default: 0.8)",
    )
    apply_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would change without applying",
    )

    args = parser.parse_args(argv)

    if args.command != "apply":
        parser.print_help()
        return 1

    try:
        edits = parse_edit_input(args)
    except (ValueError, OSError, TypeError) as e:
        # ValueError covers json.JSONDecodeError; OSError covers a missing or
        # unreadable --edit file; TypeError covers JSON of the wrong shape.
        print(json.dumps({"status": "error", "error": str(e)}))
        return 1

    if not isinstance(edits, list):
        print(json.dumps(
            {"status": "error", "error": "Edit input must contain a list of edits"}
        ))
        return 1

    results = []
    exit_code = 0

    for edit in edits:
        if not isinstance(edit, dict):
            results.append({
                "status": "error",
                "file": "",
                "error": "Each edit must be a JSON object",
            })
            exit_code = max(exit_code, 1)
            continue

        file_path = edit.get("file", "")
        old_text = edit.get("old_text", "")
        new_text = edit.get("new_text", "")

        # JSON may carry null/number values here, which would otherwise
        # crash the matcher or be taken by open() as a file descriptor.
        if not all(isinstance(v, str) for v in (file_path, old_text, new_text)):
            results.append({
                "status": "error",
                "file": file_path if isinstance(file_path, str) else "",
                "error": '"file", "old_text" and "new_text" must be strings',
            })
            exit_code = max(exit_code, 1)
            continue

        result = apply_edit(
            file_path, old_text, new_text,
            threshold=args.threshold,
            dry_run=args.dry_run,
        )
        results.append(result_to_dict(result))

        if result.status == "no_match" or result.status == "error":
            exit_code = max(exit_code, 1)
        elif result.status == "ambiguous":
            exit_code = max(exit_code, 2)

    if len(results) == 1:
        print(json.dumps(results[0], indent=2))
    else:
        print(json.dumps(results, indent=2))

    return exit_code
416
+
417
+
418
# Script entry point: exit the process with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,30 @@
1
+ [build-system]
2
+ requires = ["setuptools>=64"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "harnesskit"
7
+ version = "0.1.0"
8
+ description = "Fuzzy edit tool for LLM coding agents — never fail a str_replace again"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.8"
12
+ authors = [
13
+ {name = "Alex Melges", email = "alex@melges.dev"},
14
+ ]
15
+ classifiers = [
16
+ "Development Status :: 3 - Alpha",
17
+ "Intended Audience :: Developers",
18
+ "Programming Language :: Python :: 3",
19
+ "Topic :: Software Development :: Libraries",
20
+ ]
21
+
22
+ [project.urls]
23
+ Homepage = "https://github.com/alexmelges/harnesskit"
24
+ Repository = "https://github.com/alexmelges/harnesskit"
25
+
26
+ [project.scripts]
27
+ hk = "hk:main"
28
+
29
+ [tool.setuptools]
30
+ py-modules = ["hk"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+