mcp-hashline-edit-server 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Mario Zechner (Pi — original codebase)
4
+ Copyright (c) 2025-2026 Can Boluk (oh-my-pi — hashline edit format and implementation)
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,143 @@
1
+ # mcp-hashline-edit-server
2
+
3
+ An MCP (Model Context Protocol) server that provides hashline-based file editing tools — line-addressed edits using content hashes for integrity verification.
4
+
5
+ Based on the [hashline edit format](https://blog.can.ac/2026/02/12/the-harness-problem/) from [oh-my-pi](https://github.com/can1357/oh-my-pi).
6
+
7
+ ## What is Hashline?
8
+
9
+ When an LLM reads a file, every line comes back tagged with a short content hash:
10
+
11
+ ```
12
+ 1:a3|function hello() {
13
+ 2:f1| return "world";
14
+ 3:0e|}
15
+ ```
16
+
17
+ When editing, the model references those tags — "replace line `2:f1`", "replace range `1:a3` through `3:0e`", "insert after `3:0e`". If the file changed since the last read, the hashes won't match and the edit is rejected before anything gets corrupted.
18
+
19
+ This means the model doesn't need to reproduce old content (or whitespace) to identify what it wants to change. Benchmark results show hashline matches or beats traditional `str_replace` for most models, with the weakest models gaining the most (up to 10x improvement).
20
+
21
+ ## Requirements
22
+
23
+ - [Bun](https://bun.sh/) runtime (uses `Bun.hash.xxHash32` for line hashing)
24
+
25
+ ## Install
26
+
27
+ ```bash
28
+ cd mcp-hashline-edit-server
29
+ bun install
30
+ ```
31
+
32
+ ## Usage
33
+
34
+ ### With Claude Desktop / Cursor / any MCP client
35
+
36
+ Add to your MCP configuration:
37
+
38
+ ```json
39
+ {
40
+ "mcpServers": {
41
+ "hashline-edit": {
42
+ "command": "bun",
43
+ "args": ["run", "/path/to/mcp-hashline-edit-server/src/index.ts"]
44
+ }
45
+ }
46
+ }
47
+ ```
48
+
49
+ ### Running directly
50
+
51
+ ```bash
52
+ bun run src/index.ts
53
+ ```
54
+
55
+ The server communicates via stdio using the MCP protocol.
56
+
57
+ ## Tools
58
+
59
+ ### `read_file`
60
+
61
+ Read a file with hashline-prefixed output (`LINE:HASH|content`).
62
+
63
+ | Parameter | Type | Description |
64
+ |-----------|------|-------------|
65
+ | `path` | string | File path (relative or absolute) |
66
+ | `offset` | number? | Line number to start from (1-indexed) |
67
+ | `limit` | number? | Max lines to read (default: 2000) |
68
+
69
+ ### `edit_file`
70
+
71
+ Edit a file using hash-verified line references. Supports four edit variants:
72
+
73
+ **`set_line`** — Replace a single line:
74
+ ```json
75
+ {"set_line": {"anchor": "2:f1", "new_text": " return \"universe\";"}}
76
+ ```
77
+
78
+ **`replace_lines`** — Replace a range:
79
+ ```json
80
+ {"replace_lines": {"start_anchor": "1:a3", "end_anchor": "3:0e", "new_text": "function greet() {\n return \"hi\";\n}"}}
81
+ ```
82
+
83
+ **`insert_after`** — Insert after a line:
84
+ ```json
85
+ {"insert_after": {"anchor": "1:a3", "text": " // new comment"}}
86
+ ```
87
+
88
+ **`replace`** — Substring fuzzy replace (fallback, no hashes needed):
89
+ ```json
90
+ {"replace": {"old_text": "return \"world\"", "new_text": "return \"universe\""}}
91
+ ```
92
+
93
+ | Parameter | Type | Description |
94
+ |-----------|------|-------------|
95
+ | `path` | string | File path |
96
+ | `edits` | array | Array of edit operations |
97
+
98
+ All edits are validated atomically against the file as last read. Edits are sorted and applied bottom-up automatically.
99
+
100
+ ### `write_file`
101
+
102
+ Create or overwrite a file.
103
+
104
+ | Parameter | Type | Description |
105
+ |-----------|------|-------------|
106
+ | `path` | string | File path |
107
+ | `content` | string | Content to write |
108
+
109
+ ### `grep`
110
+
111
+ Search files with hashline-prefixed results.
112
+
113
+ | Parameter | Type | Description |
114
+ |-----------|------|-------------|
115
+ | `pattern` | string | Regex pattern |
116
+ | `path` | string? | File or directory to search |
117
+ | `glob` | string? | Filter by glob (e.g., `*.js`) |
118
+ | `type` | string? | Filter by file type |
119
+ | `i` | boolean? | Case-insensitive |
120
+ | `pre` | number? | Context lines before |
121
+ | `post` | number? | Context lines after |
122
+ | `limit` | number? | Max matches (default: 100) |
123
+
124
+ Requires `rg` (ripgrep) installed on the system.
125
+
126
+ ## How the Hash Works
127
+
128
+ 1. Strip trailing `\r`
129
+ 2. Remove all whitespace from the line
130
+ 3. Compute `xxHash32` on the whitespace-stripped string
131
+ 4. Take the result modulo 256 and encode it as 2-character lowercase hex (`00`-`ff`)
132
+
133
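+ The hash is whitespace-insensitive — indentation changes alone don't change the hash, making references robust against reformatting. Because only 256 hash values exist, a matching hash is strong evidence rather than proof that a line is unchanged.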
134
+
135
+ ## Error Recovery
136
+
137
+ **Hash mismatch**: The error shows updated `LINE:HASH` refs with `>>>` markers. Copy the new refs and retry.
138
+
139
+ **No-op error**: The replacement text matches current content. Re-read the file to see current state.
140
+
141
+ ## License
142
+
143
+ MIT
package/package.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "name": "mcp-hashline-edit-server",
3
+ "version": "0.1.0",
4
+ "description": "MCP server providing hashline-based file editing tools — line-addressed edits using content hashes for integrity verification",
5
+ "type": "module",
6
+ "main": "src/index.ts",
7
+ "bin": {
8
+ "mcp-hashline-edit-server": "src/index.ts"
9
+ },
10
+ "license": "MIT",
11
+ "author": "Submerisble",
12
+ "repository": {
13
+ "type": "git",
14
+ "url": "https://github.com/Submerisble/mcp-hashline-edit-server"
15
+ },
16
+ "keywords": ["mcp", "hashline", "editor", "coding-agent", "model-context-protocol"],
17
+ "files": ["src", "README.md"],
18
+ "scripts": {
19
+ "start": "bun run src/index.ts",
20
+ "dev": "bun --watch src/index.ts"
21
+ },
22
+ "dependencies": {
23
+ "@modelcontextprotocol/sdk": "^1.12.1",
24
+ "diff": "^7.0.0"
25
+ },
26
+ "devDependencies": {
27
+ "@types/bun": "latest",
28
+ "@types/diff": "^7.0.2",
29
+ "typescript": "^5.9.3"
30
+ }
31
+ }
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Tool descriptions shown to the LLM via MCP.
3
+ */
4
+
5
/**
 * Description text advertised for the `read_file` tool.
 *
 * Tells the model that output lines are tagged as `LINE:HASH|content`, that
 * those tags are the anchors `edit_file` expects, and that `offset`/`limit`
 * page through large files.
 */
export const READ_FILE_DESCRIPTION = `Read a file from the filesystem with hashline-prefixed output.

**IMPORTANT: Use this instead of built-in read tools (Cursor Read, Claude Read). This tool provides content-hashed line references required by edit_file.**

Each line is tagged with a content hash: \`LINE:HASH|content\`.
The LINE:HASH pairs serve as stable, verifiable anchors for the edit_file tool.

Use \`offset\` and \`limit\` for large files (reads up to 2000 lines by default).
Supports text files only.`;
14
+
15
/**
 * Description text advertised for the `edit_file` tool.
 *
 * Covers the rules for referencing LINE:HASH anchors, the four edit variants
 * (`set_line`, `replace_lines`, `insert_after`, `replace`), and how to recover
 * from hash-mismatch and no-op errors.
 */
export const EDIT_FILE_DESCRIPTION = `Edit a file using hash-verified line references.

**IMPORTANT: Use this instead of built-in edit tools (Cursor StrReplace, Claude Edit). This tool provides hash-verified edits with fuzzy matching, multi-edit batching, and automatic conflict detection.**

Read a file first to get LINE:HASH pairs, then reference them to make edits.
All edits in one call are validated against the file as last read — line numbers
and hashes refer to the original state, not after earlier edits in the same array.

**Critical rules:**
- Copy LINE:HASH refs verbatim from read output — never fabricate or guess hashes
- new_text/text contains plain replacement lines only — no LINE:HASH prefix, no diff + markers
- On hash mismatch: use the updated LINE:HASH refs shown by >>> directly
- If you already edited a file in this turn, re-read before the next edit
- new_text must differ from the current line content — identical content is rejected

**Edit variants:**
- \`set_line\`: Replace a single line by its LINE:HASH anchor
- \`replace_lines\`: Replace a range of lines (start_anchor through end_anchor)
- \`insert_after\`: Insert new lines after the anchor line
- \`replace\`: Substring-style fuzzy replace (no LINE:HASH needed; fallback when refs unavailable)

**Atomicity:** Edits are sorted and applied bottom-up automatically.
new_text: "" means delete (for set_line/replace_lines).

**Recovery:**
- Hash mismatch (>>> error): copy updated LINE:HASH refs from error and retry
- No-op error: your replacement matches current content — re-read the file
- After successful edit, re-read before making another edit to same file`;
43
+
44
/**
 * Description text advertised for the `write_file` tool.
 *
 * Positions the tool for creating files or full replacement, and steers the
 * model toward `edit_file` for changes to existing files.
 */
export const WRITE_FILE_DESCRIPTION = `Create or overwrite a file at the specified path.

**IMPORTANT: Use this instead of built-in write tools (Cursor Write, Claude Write) to keep all file operations consistent through this server.**

Prefer edit_file for modifying existing files (more precise, preserves formatting).
Use this for creating new files or when full replacement is simpler.`;
50
+
51
/**
 * Description text advertised for the `grep` tool.
 *
 * Explains that results come back as `LINE:HASH|content`, that match lines
 * are marked with `>>`, and which filtering features are supported.
 */
export const GREP_DESCRIPTION = `Search for text patterns in files using regex.

**IMPORTANT: Use this instead of built-in search tools (Cursor Grep, Claude Grep). This tool returns hashline-prefixed results whose LINE:HASH pairs can be passed directly to edit_file for verified edits.**

Results show hashline-prefixed matches: \`LINE:HASH|content\`.
Match lines are prefixed with \`>>\`, context lines with spaces.
The LINE:HASH pairs can be used directly with edit_file.

Supports:
- Full regex syntax (e.g., "log.*Error", "function\\s+\\w+")
- File filtering by glob pattern or file type
- Context lines before/after matches
- Result limiting and offset for pagination`;
package/src/diff.ts ADDED
@@ -0,0 +1,154 @@
1
+ /**
2
+ * Diff generation and replace-mode utilities.
3
+ */
4
+ import * as Diff from "diff";
5
+ import { DEFAULT_FUZZY_THRESHOLD, findMatch, type FuzzyMatch } from "./fuzzy";
6
+ import { adjustIndentation, normalizeToLF } from "./normalize";
7
+
8
/** Outcome of rendering a numbered line diff between two versions of a text. */
export interface DiffResult {
  /** Rendered diff: one `+`/`-`/space-prefixed, line-numbered row per line, joined with "\n". */
  diff: string;
  /** New-file line number at which the first added/removed hunk starts; `undefined` when nothing changed. */
  firstChangedLine: number | undefined;
}
12
+
13
/** Switches controlling how `replaceText` locates and replaces text. */
export interface ReplaceOptions {
  /** Forwarded to the matcher as `allowFuzzy`: permit non-exact matches. */
  fuzzy: boolean;
  /** Replace every occurrence instead of requiring a single unambiguous one. */
  all: boolean;
  /** Minimum fuzzy confidence to accept; `replaceText` falls back to `DEFAULT_FUZZY_THRESHOLD` when omitted. */
  threshold?: number;
}
18
+
19
/** What `replaceText` hands back to its caller. */
export interface ReplaceResult {
  /** The LF-normalized content after replacement (unchanged apart from normalization when `count` is 0). */
  content: string;
  /** Number of replacements that were performed. */
  count: number;
}
23
+
24
/**
 * Counts the lines in `content`, treating one trailing newline as a line
 * terminator rather than the start of an extra empty line.
 *
 * @returns The line count, never less than 1 (the empty string counts as one line).
 */
function countContentLines(content: string): number {
  const pieces = content.split("\n");
  const hasTerminator = pieces.length > 1 && pieces[pieces.length - 1] === "";
  const total = hasTerminator ? pieces.length - 1 : pieces.length;
  return total < 1 ? 1 : total;
}
29
+
30
/**
 * Renders one diff row as `<prefix><right-aligned line number>|<content>`.
 *
 * @param prefix  "+" for added, "-" for removed, " " for context rows.
 * @param lineNum Line number to print.
 * @param width   Minimum width of the number column; shorter numbers are left-padded with spaces.
 * @param content Text of the line.
 */
function formatNumberedDiffLine(prefix: "+" | "-" | " ", lineNum: number, width: number, content: string): string {
  const numberColumn = `${lineNum}`.padStart(width);
  return [prefix, numberColumn, "|", content].join("");
}
34
+
35
/**
 * Builds a compact, line-numbered diff between two texts.
 *
 * Added rows are numbered with new-file line numbers, removed and context
 * rows with old-file line numbers. Unchanged runs are trimmed to at most
 * `contextLines` rows next to a change; each trimmed stretch is represented
 * by a single "..." row. An unchanged run sitting between two changes is
 * shown in full, and runs that touch no change are skipped entirely.
 *
 * @param oldContent   Text before the change.
 * @param newContent   Text after the change.
 * @param contextLines Unchanged rows to keep on each side of a change (default 4).
 * @returns The rendered diff and the new-file line number where the first change begins.
 */
export function generateDiffString(oldContent: string, newContent: string, contextLines = 4): DiffResult {
  const hunks = Diff.diffLines(oldContent, newContent);
  const widestLineNumber = Math.max(countContentLines(oldContent), countContentLines(newContent));
  const gutter = String(widestLineNumber).length;
  const rendered: string[] = [];
  let oldNo = 1;
  let newNo = 1;
  let prevChanged = false;
  let firstChangedLine: number | undefined;

  const isChange = (hunk: { added?: boolean; removed?: boolean } | undefined): boolean =>
    hunk !== undefined && Boolean(hunk.added || hunk.removed);

  // Emits a "..." row at the current old-file position, then jumps both counters over the hidden rows.
  const elide = (hidden: number): void => {
    rendered.push(formatNumberedDiffLine(" ", oldNo, gutter, "..."));
    oldNo += hidden;
    newNo += hidden;
  };

  for (const [index, hunk] of hunks.entries()) {
    const lines = hunk.value.split("\n");
    // A hunk ending in "\n" yields a trailing empty piece that is not a real line.
    if (lines[lines.length - 1] === "") lines.pop();

    if (isChange(hunk)) {
      if (firstChangedLine === undefined) firstChangedLine = newNo;
      if (hunk.added) {
        for (const line of lines) {
          rendered.push(formatNumberedDiffLine("+", newNo, gutter, line));
          newNo++;
        }
      } else {
        for (const line of lines) {
          rendered.push(formatNumberedDiffLine("-", oldNo, gutter, line));
          oldNo++;
        }
      }
      prevChanged = true;
      continue;
    }

    const nextChanged = isChange(hunks[index + 1]);
    if (!prevChanged && !nextChanged) {
      // Unchanged run far from any change: count it, show nothing.
      oldNo += lines.length;
      newNo += lines.length;
      prevChanged = false;
      continue;
    }

    // Leading context (no change before this run): keep only its tail.
    const hiddenBefore = prevChanged ? 0 : Math.max(0, lines.length - contextLines);
    let visible = prevChanged ? lines : lines.slice(hiddenBefore);

    // Trailing context (no change after this run): keep only its head.
    let hiddenAfter = 0;
    if (!nextChanged && visible.length > contextLines) {
      hiddenAfter = visible.length - contextLines;
      visible = visible.slice(0, contextLines);
    }

    if (hiddenBefore > 0) elide(hiddenBefore);
    for (const line of visible) {
      rendered.push(formatNumberedDiffLine(" ", oldNo, gutter, line));
      oldNo++;
      newNo++;
    }
    if (hiddenAfter > 0) elide(hiddenAfter);
    prevChanged = false;
  }

  return { diff: rendered.join("\n"), firstChangedLine };
}
100
+
101
/**
 * Replaces `oldText` with `newText` inside `content`.
 *
 * All three strings are normalized to LF line endings first, and the returned
 * content is LF-normalized as well.
 *
 * - `options.all === true`: if there is at least one exact occurrence, every
 *   exact occurrence is replaced and nothing else is attempted. Otherwise the
 *   matcher is queried repeatedly and each match it returns is replaced, with
 *   the replacement's indentation adjusted to the matched text.
 * - `options.all === false`: exactly one match is replaced. Multiple exact
 *   occurrences are an error; no match yields `count: 0`.
 *
 * @param content Text to search in.
 * @param oldText Text to find; must be non-empty.
 * @param newText Replacement text.
 * @param options Matching mode, see {@link ReplaceOptions}.
 * @returns The resulting content and the number of replacements made.
 * @throws Error when `oldText` is empty, or when `options.all` is false and
 *         `oldText` occurs more than once.
 */
export function replaceText(content: string, oldText: string, newText: string, options: ReplaceOptions): ReplaceResult {
  if (oldText.length === 0) throw new Error("oldText must not be empty.");
  const threshold = options.threshold ?? DEFAULT_FUZZY_THRESHOLD;
  let normalizedContent = normalizeToLF(content);
  const normalizedOldText = normalizeToLF(oldText);
  const normalizedNewText = normalizeToLF(newText);

  if (options.all) {
    // Fast path: split once and reuse the pieces for both the count and the join.
    const segments = normalizedContent.split(normalizedOldText);
    if (segments.length > 1) {
      return { content: segments.join(normalizedNewText), count: segments.length - 1 };
    }

    // Regions of `normalizedContent` that hold text written by this loop.
    // The matcher always searches the whole content, so when the replacement
    // contains (or closely resembles) the search text it would keep matching
    // what was just inserted and grow the content without bound. A match that
    // touches one of these regions therefore ends the loop.
    const inserted: Array<{ start: number; end: number }> = [];
    let count = 0;

    while (true) {
      const matchOutcome = findMatch(normalizedContent, normalizedOldText, { allowFuzzy: options.fuzzy, threshold });
      const shouldUseClosest =
        options.fuzzy &&
        matchOutcome.closest &&
        matchOutcome.closest.confidence >= threshold &&
        (matchOutcome.fuzzyMatches === undefined || matchOutcome.fuzzyMatches <= 1);
      const match = matchOutcome.match || (shouldUseClosest ? matchOutcome.closest : undefined);
      if (!match) break;

      const matchStart = match.startIndex;
      const matchEnd = matchStart + match.actualText.length;
      const touchesInsertedText = inserted.some((region) => matchStart < region.end && region.start < matchEnd);
      if (touchesInsertedText) break;

      const adjustedNewText = adjustIndentation(normalizedOldText, match.actualText, normalizedNewText);
      // Replacing text with itself would never make progress.
      if (adjustedNewText === match.actualText) break;

      normalizedContent =
        normalizedContent.substring(0, matchStart) + adjustedNewText + normalizedContent.substring(matchEnd);

      // Keep previously recorded regions aligned with the shifted content.
      const delta = adjustedNewText.length - match.actualText.length;
      for (const region of inserted) {
        if (region.start >= matchEnd) {
          region.start += delta;
          region.end += delta;
        }
      }
      inserted.push({ start: matchStart, end: matchStart + adjustedNewText.length });
      count++;
    }
    return { content: normalizedContent, count };
  }

  const matchOutcome = findMatch(normalizedContent, normalizedOldText, { allowFuzzy: options.fuzzy, threshold });
  if (matchOutcome.occurrences && matchOutcome.occurrences > 1) {
    const previews = matchOutcome.occurrencePreviews?.join("\n\n") ?? "";
    const moreMsg = matchOutcome.occurrences > 5 ? ` (showing first 5 of ${matchOutcome.occurrences})` : "";
    throw new Error(
      `Found ${matchOutcome.occurrences} occurrences${moreMsg}:\n\n${previews}\n\nAdd more context lines to disambiguate.`,
    );
  }
  if (!matchOutcome.match) return { content: normalizedContent, count: 0 };

  const match = matchOutcome.match;
  const adjustedNewText = adjustIndentation(normalizedOldText, match.actualText, normalizedNewText);
  normalizedContent =
    normalizedContent.substring(0, match.startIndex) +
    adjustedNewText +
    normalizedContent.substring(match.startIndex + match.actualText.length);
  return { content: normalizedContent, count: 1 };
}
package/src/fuzzy.ts ADDED
@@ -0,0 +1,243 @@
1
+ /**
2
+ * Fuzzy matching utilities for the edit tool.
3
+ *
4
+ * Provides both character-level and line-level fuzzy matching with progressive
5
+ * fallback strategies for finding text in files.
6
+ */
7
+ import { countLeadingWhitespace, normalizeForFuzzy, normalizeUnicode } from "./normalize";
8
+
9
+ // Constants
10
+
11
/** Confidence a fuzzy match must reach to be accepted when the caller gives no threshold. */
export const DEFAULT_FUZZY_THRESHOLD = 0.95;

// NOTE(review): not referenced anywhere in the visible part of this module.
const SEQUENCE_FUZZY_THRESHOLD = 0.92;

/** Lowest depth-aware confidence at which a second, depth-agnostic scoring pass is attempted. */
const FALLBACK_THRESHOLD = 0.8;

// NOTE(review): the two PARTIAL_MATCH_* values are not referenced in the visible part of this module.
const PARTIAL_MATCH_MIN_LENGTH = 6;
const PARTIAL_MATCH_MIN_RATIO = 0.3;

/** Base number of neighbouring lines included around an occurrence in ambiguity previews. */
const OCCURRENCE_PREVIEW_CONTEXT = 5;

/** Preview lines longer than this are cut and end with an ellipsis. */
const OCCURRENCE_PREVIEW_MAX_LEN = 80;
18
+
19
+ // Types
20
+
21
/** A located piece of content that corresponds to the search target. */
export interface FuzzyMatch {
  /** The text as it actually appears in the content (for fuzzy matches, the matched lines joined with "\n"). */
  actualText: string;
  /** Character offset in the content where the matched text begins. */
  startIndex: number;
  /** 1-based line number on which the matched text begins. */
  startLine: number;
  /** Match quality; exact matches report 1, fuzzy matches the averaged per-line similarity. */
  confidence: number;
}
27
+
28
/** Everything `findMatch` can report about a search; all fields are optional. */
export interface MatchOutcome {
  /** The match to act on, present only when one was found unambiguously. */
  match?: FuzzyMatch;
  /** Best-scoring fuzzy candidate, reported even when it was not accepted as `match`. */
  closest?: FuzzyMatch;
  /** Number of exact occurrences; set only when the target occurs more than once. */
  occurrences?: number;
  /** 1-based line numbers of the listed exact occurrences (at most five). */
  occurrenceLines?: number[];
  /** Numbered context excerpts, one per entry in `occurrenceLines`. */
  occurrencePreviews?: string[];
  /** How many candidate windows scored at or above the threshold. */
  fuzzyMatches?: number;
  /** True when several windows cleared the threshold but the best one clearly outscored the rest. */
  dominantFuzzy?: boolean;
}
37
+
38
+ // Core Algorithms
39
+
40
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-unit insertions, deletions and substitutions that turn
 * `a` into `b`. Comparison is per UTF-16 code unit.
 *
 * Only two rows of the dynamic-programming table are kept at any time.
 */
export function levenshteinDistance(a: string, b: string): number {
  if (a === b) return 0;
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  const width = b.length + 1;
  // `above` is the finished previous row; `row` is the one being filled.
  let above = Array.from({ length: width }, (_, col) => col);
  let row = new Array<number>(width).fill(0);

  for (let r = 1; r <= a.length; r++) {
    const unit = a.charCodeAt(r - 1);
    row[0] = r;
    for (let c = 1; c <= b.length; c++) {
      const substituteCost = above[c - 1] + (unit === b.charCodeAt(c - 1) ? 0 : 1);
      row[c] = Math.min(above[c] + 1, row[c - 1] + 1, substituteCost);
    }
    // The filled row becomes the previous row; the old one is recycled.
    [above, row] = [row, above];
  }
  return above[b.length];
}
67
+
68
/**
 * Scores how alike two strings are on a 0..1 scale:
 * `1 - levenshteinDistance / length of the longer string`.
 * Two empty strings are considered identical (score 1).
 */
export function similarity(a: string, b: string): number {
  const longest = Math.max(a.length, b.length);
  return longest === 0 ? 1 : 1 - levenshteinDistance(a, b) / longest;
}
75
+
76
+ // Line-Based Utilities
77
+
78
/**
 * Converts each line's leading whitespace into a relative nesting depth.
 *
 * The smallest indent among non-blank lines is depth 0. The smallest positive
 * difference from that baseline is taken as one indent unit (1 when every
 * non-blank line shares the same indent). Each non-blank line's depth is its
 * distance from the baseline divided by that unit, rounded to the nearest
 * integer. Blank lines always get depth 0.
 *
 * Minima are found with plain loops rather than `Math.min(...values)`, so the
 * function cannot exceed the engine's argument-count limit on very long inputs.
 *
 * @param lines Lines to analyse.
 * @returns One depth per input line, in the same order.
 */
function computeRelativeIndentDepths(lines: string[]): number[] {
  const indents = lines.map(countLeadingWhitespace);
  const blank = lines.map((line) => line.trim().length === 0);

  let minIndent = Infinity;
  for (let i = 0; i < lines.length; i++) {
    if (!blank[i] && indents[i] < minIndent) minIndent = indents[i];
  }
  if (minIndent === Infinity) minIndent = 0; // no non-blank lines

  let indentUnit = Infinity;
  for (let i = 0; i < lines.length; i++) {
    if (blank[i]) continue;
    const step = indents[i] - minIndent;
    if (step > 0 && step < indentUnit) indentUnit = step;
  }
  if (indentUnit === Infinity) indentUnit = 1; // every non-blank line sits at the baseline

  return lines.map((_, i) => (blank[i] ? 0 : Math.round((indents[i] - minIndent) / indentUnit)));
}
94
+
95
/**
 * Produces the comparison form of each line: `<depth>|<normalized text>`.
 *
 * The text part is the trimmed line passed through `normalizeForFuzzy`; blank
 * lines contribute only the prefix. When `includeDepth` is false the depth is
 * left out and the prefix is just "|".
 *
 * @param lines        Lines to normalize.
 * @param includeDepth Whether to prepend each line's relative indent depth (default true).
 */
function normalizeLines(lines: string[], includeDepth = true): string[] {
  const depths = includeDepth ? computeRelativeIndentDepths(lines) : undefined;
  const normalized: string[] = [];
  for (let i = 0; i < lines.length; i++) {
    const tag = depths ? `${depths[i]}|` : "|";
    const text = lines[i].trim();
    normalized.push(text.length === 0 ? tag : `${tag}${normalizeForFuzzy(text)}`);
  }
  return normalized;
}
104
+
105
/**
 * Returns, for every line, the character offset at which it starts in the
 * text obtained by joining `lines` with a single "\n".
 */
function computeLineOffsets(lines: string[]): number[] {
  let cursor = 0;
  return lines.map((line) => {
    const startsAt = cursor;
    // Step over the line and the separator that follows it.
    cursor += line.length + 1;
    return startsAt;
  });
}
115
+
116
+ // Character-Level Fuzzy Match
117
+
118
/** Summary of one sliding-window fuzzy search over the content. */
interface BestFuzzyMatchResult {
  /** Highest-scoring window, if any window was scored. */
  best?: FuzzyMatch;
  /** Number of windows whose score reached the threshold. */
  aboveThresholdCount: number;
  /** Runner-up score, used to judge how clearly the best window wins. */
  secondBestScore: number;
}
123
+
124
/**
 * Slides a window of `targetLines.length` lines over `contentLines` and scores
 * each position by the average per-line similarity between the normalized
 * window and the normalized target.
 *
 * @param contentLines Content split into lines.
 * @param targetLines  Search text split into lines.
 * @param offsets      Start offset of every content line (see `computeLineOffsets`).
 * @param threshold    Score a window must reach to count toward `aboveThresholdCount`.
 * @param includeDepth Whether relative indent depth takes part in the comparison.
 * @returns The best window (earliest one on ties), how many windows reached the
 *          threshold, and the second-best score (-1 when there is no runner-up).
 */
function findBestFuzzyMatchCore(
  contentLines: string[],
  targetLines: string[],
  offsets: number[],
  threshold: number,
  includeDepth: boolean,
): BestFuzzyMatchResult {
  const span = targetLines.length;
  const lastStart = contentLines.length - span;
  const wanted = normalizeLines(targetLines, includeDepth);

  let best: FuzzyMatch | undefined;
  let topScore = -1;
  let secondBestScore = -1;
  let aboveThresholdCount = 0;

  for (let start = 0; start <= lastStart; start++) {
    const windowLines = contentLines.slice(start, start + span);
    // Normalized per window because indent depth is relative to the window itself.
    const candidate = normalizeLines(windowLines, includeDepth);
    const total = wanted.reduce((sum, line, i) => sum + similarity(line, candidate[i]), 0);
    const score = total / span;

    if (score >= threshold) aboveThresholdCount += 1;

    if (score > topScore) {
      // New leader: the previous leader becomes the runner-up.
      secondBestScore = topScore;
      topScore = score;
      best = {
        actualText: windowLines.join("\n"),
        startIndex: offsets[start],
        startLine: start + 1,
        confidence: score,
      };
      continue;
    }
    if (score > secondBestScore) secondBestScore = score;
  }

  return { best, aboveThresholdCount, secondBestScore };
}
162
+
163
/**
 * Runs the line-window fuzzy search for `target` inside `content`.
 *
 * The first pass compares lines including their relative indent depth. If its
 * best score misses `threshold` but is at least `FALLBACK_THRESHOLD`, a second
 * pass ignoring depth is run and wins only when its best score is strictly
 * higher.
 *
 * @returns The chosen pass's result, or an empty result (no `best`) when the
 *          target is empty or has more lines than the content.
 */
function findBestFuzzyMatch(content: string, target: string, threshold: number): BestFuzzyMatchResult {
  const contentLines = content.split("\n");
  const targetLines = target.split("\n");

  const nothingToSearch =
    targetLines.length === 0 || target.length === 0 || targetLines.length > contentLines.length;
  if (nothingToSearch) return { aboveThresholdCount: 0, secondBestScore: 0 };

  const offsets = computeLineOffsets(contentLines);
  const withDepth = findBestFuzzyMatchCore(contentLines, targetLines, offsets, threshold, true);

  const top = withDepth.best;
  if (!top || top.confidence >= threshold || top.confidence < FALLBACK_THRESHOLD) {
    return withDepth;
  }

  // Near miss: indentation structure may be the only disagreement, so retry without it.
  const withoutDepth = findBestFuzzyMatchCore(contentLines, targetLines, offsets, threshold, false);
  const improved = withoutDepth.best && withoutDepth.best.confidence > top.confidence;
  return improved ? withoutDepth : withDepth;
}
183
+
184
/**
 * Find a match for target text within content.
 *
 * Exact search comes first:
 * - one exact occurrence → `{ match }` with confidence 1;
 * - several exact occurrences → `{ occurrences, occurrenceLines, occurrencePreviews }`
 *   with no `match`; lines and previews cover at most the first five occurrences.
 *   Occurrences are counted and listed without overlap, so the list always
 *   agrees with the reported count.
 *
 * Without an exact hit, a line-window fuzzy search runs:
 * - `closest` is reported whenever a best window exists, even when
 *   `allowFuzzy` is false;
 * - `match` is set only when `allowFuzzy` is true and either exactly one window
 *   reached the threshold, or the best window is dominant (confidence ≥ 0.97
 *   and at least 0.08 above the runner-up; `dominantFuzzy` is then true).
 *
 * @param content Text to search in.
 * @param target  Text to look for; an empty target yields an empty outcome.
 * @param options `allowFuzzy` enables fuzzy acceptance; `threshold` defaults to
 *                `DEFAULT_FUZZY_THRESHOLD`.
 */
export function findMatch(
  content: string,
  target: string,
  options: { allowFuzzy: boolean; threshold?: number },
): MatchOutcome {
  if (target.length === 0) return {};

  // Try exact match first
  const exactIndex = content.indexOf(target);
  if (exactIndex !== -1) {
    const occurrences = content.split(target).length - 1;
    if (occurrences > 1) {
      const maxPreviews = 5;
      const contentLines = content.split("\n");
      const occurrenceLines: number[] = [];
      const occurrencePreviews: string[] = [];
      let searchStart = 0;
      for (let shown = 0; shown < maxPreviews; shown++) {
        const idx = content.indexOf(target, searchStart);
        if (idx === -1) break;
        const lineNumber = content.slice(0, idx).split("\n").length;
        occurrenceLines.push(lineNumber);
        const start = Math.max(0, lineNumber - 1 - OCCURRENCE_PREVIEW_CONTEXT);
        const end = Math.min(contentLines.length, lineNumber + OCCURRENCE_PREVIEW_CONTEXT + 1);
        const preview = contentLines
          .slice(start, end)
          .map((line, lineOffset) => {
            const num = start + lineOffset + 1;
            return ` ${num} | ${line.length > OCCURRENCE_PREVIEW_MAX_LEN ? `${line.slice(0, OCCURRENCE_PREVIEW_MAX_LEN - 1)}…` : line}`;
          })
          .join("\n");
        occurrencePreviews.push(preview);
        // Skip past the whole occurrence: `occurrences` counts non-overlapping
        // matches, so the scan must do the same.
        searchStart = idx + target.length;
      }
      return { occurrences, occurrenceLines, occurrencePreviews };
    }
    const startLine = content.slice(0, exactIndex).split("\n").length;
    return {
      match: { actualText: target, startIndex: exactIndex, startLine, confidence: 1 },
    };
  }

  // Try fuzzy match
  const threshold = options.threshold ?? DEFAULT_FUZZY_THRESHOLD;
  const { best, aboveThresholdCount, secondBestScore } = findBestFuzzyMatch(content, target, threshold);

  if (!best) return {};

  if (options.allowFuzzy && best.confidence >= threshold) {
    if (aboveThresholdCount === 1) return { match: best, closest: best };
    // Several windows qualify: accept the best only if it clearly stands out.
    const dominantDelta = 0.08;
    const dominantMin = 0.97;
    if (aboveThresholdCount > 1 && best.confidence >= dominantMin && best.confidence - secondBestScore >= dominantDelta) {
      return { match: best, closest: best, fuzzyMatches: aboveThresholdCount, dominantFuzzy: true };
    }
  }
  return { closest: best, fuzzyMatches: aboveThresholdCount };
}