claude-dev-env 1.69.2 → 1.70.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hooks/blocking/CLAUDE.md +1 -0
- package/hooks/blocking/claude_md_orphan_file_blocker.py +600 -0
- package/hooks/blocking/test_claude_md_orphan_file_blocker.py +587 -0
- package/hooks/hooks.json +5 -0
- package/hooks/hooks_constants/CLAUDE.md +1 -0
- package/hooks/hooks_constants/claude_md_orphan_file_blocker_constants.py +96 -0
- package/package.json +1 -1
- package/rules/CLAUDE.md +1 -0
- package/rules/claude-md-orphan-file.md +24 -0
- package/skills/autoconverge/workflow/autoconverge_report_constants/render_report_constants.py +36 -5
- package/skills/autoconverge/workflow/render_report.py +43 -5
- package/skills/autoconverge/workflow/test_render_report.py +43 -0
package/hooks/blocking/CLAUDE.md
CHANGED
|
@@ -58,6 +58,7 @@ The check modules it calls are the `code_rules_<concern>.py` files below.
|
|
|
58
58
|
|---|---|---|
|
|
59
59
|
| `block_main_commit.py` | PreToolUse (Bash) | `git commit`/`git push` directly to `main` |
|
|
60
60
|
| `bot_mention_comment_blocker.py` | PreToolUse (Write/Edit) | PR review comments that @-mention a bot |
|
|
61
|
+
| `claude_md_orphan_file_blocker.py` | PreToolUse (Write/Edit/MultiEdit) | Per-directory `CLAUDE.md` table cells naming a bare filename absent from the directory subtree |
|
|
61
62
|
| `convergence_gate_blocker.py` | PreToolUse (Bash) | Convergence workflow actions on a conflicting PR |
|
|
62
63
|
| `destructive_command_blocker.py` | PreToolUse (Bash/PowerShell) | Shell commands with destructive literals (`rm -rf`, `git reset --hard`, etc.) |
|
|
63
64
|
| `es_exe_path_rewriter.py` | PreToolUse | Rewrites paths referencing `.exe` under the Everything search path |
|
|
@@ -0,0 +1,600 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""PreToolUse hook: blocks a per-directory CLAUDE.md table that names a file absent from its subtree.
|
|
3
|
+
|
|
4
|
+
A per-directory ``CLAUDE.md`` documents the files reachable from its own
|
|
5
|
+
directory in a markdown table whose first column names each file in backticks.
|
|
6
|
+
When a first-column cell names a bare filename that exists nowhere under the scan
|
|
7
|
+
root (the CLAUDE.md directory's parent, which covers the directory, its
|
|
8
|
+
subdirectories, and its siblings), the table points a reader at a file that is
|
|
9
|
+
not there. This hook fires on Write, Edit, and MultiEdit targeting a file named
|
|
10
|
+
``CLAUDE.md`` and blocks the write when any such cell names a file absent from
|
|
11
|
+
the scan root. A table block whose own region declares an explicit relative-path
|
|
12
|
+
source (a ``../`` token) documents files outside the subtree, so that block's
|
|
13
|
+
rows are left alone — the exemption is scoped to the block, not the whole file.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import os
|
|
18
|
+
import sys
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import TextIO
|
|
21
|
+
|
|
22
|
+
# Put the hooks root (the parent of this file's directory) at the front of
# ``sys.path`` so the ``hooks_constants`` import that follows resolves when this
# file is executed directly as a script.
_hooks_dir = str(Path(__file__).resolve().parent.parent)
if _hooks_dir not in sys.path:
    sys.path.insert(0, _hooks_dir)
|
|
25
|
+
|
|
26
|
+
from hooks_constants.claude_md_orphan_file_blocker_constants import ( # noqa: E402
|
|
27
|
+
ALL_REFERENCED_FILE_EXTENSIONS,
|
|
28
|
+
CLAUDE_MD_FILENAME,
|
|
29
|
+
CODE_FENCE_PATTERN,
|
|
30
|
+
FIRST_COLUMN_BACKTICK_PATTERN,
|
|
31
|
+
MAX_ORPHAN_FILE_ISSUES,
|
|
32
|
+
MAX_SUBTREE_FILES_SCANNED,
|
|
33
|
+
ORPHAN_FILE_ADDITIONAL_CONTEXT,
|
|
34
|
+
ORPHAN_FILE_MESSAGE_TEMPLATE,
|
|
35
|
+
ORPHAN_FILE_SYSTEM_MESSAGE,
|
|
36
|
+
REGION_BOUNDARY_PATTERN,
|
|
37
|
+
RELATIVE_PATH_SOURCE_PATTERN,
|
|
38
|
+
SEPARATOR_CELL_PATTERN,
|
|
39
|
+
TABLE_ROW_PATTERN,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def is_claude_md_file(file_path: str) -> bool:
    """Report whether the write target is a ``CLAUDE.md`` file.

    Only the final path component is compared, so the directory that holds the
    file plays no part in the decision.

    Args:
        file_path: The destination path of the write or edit.

    Returns:
        True when the basename of *file_path* equals ``CLAUDE_MD_FILENAME``.
    """
    final_component = os.path.basename(file_path)
    return final_component == CLAUDE_MD_FILENAME
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _first_table_cell(table_line: str) -> str:
|
|
56
|
+
"""Return the trimmed text of the first column cell in a markdown table row.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
table_line: A single line that begins with a pipe character.
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
The text between the leading pipe and the next pipe, stripped of
|
|
63
|
+
surrounding whitespace; an empty string when no cell is present.
|
|
64
|
+
"""
|
|
65
|
+
after_leading_pipe = table_line.strip().lstrip("|")
|
|
66
|
+
first_cell, _, _ = after_leading_pipe.partition("|")
|
|
67
|
+
return first_cell.strip()
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _referenced_filename_in_cell(cell_text: str) -> str | None:
    """Extract the bare filename a first-column cell points at, if any.

    The first backticked token of the cell is inspected. It counts as a bare
    filename only when it is non-empty, does not start with a slash, contains no
    forward or backward slash, and its extension (compared lower-cased) is one
    of ``ALL_REFERENCED_FILE_EXTENSIONS``. Anything else yields None.

    Args:
        cell_text: The trimmed text of a first-column table cell.

    Returns:
        The bare filename to verify in the subtree, or None when the cell names
        no bare file.
    """
    token_match = FIRST_COLUMN_BACKTICK_PATTERN.search(cell_text)
    if token_match is None:
        return None
    token = token_match.group(1).strip()
    if not token:
        return None
    # A leading slash marks a slash-command; any separator marks a path.
    if token.startswith("/") or "/" in token or "\\" in token:
        return None
    suffix = os.path.splitext(token)[1].lower()
    return token if suffix in ALL_REFERENCED_FILE_EXTENSIONS else None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _filename_in_table_row(table_line: str) -> str | None:
    """Extract the bare filename named in a table row's first column, if any.

    Args:
        table_line: A single line that begins with a pipe character.

    Returns:
        The bare filename in the row's first column, or None when the first
        cell is empty, matches ``SEPARATOR_CELL_PATTERN`` (a header-separator
        row), or names no bare file.
    """
    leading_cell = _first_table_cell(table_line)
    if leading_cell and not SEPARATOR_CELL_PATTERN.match(leading_cell):
        return _referenced_filename_in_cell(leading_cell)
    return None
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _declares_relative_path_source(text: str) -> bool:
    """Report whether *text* carries an explicit relative-path source token.

    A table whose region contains such a token documents files that live
    outside the CLAUDE.md's own subtree, so callers treat that block as exempt.

    Args:
        text: A table block together with the prose that introduces it.

    Returns:
        True when ``RELATIVE_PATH_SOURCE_PATTERN`` matches anywhere in *text*.
    """
    source_match = RELATIVE_PATH_SOURCE_PATTERN.search(text)
    return source_match is not None
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def find_referenced_filenames(content: str) -> list[str]:
    """Collect, in document order, every bare filename the tables reference.

    The content is scanned one line at a time:

    * A code-fence line toggles the in-fence state; fence lines and the lines
      between a fence pair are ignored entirely and never close a table block.
    * Consecutive table rows accumulate into the current block.
    * The first non-table line after a block flushes that block, together with
      the prose region gathered before it, and starts a fresh region.
    * A region-boundary line (a heading) discards the region gathered so far, so
      prose under one section never introduces a table under a later one.

    A flushed block contributes nothing when it, or its introducing region,
    declares a ``../`` relative-path source; otherwise it contributes the bare
    filename of each first-column cell.

    Args:
        content: The CLAUDE.md content being written.

    Returns:
        Each referenced filename from a non-exempt table block, in the order it
        appears; duplicates preserved.
    """
    collected: list[str] = []
    region_lines: list[str] = []
    table_rows: list[str] = []
    inside_fence = False
    for line in content.splitlines():
        if CODE_FENCE_PATTERN.match(line):
            inside_fence = not inside_fence
            continue
        if inside_fence:
            continue
        if TABLE_ROW_PATTERN.match(line):
            table_rows.append(line)
            continue
        # A non-table line: close any open block before handling the line.
        if table_rows:
            collected += _block_filenames(region_lines, table_rows)
            table_rows, region_lines = [], []
        if REGION_BOUNDARY_PATTERN.match(line):
            region_lines = []
        region_lines.append(line)
    # Flush a block that runs to the end of the content.
    collected += _block_filenames(region_lines, table_rows)
    return collected
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _block_filenames(all_region_lines: list[str], all_block_lines: list[str]) -> list[str]:
    """List the bare filenames one table block contributes, honoring its exemption.

    Args:
        all_region_lines: The prose lines accumulated before this block.
        all_block_lines: The consecutive table rows that form the block.

    Returns:
        The bare filename of each first-column cell, in row order. An empty
        list when the block has no rows, or when the region and rows joined
        together declare a ``../`` source.
    """
    if not all_block_lines:
        return []
    combined_text = "\n".join([*all_region_lines, *all_block_lines])
    if _declares_relative_path_source(combined_text):
        return []
    row_results = (_filename_in_table_row(row) for row in all_block_lines)
    return [name for name in row_results if name is not None]
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _resolve_scan_root(claude_md_directory: Path) -> Path:
|
|
205
|
+
"""Return the directory whose subtree bounds the filename existence search.
|
|
206
|
+
|
|
207
|
+
The search root is the CLAUDE.md directory's parent when that parent exists,
|
|
208
|
+
so a table that documents files in a sibling directory or one level up still
|
|
209
|
+
resolves them. When the directory has no distinct parent, the CLAUDE.md
|
|
210
|
+
directory itself is the root.
|
|
211
|
+
|
|
212
|
+
Args:
|
|
213
|
+
claude_md_directory: The directory that holds the target CLAUDE.md.
|
|
214
|
+
|
|
215
|
+
Returns:
|
|
216
|
+
The directory to walk when collecting candidate filenames.
|
|
217
|
+
"""
|
|
218
|
+
parent_directory = claude_md_directory.parent
|
|
219
|
+
if parent_directory == claude_md_directory:
|
|
220
|
+
return claude_md_directory
|
|
221
|
+
return parent_directory
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class _SubtreeScan:
|
|
225
|
+
"""The basenames a bounded subtree walk collected and whether it ran complete.
|
|
226
|
+
|
|
227
|
+
Attributes:
|
|
228
|
+
all_basenames: Each file basename the walk reached.
|
|
229
|
+
was_scan_complete: True when the walk visited the whole subtree within the
|
|
230
|
+
budget, so ``all_basenames`` is authoritative; False when the budget
|
|
231
|
+
truncated the walk, so a basename's absence from the set is not proof
|
|
232
|
+
of its absence on disk.
|
|
233
|
+
"""
|
|
234
|
+
|
|
235
|
+
def __init__(self, all_basenames: set[str], was_scan_complete: bool) -> None:
|
|
236
|
+
self.all_basenames = all_basenames
|
|
237
|
+
self.was_scan_complete = was_scan_complete
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _scan_subtree_basenames(scan_root: Path) -> _SubtreeScan:
    """Walk *scan_root* collecting file basenames until the file budget is hit.

    Every entry under the root is visited recursively. Entries that are not
    files, or whose ``is_file`` check raises ``OSError``, are skipped. Once
    ``MAX_SUBTREE_FILES_SCANNED`` files have been recorded the walk stops and
    the result is flagged incomplete.

    Args:
        scan_root: The directory whose subtree bounds the existence search.

    Returns:
        The collected basenames paired with the scan-completeness flag.
    """
    collected: set[str] = set()
    files_seen = 0
    for entry in scan_root.rglob("*"):
        try:
            is_regular_file = entry.is_file()
        except OSError:
            is_regular_file = False
        if not is_regular_file:
            continue
        collected.add(entry.name)
        files_seen += 1
        if files_seen >= MAX_SUBTREE_FILES_SCANNED:
            # Budget exhausted: the set may be missing files not yet visited.
            return _SubtreeScan(collected, was_scan_complete=False)
    return _SubtreeScan(collected, was_scan_complete=True)
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _filename_exists_under(scan_root: Path, filename: str) -> bool:
|
|
270
|
+
"""Return whether a file with basename *filename* exists anywhere under root.
|
|
271
|
+
|
|
272
|
+
A direct probe that resolves one filename deterministically even when the
|
|
273
|
+
bounded subtree walk was truncated. An unreadable entry mid-walk is skipped.
|
|
274
|
+
|
|
275
|
+
Args:
|
|
276
|
+
scan_root: The directory whose subtree bounds the existence search.
|
|
277
|
+
filename: The bare basename to look for.
|
|
278
|
+
|
|
279
|
+
Returns:
|
|
280
|
+
True when at least one matching file is reachable under the scan root.
|
|
281
|
+
"""
|
|
282
|
+
for each_match in scan_root.rglob(filename):
|
|
283
|
+
try:
|
|
284
|
+
if each_match.is_file():
|
|
285
|
+
return True
|
|
286
|
+
except OSError:
|
|
287
|
+
continue
|
|
288
|
+
return False
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def _present_referenced_filenames(
    all_referenced_filenames: list[str], scan_root: Path
) -> set[str]:
    """Determine which referenced filenames exist under the scan root.

    One bounded walk gathers basenames. A name found in that set is present.
    A name absent from the set is absent for certain only when the walk ran to
    completion; after a truncated walk the name is probed directly, so a
    partial set never yields a false-missing verdict.

    Args:
        all_referenced_filenames: The bare filenames a CLAUDE.md table names.
        scan_root: The directory whose subtree bounds the existence search.

    Returns:
        The subset of *all_referenced_filenames* that resolve to an existing file.
    """
    scan = _scan_subtree_basenames(scan_root)
    found: set[str] = set()
    for name in all_referenced_filenames:
        if name in scan.all_basenames:
            found.add(name)
        elif not scan.was_scan_complete and _filename_exists_under(scan_root, name):
            found.add(name)
    return found
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def find_missing_filenames(content: str, claude_md_directory: Path) -> list[str]:
    """Return the referenced filenames absent from the CLAUDE.md's scan root.

    A referenced filename is missing when it exists nowhere under the scan root
    — the CLAUDE.md directory's parent (or the directory itself when it has no
    distinct parent), which covers the directory, its subdirectories, and its
    siblings. A table block that declares an explicit relative-path source (a
    ``../`` token in the block or the prose that introduces it) yields no
    findings for that block's rows; an unrelated block in the same file is still
    checked. An ``OSError`` raised by the subtree walk yields no findings (fail
    open), so an unreadable tree never blocks a write.

    Args:
        content: The CLAUDE.md content being written.
        claude_md_directory: The directory that holds the target CLAUDE.md.

    Returns:
        Each referenced filename with no matching file under the scan root, in
        first-seen order with duplicates removed, capped at the issue budget.
    """
    referenced_filenames = find_referenced_filenames(content)
    if not referenced_filenames:
        # Nothing to verify, so skip the (potentially large) subtree walk; the
        # result would be an empty list either way.
        return []
    scan_root = _resolve_scan_root(claude_md_directory)
    try:
        present_filenames = _present_referenced_filenames(referenced_filenames, scan_root)
    except OSError:
        return []
    missing_filenames: list[str] = []
    already_reported: set[str] = set()
    for each_filename in referenced_filenames:
        if each_filename in already_reported or each_filename in present_filenames:
            continue
        already_reported.add(each_filename)
        missing_filenames.append(each_filename)
        if len(missing_filenames) >= MAX_ORPHAN_FILE_ISSUES:
            break
    return missing_filenames
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _read_existing_file_content(file_path: str) -> str | None:
|
|
363
|
+
"""Return the current on-disk content of *file_path*, or None when unreadable.
|
|
364
|
+
|
|
365
|
+
Args:
|
|
366
|
+
file_path: The path of the file the edit targets.
|
|
367
|
+
|
|
368
|
+
Returns:
|
|
369
|
+
The file's text, or None when the file is missing or cannot be decoded.
|
|
370
|
+
"""
|
|
371
|
+
try:
|
|
372
|
+
return Path(file_path).read_text(encoding="utf-8")
|
|
373
|
+
except (OSError, UnicodeDecodeError):
|
|
374
|
+
return None
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def _apply_edits(existing_content: str, all_edits: list[dict]) -> str:
|
|
378
|
+
"""Return *existing_content* with each MultiEdit replacement applied in order.
|
|
379
|
+
|
|
380
|
+
Args:
|
|
381
|
+
existing_content: The current on-disk file content.
|
|
382
|
+
all_edits: The MultiEdit ``edits`` list, each a mapping with an
|
|
383
|
+
``old_string`` and a ``new_string``.
|
|
384
|
+
|
|
385
|
+
Returns:
|
|
386
|
+
The content after replacing the first occurrence of each edit's
|
|
387
|
+
``old_string`` with its ``new_string``, in list order.
|
|
388
|
+
"""
|
|
389
|
+
edited_content = existing_content
|
|
390
|
+
for each_edit in all_edits:
|
|
391
|
+
if not isinstance(each_edit, dict):
|
|
392
|
+
continue
|
|
393
|
+
old_string = each_edit.get("old_string", "")
|
|
394
|
+
new_string = each_edit.get("new_string", "")
|
|
395
|
+
if isinstance(old_string, str) and isinstance(new_string, str) and old_string:
|
|
396
|
+
edited_content = edited_content.replace(old_string, new_string, 1)
|
|
397
|
+
return edited_content
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
def _edit_fragments(all_edits: list[dict]) -> list[str]:
|
|
401
|
+
"""Return each MultiEdit ``new_string`` fragment present as a non-empty string.
|
|
402
|
+
|
|
403
|
+
Args:
|
|
404
|
+
all_edits: The MultiEdit ``edits`` list.
|
|
405
|
+
|
|
406
|
+
Returns:
|
|
407
|
+
Every ``new_string`` value that is a non-empty string, in list order.
|
|
408
|
+
"""
|
|
409
|
+
all_fragments: list[str] = []
|
|
410
|
+
for each_edit in all_edits:
|
|
411
|
+
if not isinstance(each_edit, dict):
|
|
412
|
+
continue
|
|
413
|
+
new_string = each_edit.get("new_string", "")
|
|
414
|
+
if isinstance(new_string, str) and new_string:
|
|
415
|
+
all_fragments.append(new_string)
|
|
416
|
+
return all_fragments
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
class _OrphanScanPlan:
|
|
420
|
+
"""The contents to scan for orphans and the pre-existing orphans to exclude.
|
|
421
|
+
|
|
422
|
+
Attributes:
|
|
423
|
+
candidate_contents: Each content string whose table rows are scanned.
|
|
424
|
+
baseline_missing_filenames: The orphan filenames the file already held
|
|
425
|
+
before this edit; reporting excludes them so an unrelated edit over a
|
|
426
|
+
pre-existing orphan on an untouched line is not blocked. Empty for a
|
|
427
|
+
Write (the whole file is replaced) and for an edit whose existing file
|
|
428
|
+
cannot be read.
|
|
429
|
+
"""
|
|
430
|
+
|
|
431
|
+
def __init__(
|
|
432
|
+
self, all_candidate_contents: list[str], all_baseline_missing_filenames: set[str]
|
|
433
|
+
) -> None:
|
|
434
|
+
self.candidate_contents = all_candidate_contents
|
|
435
|
+
self.baseline_missing_filenames = all_baseline_missing_filenames
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def _build_orphan_scan_plan(
    tool_name: str, tool_input: dict, file_path: str, claude_md_directory: Path
) -> _OrphanScanPlan:
    """Return the contents to scan and the pre-existing orphans to exclude.

    For Write the candidate is the full new content with no baseline, so every
    orphan it names is introduced by the write. For Edit and MultiEdit the
    candidate is the existing file with the replacements applied, so a ``../``
    source line outside the edited rows still exempts that table block. The
    baseline is every orphan the existing file already held, so a pre-existing
    orphan on an untouched line is excluded. When the existing file cannot be
    read, the raw ``new_string`` fragment(s) are scanned with no baseline, so an
    orphan the edit itself adds is still caught.

    The baseline is deliberately not capped at the issue budget. A capped
    baseline would omit pre-existing orphans beyond the cap; an edit that
    removes an earlier orphan would then surface one of those omitted names in
    the candidate findings and be blocked for an orphan it did not introduce.

    Args:
        tool_name: The intercepted tool — ``Write``, ``Edit``, or ``MultiEdit``.
        tool_input: The tool's input payload.
        file_path: The destination path of the write or edit.
        claude_md_directory: The directory that holds the target CLAUDE.md.

    Returns:
        The scan plan pairing candidate contents with the baseline orphan set.
    """
    if tool_name == "Write":
        content = tool_input.get("content", "")
        candidate_contents = [content] if isinstance(content, str) and content else []
        return _OrphanScanPlan(candidate_contents, set())
    all_edits = _edits_for_tool(tool_name, tool_input)
    existing_content = _read_existing_file_content(file_path)
    if existing_content is None:
        return _OrphanScanPlan(_edit_fragments(all_edits), set())
    baseline_missing = _all_missing_filenames(existing_content, claude_md_directory)
    return _OrphanScanPlan([_apply_edits(existing_content, all_edits)], baseline_missing)


def _all_missing_filenames(content: str, claude_md_directory: Path) -> set[str]:
    """Return every referenced filename absent from the scan root, uncapped.

    Args:
        content: The CLAUDE.md content to inspect.
        claude_md_directory: The directory that holds the target CLAUDE.md.

    Returns:
        The full set of referenced filenames with no matching file under the
        scan root; an empty set when nothing is referenced or when the subtree
        walk raises ``OSError`` (fail open).
    """
    referenced_filenames = find_referenced_filenames(content)
    if not referenced_filenames:
        return set()
    scan_root = _resolve_scan_root(claude_md_directory)
    try:
        present_filenames = _present_referenced_filenames(referenced_filenames, scan_root)
    except OSError:
        return set()
    return set(referenced_filenames) - present_filenames
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
def _edits_for_tool(tool_name: str, tool_input: dict) -> list[dict]:
|
|
475
|
+
"""Return the edit mappings an Edit or MultiEdit payload carries.
|
|
476
|
+
|
|
477
|
+
Args:
|
|
478
|
+
tool_name: The intercepted tool — ``Edit`` or ``MultiEdit``.
|
|
479
|
+
tool_input: The tool's input payload.
|
|
480
|
+
|
|
481
|
+
Returns:
|
|
482
|
+
A single-element list holding the Edit payload, or the MultiEdit
|
|
483
|
+
``edits`` list when it is present as a list; an empty list otherwise.
|
|
484
|
+
"""
|
|
485
|
+
if tool_name == "Edit":
|
|
486
|
+
return [tool_input]
|
|
487
|
+
all_edits = tool_input.get("edits", [])
|
|
488
|
+
return all_edits if isinstance(all_edits, list) else []
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
def _collect_missing_filenames(scan_plan: _OrphanScanPlan, claude_md_directory: Path) -> list[str]:
    """Gather the orphan filenames the plan's contents introduce.

    Each candidate content is checked in turn. A missing filename that belongs
    to the plan's baseline set was already an orphan before the edit and is
    skipped, so an unrelated edit over a pre-existing orphan reports nothing.

    Args:
        scan_plan: The candidate contents to scan paired with the baseline orphan
            set to exclude.
        claude_md_directory: The directory that holds the target CLAUDE.md.

    Returns:
        Each introduced orphan filename in first-seen order with duplicates
        removed, capped at the issue budget.
    """
    introduced: list[str] = []
    seen: set[str] = set()
    baseline = scan_plan.baseline_missing_filenames
    for content in scan_plan.candidate_contents:
        for name in find_missing_filenames(content, claude_md_directory):
            if name in baseline or name in seen:
                continue
            seen.add(name)
            introduced.append(name)
            if len(introduced) >= MAX_ORPHAN_FILE_ISSUES:
                return introduced
    return introduced
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def _build_block_payload(all_missing_filenames: list[str], directory: str) -> dict:
    """Assemble the PreToolUse deny payload that lists the missing filenames.

    Each filename is wrapped in backticks and the names are joined with a comma
    and a space before being substituted into the message template.

    Args:
        all_missing_filenames: The referenced filenames absent from the subtree.
        directory: The directory that holds the target CLAUDE.md.

    Returns:
        The hook-result dictionary the harness reads to deny the write.
    """
    backticked_names = [f"`{name}`" for name in all_missing_filenames]
    deny_reason = ORPHAN_FILE_MESSAGE_TEMPLATE.format(
        directory=directory, missing=", ".join(backticked_names)
    )
    hook_specific_output = {
        "hookEventName": "PreToolUse",
        "permissionDecision": "deny",
        "permissionDecisionReason": deny_reason,
        "additionalContext": ORPHAN_FILE_ADDITIONAL_CONTEXT,
    }
    return {
        "hookSpecificOutput": hook_specific_output,
        "systemMessage": ORPHAN_FILE_SYSTEM_MESSAGE,
        "suppressOutput": True,
    }
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def _emit_hook_result(all_hook_data: dict, output_stream: TextIO) -> None:
|
|
547
|
+
"""Write the hook result JSON to the given output stream.
|
|
548
|
+
|
|
549
|
+
Args:
|
|
550
|
+
all_hook_data: The hook-result dictionary to serialize.
|
|
551
|
+
output_stream: The stream the harness reads the decision from.
|
|
552
|
+
"""
|
|
553
|
+
output_stream.write(json.dumps(all_hook_data) + "\n")
|
|
554
|
+
output_stream.flush()
|
|
555
|
+
|
|
556
|
+
|
|
557
|
+
def main() -> None:
    """Read the PreToolUse payload from stdin and block an orphan-file CLAUDE.md.

    The process always exits with status 0. A deny payload is written to
    stdout only when the write or edit targets a file named ``CLAUDE.md`` in an
    existing directory and introduces at least one orphan filename. Every other
    case (undecodable JSON, unexpected payload types, another tool, another
    file, nothing to scan) exits without writing anything.
    """
    try:
        hook_payload = json.load(sys.stdin)
    except json.JSONDecodeError:
        sys.exit(0)

    if not isinstance(hook_payload, dict):
        sys.exit(0)

    tool_name = hook_payload.get("tool_name", "")
    tool_input = hook_payload.get("tool_input", {})
    if not (isinstance(tool_name, str) and isinstance(tool_input, dict)):
        sys.exit(0)
    if tool_name not in ("Write", "Edit", "MultiEdit"):
        sys.exit(0)

    file_path = tool_input.get("file_path", "")
    if not (isinstance(file_path, str) and is_claude_md_file(file_path)):
        sys.exit(0)

    claude_md_directory = Path(file_path).resolve().parent
    if not claude_md_directory.is_dir():
        sys.exit(0)

    scan_plan = _build_orphan_scan_plan(tool_name, tool_input, file_path, claude_md_directory)
    introduced_orphans = (
        _collect_missing_filenames(scan_plan, claude_md_directory)
        if scan_plan.candidate_contents
        else []
    )
    if introduced_orphans:
        deny_payload = _build_block_payload(introduced_orphans, str(claude_md_directory))
        _emit_hook_result(deny_payload, sys.stdout)
    sys.exit(0)
|
|
597
|
+
|
|
598
|
+
|
|
599
|
+
# Run the hook only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|