claude-dev-env 1.49.1 → 1.50.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/audit-rubrics/category_rubrics/category-a-api-contracts.md +17 -3
  2. package/audit-rubrics/prompts/category-a-api-contracts.md +17 -2
  3. package/docs/CODE_RULES.md +6 -1
  4. package/hooks/blocking/_gh_body_arg_utils.py +67 -11
  5. package/hooks/blocking/_md_to_html_blocker_test_support.py +65 -0
  6. package/hooks/blocking/code_rules_enforcer.py +386 -32
  7. package/hooks/blocking/conftest.py +30 -0
  8. package/hooks/blocking/md_to_html_blocker.py +2 -2
  9. package/hooks/blocking/pr_description_body_audit.py +148 -0
  10. package/hooks/blocking/pr_description_command_parser.py +233 -0
  11. package/hooks/blocking/pr_description_enforcer.py +36 -825
  12. package/hooks/blocking/pr_description_pr_number.py +153 -0
  13. package/hooks/blocking/pr_description_readability.py +366 -0
  14. package/hooks/blocking/test_code_rules_enforcer.py +65 -0
  15. package/hooks/blocking/test_code_rules_enforcer_docstring_args_signature.py +256 -0
  16. package/hooks/blocking/test_code_rules_enforcer_function_length.py +136 -5
  17. package/hooks/blocking/test_code_rules_enforcer_ignored_must_check_return.py +256 -0
  18. package/hooks/blocking/test_code_rules_enforcer_naming_pattern.py +137 -1
  19. package/hooks/blocking/test_md_to_html_blocker_exemptions.py +368 -0
  20. package/hooks/blocking/test_md_to_html_blocker_extensions.py +157 -0
  21. package/hooks/blocking/test_md_to_html_blocker_path_resolution.py +336 -0
  22. package/hooks/blocking/test_pr_description_enforcer.py +13 -1499
  23. package/hooks/blocking/test_pr_description_enforcer_body_audit.py +247 -0
  24. package/hooks/blocking/test_pr_description_enforcer_body_rules.py +493 -0
  25. package/hooks/blocking/test_pr_description_enforcer_command_parser.py +366 -0
  26. package/hooks/blocking/test_pr_description_enforcer_pr_number.py +159 -0
  27. package/hooks/blocking/test_pr_description_enforcer_readability.py +443 -0
  28. package/hooks/hooks_constants/blocking_check_limits.py +2 -0
  29. package/hooks/hooks_constants/code_rules_enforcer_constants.py +15 -1
  30. package/hooks/hooks_constants/md_to_html_blocker_constants.py +1 -1
  31. package/hooks/hooks_constants/pr_description_enforcer_constants.py +7 -0
  32. package/hooks/hooks_constants/test_md_to_html_blocker_constants.py +11 -4
  33. package/package.json +1 -1
  34. package/hooks/blocking/test_md_to_html_blocker.py +0 -772
@@ -21,10 +21,10 @@ The decomposition that worked best for PR #394 (a Python+PowerShell scheduled-ta
21
21
 
22
22
  | ID | Axis name | Concrete checks |
23
23
  |---|---|---|
24
- | A1 | Python function signatures vs internal call sites | Parameter count, names, defaults, kw-only barriers; every internal call binds correctly. |
25
- | A2 | Python return-type annotation vs every code path | Each function's return annotation is satisfied by every path: explicit `return X`, fall-through, exception-handler exit. |
24
+ | A1 | Python function signatures vs internal call sites | Parameter count, names, defaults, kw-only barriers; every internal call binds correctly. Is the symbol `async def`? Confirm the exact access path a caller uses: free function vs instance method reached through an object attribute vs import path. A keyword-only parameter with no default is required; omitting it raises `TypeError`. |
25
+ | A2 | Python return-type annotation vs every code path | Each function's return annotation is satisfied by every path: explicit `return X`, fall-through, exception-handler exit. The full failure contract is the return value AND every exception raised — trace the body and the docstring `Raises:` for each `raise`, including custom errors. A `-> bool` function that also raises is not fully described by "returns bool". |
26
26
  | A3 | argparse parser → Namespace contract | Every `add_argument(...)` produces the exact dest name accessed downstream; `type=` matches downstream usage; switches produce bools. |
27
- | A4 | Stdlib callback contracts | `os.walk(onerror=...)` callback shape; `os.path.getctime` / `os.rmdir` argument and exception contracts; `time.sleep` argument types. |
27
+ | A4 | Stdlib callback contracts | `os.walk(onerror=...)` callback shape; `os.path.getctime` / `os.rmdir` argument and exception contracts; `time.sleep` argument types. Catch-site precision: for any claim that code "catches X", confirm the exact catch site and scope — an `except` around only a rollback inside `finally` does not catch the same error raised in the `with` body. |
28
28
  | A5 | subprocess invocation contract | `subprocess.run` kwargs valid for the targeted Python; `args=[list]` shape; exception propagation under `check=True`. |
29
29
  | A6 | PowerShell cmdlet parameter sets and binding | `param(...)` with `ParameterSetName=`; `[CmdletBinding(DefaultParameterSetName=…)]` presence; cmdlet parameter combinations valid per Microsoft docs. |
30
30
  | A7 | Cross-language argv boundary | The `-Argument` string composition → Windows process loader → C-runtime argv parser → Python `sys.argv` → argparse. Trailing-backslash and embedded-space hazards. |
@@ -34,6 +34,20 @@ Adapt these axes for your artifact. For a pure Python codebase, drop A6 and A7 a
34
34
 
35
35
  ---
36
36
 
37
+ ### Documentation as contract: verifying a doc claim about code
38
+
39
+ When the audited artifact is documentation — a CLAUDE.md, a rule file, a README, a table mapping symbols to behavior — that asserts facts about the codebase, API-contract verification means checking every assertion against the current code, not just confirming the symbol exists and its return type matches. A doc that passes the happy-path contract can still be wrong on any of the seven checks below. Run all seven up front. Checks 1, 2, and 6 are the full-contract sharpening of sub-buckets A1, A2, and A4 applied to a doc claim; checks 3, 4, 5, and 7 are specific to documentation artifacts.
40
+
41
+ 1. **Full failure contract** — the failure signals of a function are its return value AND every exception it raises; trace the body and the docstring `Raises:` for every `raise`. _Example:_ a docs PR says a UI helper "returns `bool`", but it also raises a custom not-found error, and a database writer documented by its return type also raises `ValueError` / `RuntimeError` / a driver error, so "returns bool" understates the contract.
42
+ 2. **Call shape** — required versus optional parameters (a keyword-only parameter with NO default is required; omitting it raises `TypeError`), sync versus async, and the exact access path (free function versus instance method reached through an object attribute versus import path). _Example:_ a doc presents a helper as a free function, but it is an `async` instance method reached through an object attribute and one keyword-only parameter has no default, so the call example in the doc would raise `TypeError`.
43
+ 3. **Reuse-first** — before a doc endorses a hand-written snippet, search for a dedicated helper that already does it. _Example:_ a doc endorses hand-composing `normalize(name).lower()` inline while a dedicated `normalize_for_matching()` helper already does exactly that, contradicting the reuse-before-building rule the doc itself states.
44
+ 4. **Path resolution** — every file or directory path a doc cites resolves from the repository root. _Example:_ a doc cites a bare `snapshots/` directory as if it sat at the repo root, but the tree lives under `subsystem/snapshots/`.
45
+ 5. **Cross-entry consistency** — scan parallel rows, sections, and table entries for claims that contradict each other. _Example:_ two adjacent table rows map the same subsystem to two different exception base classes.
46
+ 6. **Catch-site precision** — when a doc claims code "catches X", confirm the exact site and scope of the catch. _Example:_ a doc says a context manager catches a driver error, but the `except` wraps only the rollback inside `finally`, so an error raised in the `with` body propagates uncaught.
47
+ 7. **Citation freshness** — re-derive every `file:line` claim against the current code; never trust a prior "verified" assertion or wording borrowed from a comment. _Example:_ an attribute name carried over from a review comment names a member the class does not define; the current code exposes it under a different name.
48
+
49
+ ---
50
+
37
51
  ## Sample prompt
38
52
 
39
53
  The literal text used in the May 2026 audit experiment is in [`../prompts/category-a-api-contracts.md`](../prompts/category-a-api-contracts.md). It produced 8–10 findings (P0=1–2, P1=2–6, P2=2–5) across two runs. Inline the full diff verbatim — do not ask the agent to fetch it.
@@ -1,4 +1,4 @@
1
- Audit [REPO/ARTIFACT] [TARGET_ID] for **Category A only** (API contract verification). Skip B–K. Sub-bucket forced-exhaustion mode: Category A is decomposed into 8 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
1
+ Audit [REPO/ARTIFACT] [TARGET_ID] for **Category A only** (API contract verification). Skip B–N. Sub-bucket forced-exhaustion mode: Category A is decomposed into 9 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
2
2
 
3
3
  [ARTIFACT METADATA: title / change description / head SHA or revision identifier / scope summary]
4
4
  ID prefix: `find`.
@@ -15,6 +15,7 @@ ID prefix: `find`.
15
15
  - Flag positional arguments passed to keyword-only parameters and vice versa.
16
16
  - Flag calls that omit a required parameter relying on a default that does not exist on the current branch.
17
17
  - Verify decorators (`@staticmethod`, `@classmethod`, `@property`) do not silently shift the parameter binding (e.g., `self` / `cls` insertion).
18
+ - Confirm sync-vs-async (is the symbol `async def`?), the exact access path a caller uses (free function vs instance method via an object attribute vs import path), and that a keyword-only parameter with no default is required — omitting it raises `TypeError`.
18
19
 
19
20
  **A2. Return-type annotation vs every code path**
20
21
  - For each annotated function, walk every code path: explicit `return X`, fall-through to implicit `None`, exception-handler exit, generator `yield` paths, async coroutine return value.
@@ -22,6 +23,7 @@ ID prefix: `find`.
22
23
  - For functions that raise instead of returning on some path, confirm the annotation does not promise a value the caller will dereference.
23
24
  - Inspect `try/except/finally` chains for paths that return from `finally` and override `try`/`except` returns.
24
25
  - For async functions, confirm the annotation refers to the awaited type, not the coroutine wrapper.
26
+ - The full failure contract is the return value AND every exception raised — list each `raise` in the body and the docstring `Raises:`; a `-> bool` that can also raise is not fully described as "returns bool".
25
27
 
26
28
  **A3. CLI/argument-parser declaration → downstream Namespace contract**
27
29
  - For every `add_argument(...)` (or equivalent CLI declaration), verify the auto-derived or explicit `dest=` matches the attribute name accessed downstream on the parsed namespace.
@@ -34,6 +36,7 @@ ID prefix: `find`.
34
36
  - Identify every callback handed to a library function (e.g., `os.walk(onerror=...)`, sort `key=`, `filter`, `map`, `re.sub(repl=callable)`, signal handlers, threading callbacks). Verify each callback's signature matches what the library calls it with — arity, positional-vs-keyword, return type the library consumes.
35
37
  - For every stdlib function the artifact calls, verify argument types and exception contracts: which exceptions can each call raise, and is each caller prepared (or deliberately not prepared) for them.
36
38
  - Verify kwargs to stdlib functions are spelled correctly for the targeted runtime version (deprecated/renamed kwargs, version-introduced kwargs).
39
+ - Catch-site precision — for any "catches X" claim confirm the exact catch site and scope (an `except` around only a rollback inside `finally` does not catch the same error from the `with` body).
37
40
  - Flag callbacks whose return value the library consumes but the implementation returns `None` (or vice versa).
38
41
  - Confirm callback exception behavior: which exceptions in the callback bubble out, which are swallowed by the library, which terminate iteration.
39
42
 
@@ -66,6 +69,18 @@ ID prefix: `find`.
66
69
  - For write calls, verify the signature against the provider's own published API contract — their REST reference docs, OpenAPI spec, SDK source code, or `--help` output. When a read endpoint exposes the same state, call it to confirm the write contract.
67
70
  - Flag every call where documented parameters, types, or behavior diverge from the official API contract.
68
71
 
72
+ **A9. Documentation claims about the codebase (when the artifact asserts facts about the code)**
73
+
74
+ When the artifact is documentation that asserts facts about the codebase (symbol names, signatures, return types, exceptions, file paths), run all seven documentation-as-contract checks below; each yields a confirmation or a finding. For a pure-code artifact, A9 is one line of proof-of-absence (the artifact asserts no code facts).
75
+
76
+ - Full failure contract — the failure signals of a function are its return value AND every exception it raises; trace the body and the docstring `Raises:` for every `raise`. _Example:_ a docs PR says a UI helper "returns `bool`", but it also raises a custom not-found error, so "returns bool" understates the contract.
77
+ - Call shape — required versus optional parameters (a keyword-only parameter with NO default is required; omitting it raises `TypeError`), sync versus async, and the exact access path (free function versus instance method reached through an object attribute versus import path). _Example:_ a doc presents a helper as a free function, but it is an `async` instance method reached through an object attribute, so the doc's call example would raise `TypeError`.
78
+ - Reuse-first — before a doc endorses a hand-written snippet, search for a dedicated helper that already does it. _Example:_ a doc endorses hand-composing `normalize(name).lower()` inline while a dedicated `normalize_for_matching()` helper already does exactly that.
79
+ - Path resolution — every file or directory path a doc cites resolves from the repository root. _Example:_ a doc cites a bare `snapshots/` directory as if it sat at the repo root, but the tree lives under `subsystem/snapshots/`.
80
+ - Cross-entry consistency — scan parallel rows, sections, and table entries for claims that contradict each other. _Example:_ two adjacent table rows map the same subsystem to two different exception base classes.
81
+ - Catch-site precision — when a doc claims code "catches X", confirm the exact site and scope of the catch. _Example:_ a doc says a context manager catches a driver error, but the `except` wraps only the rollback inside `finally`, so an error raised in the `with` body propagates uncaught.
82
+ - Citation freshness — re-derive every `file:line` claim against the current code; never trust a prior "verified" assertion or wording borrowed from a comment. _Example:_ an attribute name carried over from a review comment names a member the class does not define; the current code exposes it under a different name.
83
+
69
84
  ## Cross-bucket questions to answer at the end
70
85
 
71
86
  Q1: Are there any contracts that span two sub-buckets that single-bucket analysis would miss?
@@ -74,7 +89,7 @@ Q3: Where would a future refactor most likely break a cross-bucket or cross-lang
74
89
 
75
90
  ## Output
76
91
 
77
- Lead: `Total: N (P0=N, P1=N, P2=N)`. For each sub-bucket A1–A8, produce Shape A or Shape B (with ≥3 adversarial probes). Cross-bucket Q1–Q3 answers after the per-sub-bucket walk. Adversarial second pass: "assume your first pass missed at least 3 P1 bugs across these 8 sub-buckets — find them." Open Questions section for ambiguities. Read-only. No edits, no commits.
92
+ Lead: `Total: N (P0=N, P1=N, P2=N)`. For each sub-bucket A1–A9, produce Shape A or Shape B (with ≥3 adversarial probes). Cross-bucket Q1–Q3 answers after the per-sub-bucket walk. Adversarial second pass: "assume your first pass missed at least 3 P1 bugs across these 9 sub-buckets — find them." Open Questions section for ambiguities. Read-only. No edits, no commits.
78
93
 
79
94
  ---
80
95
 
@@ -62,6 +62,8 @@ These rules are automatically enforced by `code_rules_enforcer.py`. Violations b
62
62
  | Test-mode branching in production | Reading `TESTING`, `PYTEST_CURRENT_TEST`, `IS_TEST`, etc. from production code creates two parallel implementations. Use dependency injection so production stays single-path. **Test files and hook infrastructure exempt.** |
63
63
  | Thin wrapper files | A non-`__init__.py` module whose body is only imports (optionally with an `__all__` assignment) is a re-export indirection with no payload. Callers should import from the real module. `__init__.py` is the canonical re-export surface and is exempt. |
64
64
  | Docstring format (Google-style) | Public functions/methods (no leading underscore, not dunder, body > 3 lines, not `@property`/`@abstractmethod`) require Google-style `Args:` / `Returns:` (or `Yields:`) / `Raises:` sections matching the signature. **Test files exempt.** |
65
+ | Docstring Args match signature | A public function whose docstring `Args:` section names a parameter the signature does not declare is flagged — a rename that left the adjacent `Args:` line stale. Only the `Args:` section is compared against the signature; `Raises:` is left alone because callee-propagated exceptions cause false positives. **Test files and hook infrastructure exempt.** |
66
+ | Ignored must-check return | A bare-statement call to a function whose return value is its only failure signal (the curated `find_and_click`, `write_outcome` set) is flagged — the discarded boolean lets the caller move on silently after a failure. Assign the return and check it. Assigned (`clicked = …`) and branched-on (`if …:`) calls are exempt. Attribute calls are matched by their terminal method name alone (the receiver type is not resolved), so an unrelated `obj.write_outcome()` or `widget.find_and_click()` whose method name collides with a curated name is also flagged. **Test files exempt.** |
65
67
 
66
68
  ### Where UPPER_SNAKE is allowed
67
69
 
@@ -124,7 +126,7 @@ Full words only. No mental translation.
124
126
 
125
127
  **Extended naming rules**:
126
128
  - Loop vars: `each_order`, `each_user` (prefix `each_`)
127
- - Booleans: `is_valid`, `has_permission`, `should_retry` (prefix `is_`/`has_`/`should_`/`can_`)
129
+ - Booleans: `is_valid`, `has_permission`, `should_retry`, `was_clicked`, `did_succeed` (prefix `is_`/`has_`/`should_`/`can_`/`was_`/`did_`). The hook covers both boolean assignments and boolean-typed function parameters (a parameter annotated `bool` or defaulting to a boolean literal); `self`/`cls` and single-character names are exempt.
128
130
  - Collections: `all_orders`, `all_users` (prefix `all_`)
129
131
  - Maps: `price_by_product`, `user_by_id` (pattern `X_by_Y`)
130
132
  - Preposition params: `from_path=`, `to=`, `into=`
@@ -400,6 +402,9 @@ Hook will enforce:
400
402
  [⚡] No test-mode branching in production (TESTING / PYTEST_CURRENT_TEST)
401
403
  [⚡] No thin wrapper modules (imports only, optionally with __all__, outside __init__.py)
402
404
  [⚡] Public functions have Google-style Args:/Returns:/Raises: when warranted
405
+ [⚡] Docstring Args: names match the signature (a stale renamed param is flagged)
406
+ [⚡] Boolean names prefixed is_/has_/should_/can_/was_/did_ (assignments AND bool-typed parameters)
407
+ [⚡] No discarded must-check return (assign and check find_and_click/write_outcome outcomes)
403
408
 
404
409
  Manual check:
405
410
  [ ] No abbreviations?
@@ -1,4 +1,4 @@
1
- """Shared gh body-arg parsing utilities for blocking hooks."""
1
+ """Shared shell-token and gh body-arg parsing utilities for blocking hooks."""
2
2
 
3
3
  from __future__ import annotations
4
4
 
@@ -55,6 +55,63 @@ _all_equals_prefixes_for_skip: tuple[str, ...] = tuple(
55
55
  bash_continuation_marker: str = "\\"
56
56
  powershell_continuation_marker: str = "`"
57
57
 
58
# Leading character tested by `is_unresolvable_shell_value`.
shell_variable_sigil: str = "$"
# Characters accepted as a wrapping quote pair by `strip_surrounding_quotes`.
all_quote_characters: frozenset[str] = frozenset(("'", '"'))
# Tokens shorter than this are never inspected at index 1 by the helpers below.
minimum_meaningful_token_length: int = 2

# Every value-taking flag except the two body-file spellings.
non_body_value_flags: frozenset[str] = all_value_flags - {
    body_file_flag,
    body_file_short_flag,
}

# "<flag>=" prefixes ordered longest first, so a first-match scan tries a
# longer prefix before any shorter prefix that shares its start.
_non_body_value_flag_equals_prefixes: tuple[str, ...] = tuple(
    sorted(
        (f"{each_flag}=" for each_flag in non_body_value_flags),
        key=len,
        reverse=True,
    )
)
67
+
68
+
69
def is_flag_shaped_token(token: str) -> bool:
    """Report whether a token is flag-shaped for body/PR-number extraction.

    A token qualifies when it is at least `minimum_meaningful_token_length`
    characters long, starts with "-", and its second character is either "-"
    or alphabetic. Bare "--" and "--<digit>" tokens therefore count as flags,
    as does "-x"; "-1" does not. `_is_flag_shaped` applies a stricter rule
    for token-stream scanning.

    Args:
        token: A single shell token.

    Returns:
        True when the token has the flag shape described above.
    """
    has_dash_lead = (
        len(token) >= minimum_meaningful_token_length and token.startswith("-")
    )
    if not has_dash_lead:
        return False
    second_character = token[1]
    return second_character == "-" or second_character.isalpha()
81
+
82
+
83
def strip_surrounding_quotes(token: str) -> str:
    """Remove one matching pair of quotes wrapped around a token.

    Args:
        token: A single shell token, possibly quoted.

    Returns:
        The token without its first and last character when both are the
        same character from `all_quote_characters`; otherwise the token
        unchanged. Tokens shorter than `minimum_meaningful_token_length`
        are always returned unchanged.
    """
    is_long_enough = len(token) >= minimum_meaningful_token_length
    if not is_long_enough:
        return token
    opening_character, closing_character = token[0], token[-1]
    is_wrapped_in_matching_quotes = (
        opening_character in all_quote_characters
        and opening_character == closing_character
    )
    return token[1:-1] if is_wrapped_in_matching_quotes else token
91
+
92
+
93
def is_unresolvable_shell_value(token: str) -> bool:
    """Report whether a token begins with `shell_variable_sigil`."""
    starts_with_sigil = token.startswith(shell_variable_sigil)
    return starts_with_sigil
95
+
96
+
97
def _match_prefix(token: str, all_prefixes: tuple[str, ...]) -> str | None:
    """Return the first prefix in `all_prefixes` that `token` starts with.

    Prefixes are tried in the order given, so the caller controls which
    prefix wins when several match. Returns None when none match.
    """
    matching_prefixes = (
        each_prefix for each_prefix in all_prefixes if token.startswith(each_prefix)
    )
    return next(matching_prefixes, None)
102
+
103
+
104
def match_body_flag_equals_prefix(token: str) -> str | None:
    """Return the first entry of `all_body_flag_prefixes` that `token` starts with, else None."""
    matched_prefix = _match_prefix(token, all_body_flag_prefixes)
    return matched_prefix
106
+
107
+
108
def match_body_file_equals_prefix(token: str) -> str | None:
    """Return the body-file "flag=" prefix that `token` starts with, else None.

    The long-form prefix is tried before the short-form prefix.
    """
    all_body_file_prefixes = (body_file_flag_prefix, body_file_short_flag_prefix)
    return _match_prefix(token, all_body_file_prefixes)
110
+
111
+
112
def match_non_body_value_flag_equals_prefix(token: str) -> str | None:
    """Return the non-body value-flag "flag=" prefix that `token` starts with, else None."""
    matched_prefix = _match_prefix(token, _non_body_value_flag_equals_prefixes)
    return matched_prefix
114
+
58
115
 
59
116
  def _count_trailing_run(text: str, marker_character: str) -> int:
60
117
  trailing_run_length = 0
@@ -91,7 +148,13 @@ def get_logical_first_line(command: str) -> str:
91
148
 
92
149
 
93
150
  def _is_flag_shaped(token: str) -> bool:
94
- if len(token) < 2:
151
+ """Report whether a token is flag-shaped for token-stream scanning.
152
+
153
+ Requires an alphabetic character after "--", so bare "--" and "--<digit>"
154
+ tokens are not flag-shaped. `is_flag_shaped_token` applies a looser rule
155
+ for body/PR-number extraction.
156
+ """
157
+ if len(token) < minimum_meaningful_token_length:
95
158
  return False
96
159
  if not token.startswith("-"):
97
160
  return False
@@ -102,7 +165,7 @@ def _is_flag_shaped(token: str) -> bool:
102
165
 
103
166
 
104
167
  def _quoted_value_starts_split(value_token: str) -> bool:
105
- if len(value_token) < 2:
168
+ if len(value_token) < minimum_meaningful_token_length:
106
169
  return False
107
170
  first_character = value_token[0]
108
171
  if first_character not in {'"', "'"}:
@@ -129,13 +192,6 @@ def count_extra_tokens_to_skip_for_split_quoted_value(
129
192
  return None
130
193
 
131
194
 
132
- def _match_equals_prefix_for_skip(token: str) -> str | None:
133
- for each_prefix in _all_equals_prefixes_for_skip:
134
- if token.startswith(each_prefix):
135
- return each_prefix
136
- return None
137
-
138
-
139
195
  def iter_significant_tokens(
140
196
  command: str,
141
197
  pre_tokenized: tuple[str, list[str]] | None = None,
@@ -175,7 +231,7 @@ def iter_significant_tokens(
175
231
  while token_index < len(all_tokens):
176
232
  current_token = all_tokens[token_index]
177
233
  remaining_tokens = all_tokens[token_index + 1:]
178
- matched_equals_prefix = _match_equals_prefix_for_skip(current_token)
234
+ matched_equals_prefix = _match_prefix(current_token, _all_equals_prefixes_for_skip)
179
235
  if matched_equals_prefix is not None:
180
236
  value_token = current_token[len(matched_equals_prefix):]
181
237
  split_value_extra_tokens = count_extra_tokens_to_skip_for_split_quoted_value(
@@ -0,0 +1,65 @@
1
+ """Shared subprocess-invocation helpers for the md_to_html_blocker test suites.
2
+
3
+ Subprocess CWD is rooted in a per-session sandbox created lazily so that
4
+ relative-path test cases canonicalize outside any `.claude-plugin/` ancestor,
5
+ outside the OS temp directory, and outside the exempt home-relative
6
+ subdirectories. The sandbox is a real repo root (it carries a `.git` marker) so
7
+ relative `README.md` / `CHANGELOG.md` writes exercise the repo-root exemption
8
+ path. This keeps the suites independent of where pytest itself is run.
9
+ """
10
+
11
+ import functools
12
+ import json
13
+ import os
14
+ import shutil
15
+ import stat
16
+ import subprocess
17
+ import sys
18
+ import tempfile
19
+ from pathlib import Path
20
+
21
+ HOOK_SCRIPT_PATH = os.path.join(os.path.dirname(__file__), "md_to_html_blocker.py")
22
+
23
+
24
def _strip_read_only_and_retry(removal_function, target_path, *_exc_info):
    """Make `target_path` writable, then call `removal_function` on it again.

    Shaped as an `shutil.rmtree` error handler: any trailing error-info
    arguments are accepted and ignored. The retry is best-effort, so an
    `OSError` from either step is swallowed and the removal is not attempted
    when the permission change itself fails.
    """
    try:
        os.chmod(target_path, stat.S_IWRITE)
    except OSError:
        return
    try:
        removal_function(target_path)
    except OSError:
        return
30
+
31
+
32
def _force_rmtree(target_path: str) -> None:
    """Remove a directory tree, retrying entries after clearing read-only bits.

    `_strip_read_only_and_retry` is registered as the rmtree error handler
    under the keyword the running interpreter expects: `onexc` on Python
    3.12 and later, `onerror` before that. Any `OSError` that still escapes
    `shutil.rmtree` is swallowed, so removal is best-effort.
    """
    handler_keyword = "onexc" if sys.version_info >= (3, 12) else "onerror"
    try:
        shutil.rmtree(target_path, **{handler_keyword: _strip_read_only_and_retry})
    except OSError:
        pass
42
+
43
+
44
@functools.lru_cache(maxsize=1)
def _get_sandbox_parent_directory() -> str:
    """Create the sandbox directory on first call and return its path.

    The directory is made with `tempfile.mkdtemp` directly under the user's
    home directory and receives an empty `.git` file. The single-entry cache
    means later calls in the same process return the same path without
    creating another directory.
    """
    home_directory = str(Path.home())
    sandbox_parent = tempfile.mkdtemp(prefix="pytest_md_blocker_", dir=home_directory)
    Path(sandbox_parent, ".git").touch()
    return sandbox_parent
50
+
51
+
52
class _RunHook:
    """Callable that runs the md_to_html_blocker hook script in a subprocess."""

    def __call__(self, tool_name: str, tool_input: dict) -> subprocess.CompletedProcess:
        """Run the hook with a JSON payload on stdin and return the result.

        Args:
            tool_name: Value stored under the payload's "tool_name" key.
            tool_input: Value stored under the payload's "tool_input" key.

        Returns:
            The completed process, with stdout and stderr captured as text.
            A non-zero exit status does not raise.
        """
        hook_command = [sys.executable, HOOK_SCRIPT_PATH]
        serialized_payload = json.dumps(
            {"tool_name": tool_name, "tool_input": tool_input}
        )
        sandbox_directory = _get_sandbox_parent_directory()
        return subprocess.run(
            hook_command,
            input=serialized_payload,
            cwd=sandbox_directory,
            capture_output=True,
            text=True,
            check=False,
        )


_run_hook = _RunHook()