claude-dev-env 1.49.0 → 1.50.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/audit-rubrics/category_rubrics/category-a-api-contracts.md +86 -0
  2. package/audit-rubrics/category_rubrics/category-b-selector-engine-compat.md +36 -0
  3. package/audit-rubrics/category_rubrics/category-c-resource-cleanup.md +35 -0
  4. package/audit-rubrics/category_rubrics/category-d-scoping-and-ordering.md +35 -0
  5. package/audit-rubrics/category_rubrics/category-e-dead-code.md +38 -0
  6. package/audit-rubrics/category_rubrics/category-f-silent-failures.md +38 -0
  7. package/audit-rubrics/category_rubrics/category-g-bounds-and-overflow.md +38 -0
  8. package/audit-rubrics/category_rubrics/category-h-security-boundaries.md +40 -0
  9. package/audit-rubrics/category_rubrics/category-i-concurrency.md +38 -0
  10. package/audit-rubrics/category_rubrics/category-j-code-rules-compliance.md +46 -0
  11. package/audit-rubrics/category_rubrics/category-k-codebase-conflicts.md +59 -0
  12. package/audit-rubrics/category_rubrics/category-l-behavior-equivalence.md +45 -0
  13. package/audit-rubrics/category_rubrics/category-m-producer-consumer-cardinality.md +44 -0
  14. package/audit-rubrics/category_rubrics/category-n-test-name-scenario-verifier.md +45 -0
  15. package/audit-rubrics/prompts/category-a-api-contracts.md +399 -0
  16. package/audit-rubrics/prompts/category-b-selector-engine-compat.md +401 -0
  17. package/audit-rubrics/prompts/category-c-resource-cleanup.md +420 -0
  18. package/audit-rubrics/prompts/category-d-scoping-and-ordering.md +414 -0
  19. package/audit-rubrics/prompts/category-e-dead-code.md +420 -0
  20. package/audit-rubrics/prompts/category-f-silent-failures.md +420 -0
  21. package/audit-rubrics/prompts/category-g-bounds-and-overflow.md +383 -0
  22. package/audit-rubrics/prompts/category-h-security-boundaries.md +423 -0
  23. package/audit-rubrics/prompts/category-i-concurrency.md +429 -0
  24. package/audit-rubrics/prompts/category-j-code-rules-compliance.md +463 -0
  25. package/audit-rubrics/prompts/category-k-codebase-conflicts.md +328 -0
  26. package/audit-rubrics/prompts/category-l-behavior-equivalence.md +128 -0
  27. package/audit-rubrics/prompts/category-m-producer-consumer-cardinality.md +129 -0
  28. package/audit-rubrics/prompts/category-n-test-name-scenario-verifier.md +132 -0
  29. package/audit-rubrics/source-material-section-types.md +51 -0
  30. package/docs/CODE_RULES.md +6 -1
  31. package/hooks/blocking/code_rules_enforcer.py +323 -11
  32. package/hooks/blocking/md_to_html_blocker.py +2 -2
  33. package/hooks/blocking/test_code_rules_enforcer.py +65 -0
  34. package/hooks/blocking/test_code_rules_enforcer_docstring_args_signature.py +256 -0
  35. package/hooks/blocking/test_code_rules_enforcer_ignored_must_check_return.py +256 -0
  36. package/hooks/blocking/test_code_rules_enforcer_naming_pattern.py +137 -1
  37. package/hooks/blocking/test_md_to_html_blocker.py +38 -0
  38. package/hooks/hooks_constants/blocking_check_limits.py +2 -0
  39. package/hooks/hooks_constants/code_rules_enforcer_constants.py +15 -1
  40. package/hooks/hooks_constants/md_to_html_blocker_constants.py +1 -1
  41. package/hooks/hooks_constants/test_md_to_html_blocker_constants.py +11 -4
  42. package/package.json +2 -1
  43. package/skills/bugteam/reference/teardown-publish-permissions.md +7 -2
@@ -0,0 +1,132 @@
1
+ Audit [REPO/ARTIFACT] [TARGET_ID] for **Category N only** (test-name scenario verifier). Skip A–M. Sub-bucket forced-exhaustion mode: Category N is decomposed into 9 sub-buckets below. Each sub-bucket REQUIRES at least one Shape A finding OR exactly one Shape B proof-of-absence with **at least 3 adversarial probes** specific to that sub-bucket. A sub-bucket returning neither is a protocol gap.
2
+
3
+ [ARTIFACT METADATA — include every changed test alongside the production code path it claims to cover]
4
+
5
+ - Title / one-line summary: [TITLE]
6
+ - Head ref / SHA at audit time: [HEAD_SHA]
7
+ - Changed test functions (file + line range + test name + first-line assertion): [CHANGED_TESTS]
8
+ - Production functions the tests claim to cover (file + line range + symbol name + branch structure): [PRODUCTION_TARGETS]
9
+ - Scenario fixtures / monkeypatches in scope (`monkeypatch.setattr`, `pytest.mark.skipif`, `freezegun.freeze_time`, `mock.patch`): [SCENARIO_GATES]
10
+ - Stated intent of each scenario-named test (what condition the test name claims to exercise): [INTENT]
11
+
12
+ ID prefix: `find`.
13
+
14
+ [ONE-PARAGRAPH FRAME: enumerate every test whose name includes a scenario claim (`_when_*`, `_at_*`, `_under_*`, `_with_*`, `_on_*`, `_after_*`, `_during_*`). State the audit goal: for each scenario-named test, verify the body sets up the named condition via fixture / monkeypatch / environment gate so the production code's scenario-named branch actually runs during the act phase.]
15
+
16
+ ## Source material ([N] files/sections, all lines in scope)
17
+
18
+ [INLINE every changed test function alongside the production function it claims to cover. Include the production function's branch structure so the audit can identify the no-op / early-return / default branches that scenario-named tests must NOT silently pass against.]
19
+
20
+ ## Sub-buckets (each requires Shape A finding OR Shape B with ≥3 adversarial probes)
21
+
22
+ **N1. Scenario-named tests demonstrate the scenario** ⭐ canonical N case
23
+ - For every test whose name contains `_when_X` / `_at_X` / `_under_X` / `_with_X` / `_on_X` / `_after_X` / `_during_X`, verify the body sets up condition X via fixture, monkeypatch, or environment gate before calling the system under test.
24
+ - Adversarial probes: (a) construct an input that satisfies the test's assertion but does NOT trigger the scenario-named code path — does the test still pass; (b) trace the production function's code path under the test's input — which branch executes during the act phase; (c) inspect the test's setup-phase for monkeypatch / fixture calls that gate the scenario.
25
+
26
+ **N2. Path-decision parametric matrices**
27
+ - For tests of `is_*_path` / `_resolve_*_path` / `*_path_exemptions` modules, verify the test corpus ships a parametric matrix covering: empty string, single filename, tilde-prefix, UNC path, drive-letter path, symlinked path, `..`-containing path, trailing-slash path.
28
+ - Adversarial probes: (a) walk the production function's path-classification branches — which branch does each input class hit; (b) check the test corpus for input shapes that hit only the default / no-classification branch; (c) for each input class missing from the matrix, construct a probe input and trace which branch executes.
29
+
30
+ **N3. Tests that pass "for the wrong reason"**
31
+ - For every assertion of the shape `assert <substring> in result`, verify the substring shape is unique to the scenario-named branch's output.
32
+ - Adversarial probe: walk the production function's branches; for each branch, build the output and test the substring against it. If the substring matches more than one branch's output, the assertion cannot discriminate which branch ran.
33
+
34
+ **N4. No-op branch exercised by scenario name**
35
+ - For every scenario-named test, identify the production function's no-op / early-return / no-feature-installed branch. Verify the test's constructed input does NOT hit that branch.
36
+ - Adversarial probes: (a) any test whose input fails the production function's first guard returns the no-op default and the assertion checks the default; (b) any test whose input is empty / None / missing returns early; (c) any test whose fixture is not installed at the test runtime hits the "feature missing" branch.
37
+
38
+ **N5. Assertion shape mismatch**
39
+ - For every assertion, verify the assertion's shape can fail by construction and matches the production function's actual return values. Mismatches to look for: `assert <substring> not in result` where the substring is misspelled relative to the production output; `assert result == ""` when the production function returns `None` on the negative case; and `len(result) > 0` when the production function returns an empty list on the no-feature path.
40
+ - Adversarial probes: (a) inspect each assertion's shape against the production function's actual return-value space; (b) check for assertions where the substring shape never appears in the production output by construction; (c) check for `assert x is True` where the production function returns truthy non-bool values.
41
+
42
+ **N6. Cross-platform scenario gating**
43
+ - For every test named `_on_windows` / `_on_linux` / `_on_macos`, verify the body gates on `sys.platform`, `monkeypatch.setattr(os, "name", ...)`, or `@pytest.mark.skipif`.
44
+ - Bare scenario names that run unchanged across platforms claim more than they prove.
45
+ - Adversarial probes: (a) does the production function's platform-specific branch get skipped on the CI runner's actual platform; (b) does the test pass against the platform fallback rather than the platform-specific code; (c) is the platform fixture installed and respected by the test runner.
46
+
47
+ **N7. Time / clock scenario gating**
48
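+ - For every test named `_after_<duration>` / `_at_midnight` / `_during_business_hours`, verify the body injects a frozen clock (`freezegun.freeze_time`, `monkeypatch.setattr(time, "time", ...)`, or `unittest.mock.patch("<module_under_test>.datetime")` — patch the `datetime` name where the production module looks it up, since `datetime.datetime.now` itself cannot be patched in place).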
49
+ - Wall-clock tests are non-deterministic and may pass against the wrong scenario.
50
+ - Adversarial probes: (a) does the test's act phase depend on the system clock being at a specific value; (b) does any timezone shift cause the test to flake; (c) does the production function read the clock during the act phase.
51
+
52
+ **N8. Concurrent / load scenario gating**
53
+ - For every test named `_under_load` / `_with_concurrent_writers` / `_under_contention`, verify the body spawns the concurrent workers and `wait()`s on them.
54
+ - Single-threaded tests cannot claim concurrent-scenario coverage.
55
+ - Adversarial probes: (a) does the test spawn `threading.Thread` / `multiprocessing.Process` / `asyncio.gather` / `concurrent.futures.ThreadPoolExecutor`; (b) does the test's act phase exercise the concurrency primitive the production function relies on; (c) does the test introduce a race window the production function's lock should serialize.
56
+
57
+ **N9. Neutral-named tests (out of scope)**
58
+ - Tests named `test_returns_empty_list_for_unknown_key` / `test_handles_y` / `test_raises_value_error` (no scenario claim in the name) are NOT subject to N1–N4 or N6–N8.
59
+ - For neutral-named tests, only N5 (assertion shape mismatch) applies.
60
+
61
+ ## Cross-bucket questions to answer at the end
62
+
63
+ Q1: Across all 9 sub-buckets, is there a scenario-named test that does not exercise the named scenario? Cite the test's file:line and the production function's scenario-named branch that should have been exercised.
64
+
65
+ Q2: What's the worst false-coverage signal introduced by the diff? Evaluate by (a) whether the test's name is load-bearing in the suite's coverage report; (b) whether the named scenario has any other coverage; (c) whether removing the test would change the coverage percentage.
66
+
67
+ Q3: Which scenario-named test is most likely to start passing for the wrong reason after a future refactor? Identify tests whose assertions match substrings that could appear in multiple branches — these are time bombs.
68
+
69
+ ## Output
70
+
71
+ Lead: `Total: N (P0=N, P1=N, P2=N)`. For each sub-bucket N1-N9, produce Shape A or Shape B (with ≥3 probes). Each Shape A finding must cite the test's file:line AND the production function's branch the test's name claims to cover. Cross-bucket Q1-Q3 answers after the per-sub-bucket walk. Adversarial second pass: "assume your first pass missed at least 3 scenario-named tests that exercise the no-op branch — find them." Open Questions section for ambiguities. Read-only. No edits, no commits.
72
+
73
+ ---
74
+
75
+ # Worked example: jl-cmd/claude-code-config PR #476
76
+
77
+ Audit jl-cmd/claude-code-config PR #476 for **Category N only** (test-name scenario verifier). Skip A–M. Sub-bucket forced-exhaustion mode: Category N is decomposed into 9 sub-buckets below.
78
+
79
+ PR: refactor(hooks): cross-platform path resolution for windows-rmtree-blocker
80
+ Head SHA: (the commit that landed the platform-conditional logic)
81
+ ID prefix: `find`.
82
+
83
+ The PR adds platform-conditional path-resolution logic to `windows_rmtree_blocker.py` and ships 5 new tests named `test_*_on_windows` and `test_*_on_linux` across `test_windows_rmtree_blocker.py`. The audit goal: verify each scenario-named test sets up the named platform via monkeypatch or skipif gate so the production function's platform-specific branch actually runs during the act phase.
84
+
85
+ ## Sub-buckets (each requires Shape A finding OR Shape B with ≥3 adversarial probes)
86
+
87
+ **N1. Scenario-named tests demonstrate the scenario** ⭐ canonical N case — Shape A findings F5, F21, F23, F26, F27
88
+ - `test_resolves_path_on_windows` calls `windows_rmtree_blocker.resolve_path("C:/Users/test")` and asserts the result equals `Path("C:/Users/test")`. The body does NOT call `monkeypatch.setattr(sys, "platform", "win32")`, and the test is NOT decorated with `@pytest.mark.skipif(sys.platform != "win32", reason=...)`. On a Linux CI runner, `sys.platform == "linux"` is in effect when the test runs; the production function's `if sys.platform == "win32":` branch is skipped, and the assertion succeeds against the Linux fallback branch's output (which happens to match `Path("C:/Users/test")` because `pathlib.PurePath` accepts Windows-style strings on Linux without normalization).
89
+ - The test's NAME claims Windows-branch coverage; the test's BODY exercises the Linux fallback. This is the canonical N1 finding shape.
90
+ - Adversarial probe (a): construct an input that the Windows branch would handle differently from the Linux branch — does the test catch the divergence? In F5's case, no: the assertion uses a string that both branches happen to produce, so the test cannot discriminate.
91
+ - Adversarial probe (b): the production function's `sys.platform == "win32"` branch performs UNC-prefix stripping; the Linux fallback does not. Inputs containing `\\?\` would yield different outputs on the two branches. The test does not use such inputs.
92
+ - Adversarial probe (c): the test runtime's `sys.platform` is `"linux"` on the CI runner. The act phase hits the fallback, full stop.
93
+ - **Severity P1** for each of F5, F21, F23, F26, F27: scenario-named tests claim platform-specific coverage they do not provide.
94
+ - **Fix**: wrap each `_on_windows`-named test in `@pytest.mark.skipif(sys.platform != "win32", reason="windows-specific path resolution")` AND duplicate as `_on_linux` for the Linux fallback branch; OR use `monkeypatch.setattr(sys, "platform", "win32")` to force the named platform during the act phase.
95
+
96
+ **N2. Path-decision parametric matrices**
97
+ - The production function `resolve_path` is a path-classifier — it qualifies for N2 coverage. The PR ships 5 inputs: drive-letter, UNC-prefix, tilde-prefix, `..`-containing, and trailing-slash. Missing: empty string, single filename, symlinked path. These three input classes have no test in the diff.
98
+ - Adversarial probes: (a) construct an empty-string input — does any branch handle it; (b) construct a single-filename input (no directory component) — does the function return as-is or attempt to resolve against cwd; (c) construct a symlinked path — does the function resolve through the symlink or preserve it.
99
+
100
+ **N3. Tests that pass "for the wrong reason"**
101
+ - See N1 findings F5, F21, F23, F26, F27 — each passes because the assertion's expected value matches both the Windows-branch output and the Linux-fallback output. The assertion shape cannot discriminate which branch ran.
102
+
103
+ **N4. No-op branch exercised by scenario name**
104
+ - F5 finding above: the scenario-named test exercises the Linux-fallback no-op branch on the CI runner.
105
+
106
+ **N5. Assertion shape mismatch**
107
+ - All five tests use `assert result == Path(<expected>)`. The shape can fail by construction (Path equality is strict). N5 verified clean.
108
+
109
+ **N6. Cross-platform scenario gating** ⭐
110
+ - The five `_on_windows`- / `_on_linux`-named tests in the diff have zero platform gating. N6 is the structural lens on the N1 findings — every test's NAME claims platform coverage, every test's BODY ignores the platform gate.
111
+ - See N1 F5 / F21 / F23 / F26 / F27.
112
+
113
+ **N7. Time / clock scenario gating**
114
+ - No time-named tests in scope. N7 verified clean.
115
+
116
+ **N8. Concurrent / load scenario gating**
117
+ - No concurrency-named tests in scope. N8 verified clean.
118
+
119
+ **N9. Neutral-named tests (out of scope)**
120
+ - One test in the diff is neutrally named (`test_returns_path_unchanged_when_already_absolute`). N9 marks it out of scope for N1-N4 / N6-N8; only N5 applies. The assertion is `assert result == input_path` — shape clean. Verified clean.
121
+
122
+ ## Cross-bucket questions to answer at the end
123
+
124
+ Q1: Five scenario-named tests (F5, F21, F23, F26, F27) do not gate on `sys.platform` and pass against the Linux-fallback branch on the CI runner. The Windows-specific code path has zero actual coverage despite the test names claiming it. Cite `test_windows_rmtree_blocker.py:42` (F5 first test) and `windows_rmtree_blocker.py:67` (the `if sys.platform == "win32":` branch) as the misclaim pair.
125
+
126
+ Q2: Worst false-coverage signal: F5 — the test's name `test_resolves_path_on_windows` reads as Windows-branch coverage in the PR review, but the act phase exercises the Linux fallback. A reviewer reading the test name during PR review would assume Windows coverage exists; it does not.
127
+
128
+ Q3: Once the Windows branch and the Linux branch diverge in their output for the same input — for example, a future PR that adds normalization to the Windows branch only — these five tests will start failing on Windows CI, exposing the false coverage retroactively.
129
+
130
+ ## Output
131
+
132
+ Lead: `Total: 6 (P0=0, P1=5, P2=1)`. F5, F21, F23, F26, F27 are the N1+N6 scenario-gate-missing findings (P1). N2 has one finding (parametric matrix incomplete) at P2. N3 / N4 are subsumed by N1. N5 / N7 / N8 / N9 verified clean. Adversarial second pass: scan for any non-`_on_<platform>`-named test that exercises the platform-conditional branch — verified none in this diff. Open Questions: whether the PR author intended any of the `_on_<platform>` tests to be platform-gated; resolve via reply on the audit thread. Read-only. No edits, no commits.
@@ -0,0 +1,51 @@
1
+ # What "section" means in the source-material block
2
+
3
+ Audit prompt templates ask you to inline the artifact under audit, broken into "sections." A section is **the natural chunk you'd quote and reference back to when reporting a finding.** The right chunk size depends on what you're auditing.
4
+
5
+ ## Lookup table
6
+
7
+ | If you're auditing… | A "section" is… | What you put in the code fence |
8
+ |---|---|---|
9
+ | A code PR | One file in the diff | Filename as header, full file content |
10
+ | A long Python module by itself | One function or class | Function name as header, just that function's body |
11
+ | A design doc / RFC | One named heading (e.g. "## Authentication") | The heading + all paragraphs under it |
12
+ | An essay or article | One section break or chapter | Section title + the paragraphs |
13
+ | A contract or terms-of-service | One clause | Clause number + clause text |
14
+ | A meeting transcript | One topic or speaker block | Topic name + the dialogue |
15
+ | An email thread | One message | Sender + timestamp + message body |
16
+ | A spreadsheet | One sheet or one logical table | Sheet name + the rows |
17
+ | A SQL schema | One table definition | Table name + the CREATE TABLE statement |
18
+ | A config file | One stanza | Stanza name + the keys/values |
19
+ | A test suite | One test file | Filename + all the test functions |
20
+
21
+ ## Picking the right size
22
+
23
+ The rule: **pick the chunk size that lets the agent cite a finding with `[section name]:[line/paragraph N]` and have the user know exactly where to look.**
24
+
25
+ - **Too small** (one sentence per section): the agent runs out of context per chunk and findings can't reference cross-chunk patterns.
26
+ - **Too big** (the whole document as one section): the agent can't anchor findings to a specific spot, and the `failure_mode` text becomes vague.
27
+ - **Sweet spot in the May 2026 audit experiment on PR #394**: 4 files, 11–102 lines each. Each finding cited `<filename>:<line>` and was easy to verify. Results were better than the same audit run with the diff fetched on demand instead of inlined.
28
+
29
+ ## Header format inside the source-material block
30
+
31
+ Use one `###` header per section so the agent can reference each one by name:
32
+
33
+ ````
34
+ ## Source material (4 files, all lines in scope)
35
+
36
+ ### packages/foo/bar.py
37
+ ```python
38
+ [content]
39
+ ```
40
+
41
+ ### packages/foo/baz.py
42
+ ```python
43
+ [content]
44
+ ```
45
+ ````
46
+
47
+ The header text becomes the anchor the agent quotes back when reporting findings — keep it stable, unambiguous, and copy-pasteable into a citation.
48
+
49
+ ## When the artifact has no natural section breaks
50
+
51
+ If you're auditing something monolithic (a single long function, a contract with no clauses, a stream of dialogue), impose your own breaks at logical hinge points and label them: `### lines 1–40 (parameter parsing)`, `### lines 41–120 (main loop)`, `### lines 121–200 (cleanup)`. Don't hand the agent a wall of text — without anchors, findings degrade to "somewhere in this file."
@@ -62,6 +62,8 @@ These rules are automatically enforced by `code_rules_enforcer.py`. Violations b
62
62
  | Test-mode branching in production | Reading `TESTING`, `PYTEST_CURRENT_TEST`, `IS_TEST`, etc. from production code creates two parallel implementations. Use dependency injection so production stays single-path. **Test files and hook infrastructure exempt.** |
63
63
  | Thin wrapper files | A non-`__init__.py` module whose body is only imports (optionally with an `__all__` assignment) is a re-export indirection with no payload. Callers should import from the real module. `__init__.py` is the canonical re-export surface and is exempt. |
64
64
  | Docstring format (Google-style) | Public functions/methods (no leading underscore, not dunder, body > 3 lines, not `@property`/`@abstractmethod`) require Google-style `Args:` / `Returns:` (or `Yields:`) / `Raises:` sections matching the signature. **Test files exempt.** |
65
+ | Docstring Args match signature | A public function whose docstring `Args:` section names a parameter the signature does not declare is flagged — typically a rename that left the adjacent `Args:` line stale. Only the `Args:` section is compared against the signature; `Raises:` is left alone because callee-propagated exceptions cause false positives. Functions that accept `**kwargs` are skipped because their documented names may be keyword keys the signature cannot enumerate. **Test files and hook infrastructure exempt.** |
66
+ | Ignored must-check return | A bare-statement call to a function whose return value is its only failure signal (the curated `find_and_click`, `write_outcome` set) is flagged — the discarded boolean lets the caller move on silently after a failure. Assign the return and check it. Assigned (`clicked = …`) and branched-on (`if …:`) calls are exempt. Attribute calls are matched by their terminal method name alone (the receiver type is not resolved), so an unrelated `obj.write_outcome()` or `widget.find_and_click()` whose method name collides with a curated name is also flagged. **Test files exempt.** |
65
67
 
66
68
  ### Where UPPER_SNAKE is allowed
67
69
 
@@ -124,7 +126,7 @@ Full words only. No mental translation.
124
126
 
125
127
  **Extended naming rules** :
126
128
  - Loop vars: `each_order`, `each_user` (prefix `each_`)
127
- - Booleans: `is_valid`, `has_permission`, `should_retry` (prefix `is_`/`has_`/`should_`/`can_`)
129
+ - Booleans: `is_valid`, `has_permission`, `should_retry`, `was_clicked`, `did_succeed` (prefix `is_`/`has_`/`should_`/`can_`/`was_`/`did_`). The hook covers both boolean assignments and boolean-typed function parameters (a parameter annotated `bool` or defaulting to a boolean literal); `self`/`cls` and single-character names are exempt.
128
130
  - Collections: `all_orders`, `all_users` (prefix `all_`)
129
131
  - Maps: `price_by_product`, `user_by_id` (pattern `X_by_Y`)
130
132
  - Preposition params: `from_path=`, `to=`, `into=`
@@ -400,6 +402,9 @@ Hook will enforce:
400
402
  [⚡] No test-mode branching in production (TESTING / PYTEST_CURRENT_TEST)
401
403
  [⚡] No thin wrapper modules (imports only, optionally with __all__, outside __init__.py)
402
404
  [⚡] Public functions have Google-style Args:/Returns:/Raises: when warranted
405
+ [⚡] Docstring Args: names match the signature (a stale renamed param is flagged)
406
+ [⚡] Boolean names prefixed is_/has_/should_/can_/was_/did_ (assignments AND bool-typed parameters)
407
+ [⚡] No discarded must-check return (assign and check find_and_click/write_outcome outcomes)
403
408
 
404
409
  Manual check:
405
410
  [ ] No abbreviations?
@@ -91,7 +91,9 @@ from hooks_constants.blocking_check_limits import ( # noqa: E402
91
91
  MAX_BANNED_PREFIX_ISSUES,
92
92
  MAX_BARE_EXCEPT_ISSUES,
93
93
  MAX_BOUNDARY_TYPE_ISSUES,
94
+ MAX_DOCSTRING_ARGS_SIGNATURE_ISSUES,
94
95
  MAX_DOCSTRING_FORMAT_ISSUES,
96
+ MAX_IGNORED_MUST_CHECK_RETURN_ISSUES,
95
97
  MAX_STUB_IMPLEMENTATION_ISSUES,
96
98
  MAX_TEST_BRANCHING_ISSUES,
97
99
  MAX_TYPED_DICT_PAIR_ISSUES,
@@ -132,6 +134,10 @@ from hooks_constants.code_rules_enforcer_constants import ( # noqa: E402
132
134
  BANNED_NOUN_SPAN_FRAGMENT_TEMPLATE,
133
135
  BARE_EACH_TOKEN,
134
136
  ALL_BOOLEAN_NAME_PREFIXES,
137
+ ALL_DOCSTRING_ARGS_SECTION_HEADERS,
138
+ ALL_DOCSTRING_TERMINATING_SECTION_HEADERS,
139
+ DOCSTRING_ARG_ENTRY_PATTERN,
140
+ ALL_MUST_CHECK_RETURN_FUNCTION_NAMES,
135
141
  ALL_BUILTIN_DICT_METHOD_NAMES,
136
142
  ALL_CLI_FILE_PATH_MARKERS,
137
143
  CHAINED_INLINE_COMMENT_PATTERN,
@@ -2092,6 +2098,110 @@ def check_docstring_format(content: str, file_path: str) -> list[str]:
2092
2098
  return issues[:MAX_DOCSTRING_FORMAT_ISSUES]
2093
2099
 
2094
2100
 
2101
+ def _signature_parameter_names(
2102
+ function_node: ast.FunctionDef | ast.AsyncFunctionDef,
2103
+ ) -> set[str]:
2104
+ arguments = function_node.args
2105
+ real_names: set[str] = set()
2106
+ for each_argument in arguments.posonlyargs + arguments.args + arguments.kwonlyargs:
2107
+ real_names.add(each_argument.arg)
2108
+ if arguments.vararg is not None:
2109
+ real_names.add(arguments.vararg.arg)
2110
+ if arguments.kwarg is not None:
2111
+ real_names.add(arguments.kwarg.arg)
2112
+ return real_names - ALL_SELF_AND_CLS_PARAMETER_NAMES
2113
+
2114
+
2115
+ def _is_docstring_terminating_section_header(stripped_line: str) -> bool:
2116
+ return stripped_line in ALL_DOCSTRING_TERMINATING_SECTION_HEADERS
2117
+
2118
+
2119
+ def _documented_argument_names(docstring_text: str) -> list[str]:
2120
+ docstring_lines = docstring_text.splitlines()
2121
+ args_section_index = _find_args_section_index(docstring_lines)
2122
+ if args_section_index is None:
2123
+ return []
2124
+ documented_names: list[str] = []
2125
+ entry_indent: int | None = None
2126
+ for each_line in docstring_lines[args_section_index + 1:]:
2127
+ stripped_line = each_line.strip()
2128
+ if not stripped_line:
2129
+ continue
2130
+ if _is_docstring_terminating_section_header(stripped_line):
2131
+ break
2132
+ current_indent = len(each_line) - len(each_line.lstrip())
2133
+ if current_indent == 0:
2134
+ break
2135
+ if entry_indent is None:
2136
+ entry_indent = current_indent
2137
+ if current_indent > entry_indent:
2138
+ continue
2139
+ entry_match = DOCSTRING_ARG_ENTRY_PATTERN.match(stripped_line)
2140
+ if entry_match is not None:
2141
+ documented_names.append(entry_match.group(1))
2142
+ return documented_names
2143
+
2144
+
2145
+ def _find_args_section_index(all_docstring_lines: list[str]) -> int | None:
2146
+ for each_line_index, each_line in enumerate(all_docstring_lines):
2147
+ if each_line.strip() in ALL_DOCSTRING_ARGS_SECTION_HEADERS:
2148
+ return each_line_index
2149
+ return None
2150
+
2151
+
2152
+ def check_docstring_args_match_signature(content: str, file_path: str) -> list[str]:
2153
+ """Flag docstring Args: entries naming a parameter the signature lacks.
2154
+
2155
+ A fix that renames a parameter often leaves the adjacent ``Args:`` line
2156
+ stale. Each documented argument name is compared to the real signature;
2157
+ a documented name with no matching parameter is reported. Only the
2158
+ ``Args:`` section is validated — ``Raises:`` is left alone because
2159
+ callee-propagated exceptions cause false positives. Functions that
2160
+ accept ``**kwargs`` are skipped because their documented names may be
2161
+ keyword keys the signature cannot enumerate.
2162
+
2163
+ Args:
2164
+ content: The source text to inspect.
2165
+ file_path: The path the source will be written to, used for exemptions.
2166
+
2167
+ Returns:
2168
+ One issue per stale documented argument, capped at the module limit.
2169
+ """
2170
+ if is_test_file(file_path) or is_hook_infrastructure(file_path):
2171
+ return []
2172
+ try:
2173
+ parsed_tree = ast.parse(content)
2174
+ except SyntaxError:
2175
+ return []
2176
+ issues: list[str] = []
2177
+ for each_node in _walk_skipping_type_checking_blocks(parsed_tree):
2178
+ if not isinstance(each_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
2179
+ continue
2180
+ if _function_is_private_or_dunder(each_node.name):
2181
+ continue
2182
+ if _function_has_exempt_decorator(each_node):
2183
+ continue
2184
+ if _function_body_line_count(each_node) <= DOCSTRING_TRIVIAL_FUNCTION_BODY_LINE_LIMIT:
2185
+ continue
2186
+ if each_node.args.kwarg is not None:
2187
+ continue
2188
+ documented_names = _documented_argument_names(_function_docstring_text(each_node))
2189
+ if not documented_names:
2190
+ continue
2191
+ real_names = _signature_parameter_names(each_node)
2192
+ for each_documented_name in documented_names:
2193
+ if each_documented_name in real_names:
2194
+ continue
2195
+ issues.append(
2196
+ f"Line {each_node.lineno}: {each_node.name}() docstring Args: lists "
2197
+ f"'{each_documented_name}' which is not a parameter - update the "
2198
+ "docstring to match the signature"
2199
+ )
2200
+ if len(issues) >= MAX_DOCSTRING_ARGS_SIGNATURE_ISSUES:
2201
+ return issues[:MAX_DOCSTRING_ARGS_SIGNATURE_ISSUES]
2202
+ return issues[:MAX_DOCSTRING_ARGS_SIGNATURE_ISSUES]
2203
+
2204
+
2095
2205
  _PASCAL_TO_SNAKE_WORD_BOUNDARY = re.compile(r"(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])")
2096
2206
 
2097
2207
 
@@ -2440,8 +2550,89 @@ def _collect_boolean_assignments(tree: ast.Module) -> list[tuple[str, int, bool]
2440
2550
  return collected
2441
2551
 
2442
2552
 
2443
- def check_boolean_naming(content: str, file_path: str) -> list[str]:
2444
- """Flag boolean assignments whose target name lacks a required prefix."""
2553
+ def _argument_is_boolean(argument_node: ast.arg, default_node: ast.expr | None) -> bool:
2554
+ annotation_is_bool = (
2555
+ isinstance(argument_node.annotation, ast.Name)
2556
+ and argument_node.annotation.id == "bool"
2557
+ )
2558
+ default_is_bool = default_node is not None and _is_bool_constant(default_node)
2559
+ return annotation_is_bool or default_is_bool
2560
+
2561
+
2562
+ def _bool_parameters_for_function(
2563
+ function_node: ast.FunctionDef | ast.AsyncFunctionDef,
2564
+ ) -> list[tuple[str, int]]:
2565
+ arguments = function_node.args
2566
+ positional_arguments = arguments.posonlyargs + arguments.args
2567
+ positional_defaults = arguments.defaults
2568
+ leading_without_default = len(positional_arguments) - len(positional_defaults)
2569
+ bool_parameters: list[tuple[str, int]] = []
2570
+ for each_position, each_argument in enumerate(positional_arguments):
2571
+ default_index = each_position - leading_without_default
2572
+ default_node = (
2573
+ positional_defaults[default_index] if default_index >= 0 else None
2574
+ )
2575
+ if each_argument.arg in ALL_SELF_AND_CLS_PARAMETER_NAMES:
2576
+ continue
2577
+ if _argument_is_boolean(each_argument, default_node):
2578
+ bool_parameters.append((each_argument.arg, each_argument.lineno))
2579
+ for each_argument, each_default in zip(arguments.kwonlyargs, arguments.kw_defaults):
2580
+ if each_argument.arg in ALL_SELF_AND_CLS_PARAMETER_NAMES:
2581
+ continue
2582
+ if _argument_is_boolean(each_argument, each_default):
2583
+ bool_parameters.append((each_argument.arg, each_argument.lineno))
2584
+ return bool_parameters
2585
+
2586
+
2587
+ def _collect_bool_parameter_names(tree: ast.Module) -> list[tuple[str, int]]:
2588
+ """Collect (name, line_number) for boolean-typed function parameters.
2589
+
2590
+ A parameter counts as boolean when its annotation is the ``bool`` name or
2591
+ its default is a boolean literal. ``self`` and ``cls`` are skipped.
2592
+
2593
+ Args:
2594
+ tree: The parsed module to inspect.
2595
+
2596
+ Returns:
2597
+ Each boolean parameter as a (name, line_number) pair.
2598
+ """
2599
+ bool_parameters: list[tuple[str, int]] = []
2600
+ for each_node in ast.walk(tree):
2601
+ if isinstance(each_node, (ast.FunctionDef, ast.AsyncFunctionDef)):
2602
+ bool_parameters.extend(_bool_parameters_for_function(each_node))
2603
+ return bool_parameters
2604
+
2605
+
2606
+ def check_boolean_naming(
2607
+ content: str,
2608
+ file_path: str,
2609
+ all_changed_lines: set[int] | None = None,
2610
+ defer_scope_to_caller: bool = False,
2611
+ ) -> list[str]:
2612
+ """Flag boolean assignments and parameters whose name lacks a required prefix.
2613
+
2614
+ The caller passes the reconstructed full file as *content* so ``ast.parse``
2615
+ sees a complete module rather than an Edit's ``new_string`` fragment, which is
2616
+ rarely valid standalone Python. Findings are then scoped to *all_changed_lines*
2617
+ so an Edit blocks on the unprefixed boolean it just introduced while a
2618
+ pre-existing violation on an untouched line does not block the edit.
2619
+
2620
+ Args:
2621
+ content: The source text to inspect — the reconstructed full file on an
2622
+ Edit so the parse succeeds.
2623
+ file_path: The path the source will be written to, used for exemptions.
2624
+ all_changed_lines: Post-edit line numbers the current edit touched, or
2625
+ None to treat the whole file as in scope. When provided, a violation
2626
+ blocks only when its source line intersects the changed lines.
2627
+ defer_scope_to_caller: When True, return every violation so the
2628
+ commit/push gate's ``split_violations_by_scope`` can scope by added
2629
+ line.
2630
+
2631
+ Returns:
2632
+ One issue per unprefixed boolean assignment and parameter, scoped to the
2633
+ changed lines unless *defer_scope_to_caller* is True or *all_changed_lines*
2634
+ is None. This check has no module cap.
2635
+ """
2445
2636
  if is_test_file(file_path):
2446
2637
  return []
2447
2638
  if is_hook_infrastructure(file_path):
@@ -2459,20 +2650,125 @@ def check_boolean_naming(content: str, file_path: str) -> list[str]:
2459
2650
  file=sys.stderr,
2460
2651
  )
2461
2652
  return []
2462
- issues: list[str] = []
2463
- for name, line_number, is_in_upper_snake_scope in _collect_boolean_assignments(tree):
2464
- if len(name) == 1:
2653
+ all_violations_in_walk_order: list[tuple[range, str]] = []
2654
+ for each_name, each_line_number, each_is_in_upper_snake_scope in _collect_boolean_assignments(tree):
2655
+ if len(each_name) == 1:
2465
2656
  continue
2466
- if is_in_upper_snake_scope and UPPER_SNAKE_CONSTANT_PATTERN.match(name):
2657
+ if each_is_in_upper_snake_scope and UPPER_SNAKE_CONSTANT_PATTERN.match(each_name):
2467
2658
  continue
2468
- if name.startswith(ALL_BOOLEAN_NAME_PREFIXES):
2659
+ if each_name.startswith(ALL_BOOLEAN_NAME_PREFIXES):
2469
2660
  continue
2470
- issues.append(
2471
- f"Line {line_number}: Boolean {name} - prefix with is_/has_/should_/can_"
2661
+ message = (
2662
+ f"Line {each_line_number}: Boolean {each_name} - prefix with "
2663
+ "is_/has_/should_/can_/was_/did_"
2472
2664
  )
2473
- return issues
2665
+ all_violations_in_walk_order.append(
2666
+ (range(each_line_number, each_line_number + 1), message)
2667
+ )
2668
+ for each_name, each_line_number in _collect_bool_parameter_names(tree):
2669
+ if len(each_name) == 1:
2670
+ continue
2671
+ if each_name.startswith(ALL_BOOLEAN_NAME_PREFIXES):
2672
+ continue
2673
+ message = (
2674
+ f"Line {each_line_number}: Boolean parameter {each_name} - prefix with "
2675
+ "is_/has_/should_/can_/was_/did_"
2676
+ )
2677
+ all_violations_in_walk_order.append(
2678
+ (range(each_line_number, each_line_number + 1), message)
2679
+ )
2680
+ return _scope_violations_to_changed_lines(
2681
+ all_violations_in_walk_order,
2682
+ all_changed_lines,
2683
+ defer_scope_to_caller,
2684
+ )
2474
2685
 
2475
2686
 
2687
+ def _called_terminal_name(call_node: ast.Call) -> str | None:
2688
+ callee = call_node.func
2689
+ if isinstance(callee, ast.Name):
2690
+ return callee.id
2691
+ if isinstance(callee, ast.Attribute):
2692
+ return callee.attr
2693
+ return None
2694
+
2695
+
2696
def check_ignored_must_check_return(
    content: str,
    file_path: str,
    all_changed_lines: set[int] | None = None,
    defer_scope_to_caller: bool = False,
) -> list[str]:
    """Report statement-level calls that throw away a must-check return value.

    The functions named in ``ALL_MUST_CHECK_RETURN_FUNCTION_NAMES`` signal
    success or failure only through what they return, so calling one as a bare
    statement lets the caller carry on after a failure without noticing. A
    finding is raised for every bare ``ast.Expr`` call to such a function,
    including one wrapped in a statement-level ``await``
    (``await find_and_click(...)``). A call whose result is assigned or
    branched on is not an ``ast.Expr`` statement and is therefore exempt.

    *content* is expected to be the reconstructed full file rather than an
    Edit's ``new_string`` fragment, because a fragment is rarely valid
    standalone Python (a lone ``await find_and_click(...)`` line is a
    SyntaxError by itself). Findings are then narrowed to *all_changed_lines*
    so an Edit is blocked by the discarded return it just introduced, not by a
    pre-existing violation on an untouched line.

    Args:
        content: The source text to inspect — the reconstructed full file on
            an Edit so the parse succeeds.
        file_path: The path the source will be written to, used for exemptions.
        all_changed_lines: Post-edit line numbers the current edit touched, or
            None to treat the whole file as in scope. When provided, a
            violation blocks only when the bare call's line span intersects
            the changed lines.
        defer_scope_to_caller: When True, return every violation so the
            commit/push gate's ``split_violations_by_scope`` can scope by
            added line.

    Returns:
        One issue per discarded must-check return. Test files and unparseable
        content yield an empty list. With *defer_scope_to_caller* True the
        result is returned uncapped so the gate can apply its own ceiling;
        otherwise it is truncated to ``MAX_IGNORED_MUST_CHECK_RETURN_ISSUES``.
    """
    if is_test_file(file_path):
        return []
    try:
        module_tree = ast.parse(content)
    except SyntaxError:
        return []
    all_discarded_call_violations: list[tuple[range, str]] = []
    for each_statement in ast.walk(module_tree):
        if not isinstance(each_statement, ast.Expr):
            continue
        # Look through a statement-level ``await`` to the call it wraps.
        candidate = each_statement.value
        if isinstance(candidate, ast.Await):
            candidate = candidate.value
        if not isinstance(candidate, ast.Call):
            continue
        called_name = _called_terminal_name(candidate)
        if called_name is None or called_name not in ALL_MUST_CHECK_RETURN_FUNCTION_NAMES:
            continue
        first_line_number = each_statement.lineno
        # ``end_lineno`` may be missing; fall back to a single-line span.
        last_line_number = each_statement.end_lineno or first_line_number
        all_discarded_call_violations.append(
            (
                range(first_line_number, last_line_number + 1),
                f"Line {first_line_number}: return value of {called_name}() is discarded - "
                "assign and check it (the boolean/outcome is the only failure signal)",
            )
        )
    scoped_issues = _scope_violations_to_changed_lines(
        all_discarded_call_violations,
        all_changed_lines,
        defer_scope_to_caller,
    )
    if defer_scope_to_caller:
        return scoped_issues
    return scoped_issues[:MAX_IGNORED_MUST_CHECK_RETURN_ISSUES]
2771
+
2476
2772
 
2477
2773
  def _decorator_name_contains_skip(decorator_node: ast.expr) -> bool:
2478
2774
  """Return True when a decorator AST node references an identifier containing 'skip'."""
@@ -5570,7 +5866,23 @@ def validate_content(
5570
5866
  all_issues.extend(check_thin_wrapper_files(effective_content, file_path))
5571
5867
  all_issues.extend(check_boundary_types(effective_content, file_path))
5572
5868
  all_issues.extend(check_docstring_format(effective_content, file_path))
5573
- all_issues.extend(check_boolean_naming(content, file_path))
5869
+ all_issues.extend(check_docstring_args_match_signature(effective_content, file_path))
5870
+ all_issues.extend(
5871
+ check_boolean_naming(
5872
+ effective_content,
5873
+ file_path,
5874
+ all_changed_lines,
5875
+ defer_scope_to_caller,
5876
+ )
5877
+ )
5878
+ all_issues.extend(
5879
+ check_ignored_must_check_return(
5880
+ effective_content,
5881
+ file_path,
5882
+ all_changed_lines,
5883
+ defer_scope_to_caller,
5884
+ )
5885
+ )
5574
5886
  all_issues.extend(check_skip_decorators_in_tests(content, file_path))
5575
5887
  all_issues.extend(
5576
5888
  check_tests_use_isolated_filesystem_paths(
@@ -68,7 +68,7 @@ def _block_context() -> str:
68
68
  f"- Files under {_exempt_plugin_segments_summary} directories\n"
69
69
  f"- Files under {_claude_dev_env_source_directories_summary} source directories\n"
70
70
  f"- Files under any directory whose ancestor contains {PLUGIN_ROOT_MARKER_DIRECTORY_NAME}/\n"
71
- "- README.md and CHANGELOG.md at any repo root\n"
71
+ "- README.md, CHANGELOG.md, CLAUDE.md, and AGENTS.md at any repo root\n"
72
72
  f"- Files under {_exempt_home_directories_summary}\n"
73
73
  "- Files under the OS temp directory"
74
74
  )
@@ -83,7 +83,7 @@ def _block_system_message() -> str:
83
83
  f"{_exempt_anywhere_filenames_summary} anywhere, {_exempt_plugin_segments_summary} trees, "
84
84
  f"{_claude_dev_env_source_directories_summary} source trees, "
85
85
  f"files under a {PLUGIN_ROOT_MARKER_DIRECTORY_NAME}/ root, "
86
- f"README.md/CHANGELOG.md at any repo root, {_exempt_home_directories_summary}, "
86
+ f"README.md/CHANGELOG.md/CLAUDE.md/AGENTS.md at any repo root, {_exempt_home_directories_summary}, "
87
87
  "and the OS temp directory."
88
88
  )
89
89