claude-dev-env 1.72.0 → 1.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. package/CLAUDE.md +2 -0
  2. package/audit-rubrics/category_rubrics/category-o-docstring-vs-impl-drift.md +2 -2
  3. package/bin/install.mjs +73 -5
  4. package/bin/install.test.mjs +360 -4
  5. package/hooks/blocking/CLAUDE.md +6 -1
  6. package/hooks/blocking/block_main_commit.py +14 -0
  7. package/hooks/blocking/bot_mention_comment_blocker.py +7 -0
  8. package/hooks/blocking/claude_md_orphan_file_blocker.py +19 -48
  9. package/hooks/blocking/code_rules_dead_config_field.py +69 -56
  10. package/hooks/blocking/code_rules_docstrings.py +839 -0
  11. package/hooks/blocking/code_rules_enforcer.py +38 -0
  12. package/hooks/blocking/code_rules_shared.py +19 -0
  13. package/hooks/blocking/code_verifier_spawn_preflight_gate.py +426 -0
  14. package/hooks/blocking/convergence_gate_blocker.py +17 -3
  15. package/hooks/blocking/destructive_command_blocker.py +7 -0
  16. package/hooks/blocking/docstring_rule_gate_count_blocker.py +321 -0
  17. package/hooks/blocking/gh_body_arg_blocker.py +8 -0
  18. package/hooks/blocking/gh_pr_author_enforcer.py +7 -0
  19. package/hooks/blocking/hedging_language_blocker.py +16 -10
  20. package/hooks/blocking/hook_prose_detector_consistency.py +7 -0
  21. package/hooks/blocking/intent_only_ending_blocker.py +17 -11
  22. package/hooks/blocking/md_to_html_blocker.py +17 -10
  23. package/hooks/blocking/open_questions_in_plans_blocker.py +15 -8
  24. package/hooks/blocking/package_inventory_stale_blocker.py +398 -0
  25. package/hooks/blocking/plain_language_blocker.py +57 -16
  26. package/hooks/blocking/pr_converge_bugteam_enforcer.py +11 -5
  27. package/hooks/blocking/pr_description_enforcer.py +6 -0
  28. package/hooks/blocking/pre_tool_use_dispatcher.py +545 -0
  29. package/hooks/blocking/precommit_code_rules_gate.py +10 -1
  30. package/hooks/blocking/pytest_testpaths_orphan_blocker.py +366 -0
  31. package/hooks/blocking/question_to_user_enforcer.py +18 -12
  32. package/hooks/blocking/send_user_file_open_locally_blocker.py +70 -0
  33. package/hooks/blocking/sensitive_file_protector.py +15 -1
  34. package/hooks/blocking/session_handoff_blocker.py +14 -8
  35. package/hooks/blocking/state_description_blocker.py +81 -36
  36. package/hooks/blocking/subprocess_budget_completeness.py +9 -3
  37. package/hooks/blocking/tdd_enforcer.py +6 -0
  38. package/hooks/blocking/test_code_rules_enforcer_dead_config_field.py +81 -0
  39. package/hooks/blocking/test_code_rules_enforcer_docstring_inline_literal_claim.py +93 -0
  40. package/hooks/blocking/test_code_rules_enforcer_docstring_returns_plural_cardinality.py +207 -0
  41. package/hooks/blocking/test_code_rules_enforcer_docstring_step_dispatch.py +262 -0
  42. package/hooks/blocking/test_code_rules_enforcer_docstring_undefined_constant.py +253 -0
  43. package/hooks/blocking/test_code_rules_enforcer_docstring_unguarded_payload.py +188 -0
  44. package/hooks/blocking/test_code_rules_enforcer_module_docstring_roster.py +279 -0
  45. package/hooks/blocking/test_code_verifier_spawn_preflight_gate.py +501 -0
  46. package/hooks/blocking/test_docstring_rule_gate_count_blocker.py +203 -0
  47. package/hooks/blocking/test_hook_block_logger_coverage.py +53 -0
  48. package/hooks/blocking/test_package_inventory_stale_blocker.py +329 -0
  49. package/hooks/blocking/test_plain_language_blocker.py +36 -0
  50. package/hooks/blocking/test_pre_tool_use_dispatcher.py +816 -0
  51. package/hooks/blocking/test_pre_tool_use_dispatcher_native.py +341 -0
  52. package/hooks/blocking/test_pytest_testpaths_orphan_blocker.py +247 -0
  53. package/hooks/blocking/test_send_user_file_open_locally_blocker.py +114 -0
  54. package/hooks/blocking/test_shared_stdin_adoption.py +208 -0
  55. package/hooks/blocking/test_state_description_blocker.py +41 -0
  56. package/hooks/blocking/test_verdict_directory_write_blocker.py +49 -0
  57. package/hooks/blocking/test_workflow_substitution_slot_blocker.py +4 -19
  58. package/hooks/blocking/verdict_directory_write_blocker.py +21 -7
  59. package/hooks/blocking/verified_commit_gate.py +11 -0
  60. package/hooks/blocking/verified_commit_message_accuracy_blocker.py +16 -1
  61. package/hooks/blocking/windows_rmtree_blocker.py +7 -0
  62. package/hooks/blocking/workflow_substitution_slot_blocker.py +10 -5
  63. package/hooks/blocking/write_existing_file_blocker.py +16 -1
  64. package/hooks/hooks.json +19 -79
  65. package/hooks/hooks_constants/CLAUDE.md +7 -1
  66. package/hooks/hooks_constants/blocking_check_limits.py +74 -0
  67. package/hooks/hooks_constants/code_rules_enforcer_constants.py +9 -0
  68. package/hooks/hooks_constants/code_verifier_spawn_preflight_gate_constants.py +45 -0
  69. package/hooks/hooks_constants/dead_config_field_constants.py +5 -5
  70. package/hooks/hooks_constants/docstring_rule_gate_count_blocker_constants.py +90 -0
  71. package/hooks/hooks_constants/hook_block_logger.py +59 -0
  72. package/hooks/hooks_constants/multi_edit_reconstruction.py +56 -0
  73. package/hooks/hooks_constants/mypy_validator_cache_constants.py +36 -0
  74. package/hooks/hooks_constants/package_inventory_stale_blocker_constants.py +111 -0
  75. package/hooks/hooks_constants/post_tool_use_dispatcher_constants.py +68 -0
  76. package/hooks/hooks_constants/pre_tool_use_dispatcher_constants.py +143 -0
  77. package/hooks/hooks_constants/pytest_testpaths_orphan_blocker_constants.py +79 -0
  78. package/hooks/hooks_constants/send_user_file_open_locally_blocker_constants.py +18 -0
  79. package/hooks/hooks_constants/test_dispatcher_constants_docstrings.py +44 -0
  80. package/hooks/hooks_constants/test_hook_block_logger.py +159 -0
  81. package/hooks/lifecycle/config_change_guard.py +12 -0
  82. package/hooks/lifecycle/test_config_change_guard.py +23 -0
  83. package/hooks/validation/hook_format_validator.py +13 -0
  84. package/hooks/validation/mypy_validator.py +245 -18
  85. package/hooks/validation/post_tool_use_dispatcher.py +344 -0
  86. package/hooks/validation/test_hook_format_validator.py +64 -0
  87. package/hooks/validation/test_mypy_validator.py +206 -1
  88. package/hooks/validation/test_post_tool_use_dispatcher.py +610 -0
  89. package/hooks/workflow/test_auto_formatter.py +10 -9
  90. package/package.json +1 -1
  91. package/rules/CLAUDE.md +1 -0
  92. package/rules/docstring-prose-matches-implementation.md +4 -2
  93. package/rules/package-inventory-stale-entry.md +24 -0
  94. package/skills/autoconverge/SKILL.md +111 -1
  95. package/skills/autoconverge/workflow/converge.contract.test.mjs +106 -0
  96. package/skills/autoconverge/workflow/converge.mjs +29 -3
  97. package/skills/autoconverge/workflow/converge.path-aware.test.mjs +47 -0
  98. package/skills/autoconverge/workflow/converge_multi.mjs +161 -0
  99. package/skills/autoconverge/workflow/converge_multi.run-input.test.mjs +100 -0
package/rules/CLAUDE.md CHANGED
@@ -28,6 +28,7 @@ Rule files installed into `~/.claude/rules/` by `bin/install.mjs`. Claude Code l
28
28
  | `no-historical-clutter.md` | Documentation describes current state only; no historical or transitional language |
29
29
  | `no-inline-destructive-literals.md` | No destructive-command literals in Bash tool command strings, even as data |
30
30
  | `orphan-css-class.md` | Every `class="..."` attribute in Python-generated markup has a matching selector in the `<style>` block |
31
+ | `package-inventory-stale-entry.md` | A new production code file added to a directory carries an entry in that directory's `README.md`/`CLAUDE.md` file inventory |
31
32
  | `parallel-tools.md` | Make all independent tool calls in a single response |
32
33
  | `plain-language.md` | Everyday words, short active sentences, lead with the answer |
33
34
  | `prompt-workflow-context-controls.md` | Keep prompt-workflow instruction layers small and stable; load heavy skills on demand |
package/rules/docstring-prose-matches-implementation.md CHANGED
@@ -6,7 +6,7 @@
6
6
 
7
7
  When a docstring enumerates the behaviors a body applies, the enumeration covers every behavior the body applies. A reader trusts the list to be complete: an item the code applies but the prose omits is a silent gap that misleads every future reader and reviewer.
8
8
 
9
- The gate validator `check_docstring_args_match_signature` covers the `Args:` section parameter names. Three more gate validators each cover one deterministic slice of the free-form prose. `check_docstring_fallback_branch_coverage` covers a summary that scopes a fallback to a single condition (`only when`, `falls back to ... when`) while the body routes to that same fallback call from two or more distinct early-return guards. `check_class_docstring_names_public_methods` covers a class whose docstring is a single summary line while the class exposes two or more public methods whose names the summary never spells out — the drift where a one-line class summary keeps naming its first feature after the class grows a second public entry point. `check_docstring_no_consumer_claim` covers a producer docstring asserting that no consumer reads its output yet (`producer-only artifact`, `no submission-run consumer reads it yet`) — a transitional claim that drifts the moment a reader lands and contradicts any companion `SKILL.md` that documents the consumer; this is the deterministic slice of the O8 companion-doc producer/consumer drift below. The remaining free-form prose — `"a field counts as read when ..."`, `"resolves to shared temp only"`, `"strip ceremony, then drop blockquotes"`, and module-level responsibility paragraphs — has no signature, method roster, or single structural shape to compare against, so the gate cannot catch its drift. This rule is the judgment standard for that prose; the audit lane below is the enforcement for everything outside the four gated slices.
9
+ The gate validator `check_docstring_args_match_signature` covers the `Args:` section parameter names. Four more gate validators each cover one deterministic slice of the free-form prose. `check_docstring_fallback_branch_coverage` covers a summary that scopes a fallback to a single condition (`only when`, `falls back to ... when`) while the body routes to that same fallback call from two or more distinct early-return guards. `check_class_docstring_names_public_methods` covers a class whose docstring is a single summary line while the class exposes two or more public methods whose names the summary never spells out — the drift where a one-line class summary keeps naming its first feature after the class grows a second public entry point. `check_docstring_no_consumer_claim` covers a producer docstring asserting that no consumer reads its output yet (`producer-only artifact`, `no submission-run consumer reads it yet`) — a transitional claim that drifts the moment a reader lands and contradicts any companion `SKILL.md` that documents the consumer; this is the deterministic slice of the O8 companion-doc producer/consumer drift below. `check_docstring_returns_plural_cardinality` covers a `Returns:` clause that names a dict-key prefix family with a plural noun (`the sheen stops`) while the returned dict literal holds exactly one key in that family (`sheen_mid`) — the drift where a single-key family carries a plural noun, so the prose claims a cardinality of two or more that the dict does not hold. The remaining free-form prose — `"a field counts as read when ..."`, `"resolves to shared temp only"`, `"strip ceremony, then drop blockquotes"`, and module-level responsibility paragraphs — has no signature, method roster, or single structural shape to compare against, so the gate cannot catch its drift. This rule is the judgment standard for that prose; the audit lane below is the enforcement for everything outside the five gated slices.
10
10
 
11
11
  ## What to check before you write the docstring
12
12
 
@@ -15,8 +15,10 @@ Read the body and the docstring side by side:
15
15
  - **Read-source / match-source unions.** A body that computes `read_names = a | b | c` (or any union of "what counts") names each union member in the prose enumeration. A union member the code applies but the prose omits is a gap.
16
16
  - **Suppressor / skip lists.** A body with several early returns that suppress the check names each suppressor in the prose.
17
17
  - **Shared fallback routes.** A summary that scopes a fallback call to one condition names every condition that reaches that call. When the body routes to the same fallback from two or more early-return guards (`if a is None: fallback(); return` and `if random() < p: fallback(); return`), the prose enumerates both guards. The `check_docstring_fallback_branch_coverage` gate blocks the single-condition form of this drift at Write/Edit time.
18
- - **Step order.** A docstring that says `A then B then C` matches the call order in the body.
18
+ - **Step order.** A docstring that says `A then B then C` matches the call order in the body. A step enumeration that names the body's linear steps also names every corrective step the body guards inside an `if`/`elif` branch (`if not await cancel_and_reinitiate_update(...): return`). The `check_docstring_step_enumeration_dispatch_coverage` gate blocks the branch-guarded-dispatch form of this drift — a step-enumeration docstring that omits a two-or-more-token dispatch step the body guards inside a branch — at Write/Edit time.
19
+ - **Returns-clause cardinality.** A `Returns:` clause that names a dict-key prefix family with a plural noun (`the sheen stops`) matches the count of keys in that family in the returned dict literal. When the dict holds one key in the family (`sheen_mid`), the noun is singular (`the sheen stop`); a plural noun there claims two or more entries the dict does not hold. The `check_docstring_returns_plural_cardinality` gate blocks the single-key-with-plural-noun form of this drift at Write/Edit time.
19
20
  - **Predicate breadth.** A boolean helper whose prose promises a narrow check accepts only the inputs the prose names — no broader input class the name and prose do not mention.
21
+ - **Exclusion-clause distinguisher.** A docstring sentence that says a named category of input "are not" / "is not" the thing the function flags (`plain logging, screenshot, or method-on-local calls inside a branch are not dispatch steps`) keys the exclusion to the same axis the body's classification keys on. When the body decides on one axis (a call sits in an `If.test` guard versus a plain statement) but the prose excludes on a different axis (the call's receiver shape — a method on a local), the exclusion clause names a category the body still flags: a guarded method-on-local call is flagged even though the prose lists method-on-local calls as excluded. Read the body's actual branch condition, then state the exclusion on that same axis (`plain (unguarded) calls inside a branch body are not dispatch steps`), so every member the prose excludes is a member the body also excludes.
20
22
  - **Companion-doc ordering and content claims.** A `SKILL.md` (or sibling `.md`) sentence that names a produced artifact and claims its order (`sorted`, `alphabetical`, `in sorted order`) or its content (`the at-risk names`, `just the current set`) matches the producer function's docstring and body for that same artifact. A producer that builds the artifact by merging stored names with new names and appending — preserving file order, not re-sorting the union — leaves a doc that still says `sorted` drifted on both counts: the order claim is wrong, and the content claim hides the merged-in prior entries. When the producer's ordering or union changes, the same change updates the companion doc. The two move together in one commit, even when the producer edit does not touch the `.md` file.
21
23
 
22
24
  When the body changes the set of behaviors it applies, the same edit updates the prose enumeration. The two move together in one commit.
package/rules/package-inventory-stale-entry.md ADDED
@@ -0,0 +1,24 @@
1
+ # New Production File Absent From Its Package Inventory
2
+
3
+ **When this applies:** Any Write that creates a new production code file (`.py`, `.mjs`, `.js`, `.ts`, `.ps1`, `.sh`) in a directory whose sibling `README.md` or `CLAUDE.md` already names two or more of the directory's files in backticks.
4
+
5
+ ## Rule
6
+
7
+ A package directory that documents its own files in a `README.md` Layout table or a `CLAUDE.md` "Key files" list keeps that inventory in step with the directory. A new production file the inventory does not name leaves the inventory and the directory disagreeing on the package's file set: a reader who trusts the inventory to map the directory misses the new file.
8
+
9
+ When you create a new production file in such a directory, add an entry naming it — a row in the `README.md` table, a bullet in the `CLAUDE.md` list — in the same change. The entry names the file in backticks and says what it does.
10
+
11
+ ## What the gate checks
12
+
13
+ The `package_inventory_stale_blocker.py` hook runs on every Write whose target is a new file (a path not yet on disk). It:
14
+
15
+ 1. Skips a target that is not a production code file (`.py`, `.mjs`, `.js`, `.ts`, `.ps1`, `.sh`), that has an exempt basename (`__init__.py`, `conftest.py`, `setup.py`, `_path_setup.py`), that is a test file (`test_*.py`, `*_test.py`, `*.spec.*`, `*.test.*`), or that sits directly inside a `config/` or `tests/` directory.
16
+ 2. Reads each `README.md` and `CLAUDE.md` present in the target's own directory and collects every bare filename they name in backticks. A backticked token holding a path contributes its final segment, so `pipeline/seam_continuity.py` in an inventory counts as naming `seam_continuity.py`. A multi-word command-example span — one carrying whitespace or shell punctuation (`:`, `$`, `<`, `>`), such as `parent:node_modules package.json` or `python <file>.py` — names no literal file and is dropped.
17
+ 3. Filters the named basenames to those that exist as a file in the target's own directory — the inventory's own sibling files — and treats the directory as carrying a maintained inventory only when two or more such sibling files are named. A directory with no inventory, one whose `README.md` mentions a single file in passing, or one whose inventory prose names only files living in other directories (so no named basename is an on-disk sibling) is out of scope.
18
+ 4. Blocks the write when the new file's basename appears in no present inventory. An unreadable or oversized inventory document is skipped as if it were absent, so an inventory that cannot be read never blocks a write.
19
+
20
+ The check fires on Write only — editing an existing file creates no new file that needs an inventory entry — and stays quiet for a directory with no inventory document, an inventory naming too few siblings to be a maintained list, an exempt or test file, and a file the inventory already names.
21
+
22
+ ## Why this is a hook, not a lint pass
23
+
24
+ A package inventory that omits a file reads as a complete map of the directory while leaving one file off it. A reader trusting the inventory to list the package misses the new file, and the gap survives review because the inventory still looks complete. Catching it as the new file is written keeps the inventory and the directory in step. This is the counterpart to `claude-md-orphan-file.md`, which catches the reverse drift: an inventory entry naming a file the directory does not hold.
package/skills/autoconverge/SKILL.md CHANGED
@@ -23,6 +23,22 @@ the workflow journal.
23
23
  autoconverge runs it as a deterministic workflow. The two skills share the same
24
24
  helper scripts and the same convergence gate.
25
25
 
26
+ ## Run scope: one PR or several
27
+
28
+ Decide the scope from how many PRs the user named, then follow that path:
29
+
30
+ 1. **One PR** → the single-PR run described below (`workflow/converge.mjs`): one
31
+ worktree, one workflow launch, one teardown.
32
+ 2. **Several PRs** → the [Multiple PRs](#multiple-prs) run
33
+ (`workflow/converge_multi.mjs`): one worktree per PR and a single workflow
34
+ launch that drives every PR's converge run in parallel, then one teardown per
35
+ PR.
36
+
37
+ The single-PR sections (Requirements, Pre-flight, Run the workflow, Teardown)
38
+ each describe one converge run. The Multiple PRs section reuses them once per PR
39
+ and adds only what fanning out needs: a per-PR worktree and a per-PR teardown
40
+ loop.
41
+
26
42
  ## Requirements
27
43
 
28
44
  Scan the tool list at the top of this conversation for the literal string
@@ -245,7 +261,24 @@ agents never inline a destructive-command literal (`rm -rf`, `git reset --hard`,
245
261
  `dd`) into a Bash command — the `destructive_command_blocker` hook matches those
246
262
  patterns as raw text, and a confirmation prompt no human can answer would stall
247
263
  the run. Agents verify destructive-blocker behavior through the committed test
248
- suite (`python -m pytest`) and keep scratch work in ephemeral temp dirs.
264
+ suite (`python -m pytest`) and keep scratch work in the OS temp dir. The preamble
265
+ describes the narrowest rm auto-allow path — a standalone Bash call whose target
266
+ resolves inside the ephemeral namespace (`/tmp`, `/temp`, the OS temp root, or the
267
+ run worktree) — and a compound path that accepts an rm joined with benign
268
+ reporting segments when every rm target is an absolute ephemeral path. Both of
269
+ those paths fail closed on `$(...)` substitution, backtick subshells, and any `$`
270
+ in the target — including `$CLAUDE_JOB_DIR` — so neither resolves an environment
271
+ variable. A third, broad path matches only when the command itself declares an
272
+ ephemeral working directory (it `cd`s into one, or runs under one): that
273
+ cwd-scoped path resolves the target against the declared cwd, fails closed on
274
+ `$(...)`, backticks, and unknown variables, and resolves the known temporary
275
+ variables `TEMP`, `TMP`, `TMPDIR`, and `CLAUDE_JOB_DIR` to the OS temp root, so
276
+ under that declared ephemeral cwd a bare `$CLAUDE_JOB_DIR/tmp/<name>` target and a
277
+ relative target after a `cd` are auto-allowed. Even so, for any cleanup whose path
278
+ is variable-built or whose teardown spans multiple steps, agents author a Python
279
+ helper file and run it as `python <file>.py` — keeping every destructive literal
280
+ out of a Bash command string entirely and independent of which auto-allow path
281
+ matches.
249
282
 
250
283
  - **Converge:** `parallel([Bugbot lens, code-review lens, bug-audit lens])` on
251
284
  the current HEAD, full `origin/main...HEAD` diff. Dedup findings; one
@@ -270,10 +303,87 @@ suite (`python -m pytest`) and keep scratch work in ephemeral temp dirs.
270
303
  - **Convergence check:** `check_convergence.py` is the authoritative gate; on a
271
304
  full pass the workflow marks `draft=false`.
272
305
 
306
+ ## Multiple PRs
307
+
308
+ The multi-PR run drives several draft PRs to ready in one launch:
309
+ `workflow/converge_multi.mjs` fans out one `converge.mjs` child run per PR with
310
+ `parallel()`, and every child is pinned to its own PR's worktree through the
311
+ `repoPath` it receives, so the children never share a checkout. Each child run is
312
+ the exact single-PR convergence loop — same rounds, same reuse pass, same Copilot
313
+ gate, same convergence check — one per PR, all running at once. The children share the run's
314
+ concurrency cap, so the fan-out self-throttles rather than spawning every PR's
315
+ lenses at the same instant.
316
+
317
+ ### Multi-PR pre-flight (main session)
318
+
319
+ `EnterWorktree` puts the session on one branch only, so the multi-PR path gives
320
+ each PR its own checkout with `git worktree add`. For each PR the user named:
321
+
322
+ 1. **Resolve PR scope** as the single-PR pre-flight step 2 does: capture `owner`,
323
+ `repo`, `prNumber`, and `headRefName`; confirm the PR is a draft, and mark it
324
+ draft (`gh pr ready <n> --repo <o>/<r> --undo`) when it is already ready so the
325
+ loop owns the ready transition.
326
+ 2. **Create a worktree on the PR's head ref** and capture its absolute path. From
327
+ inside the PR's repository checkout:
328
+ `git worktree add <abs worktree path> <headRefName>` (run `git fetch origin
329
+ <headRefName>` first when the ref is not local). Put each PR's worktree under a
330
+ path carrying its PR number so the fan-out keeps them distinct. Confirm
331
+ `git -C <abs worktree path> rev-parse --abbrev-ref HEAD` equals the head ref
332
+ and its `HEAD` equals the PR head SHA.
333
+ 3. **Verify each worktree is the PR's repo (strict pre-flight):**
334
+ `python "$HOME/.claude/skills/_shared/pr-loop/scripts/preflight_worktree.py" --owner <owner> --repo <repo> --mode strict`,
335
+ run with that worktree as the working directory. A non-zero exit prints a
336
+ `PREFLIGHT_OUTCOME` line and an `ABORT` line: report it and drop that PR from
337
+ the run rather than aborting every PR.
338
+ 4. **Grant project permissions once per repository** — the single-PR pre-flight
339
+ step 4 grant covers every worktree of the same repo, so run it one time for
340
+ the repo the PRs live in.
341
+
342
+ ### Launch the multi-PR workflow
343
+
344
+ Call the `Workflow` tool against the fan-out script, passing the absolute path of
345
+ `converge.mjs` and one entry per PR:
346
+
347
+ ```
348
+ Workflow({
349
+ scriptPath: "<this skill dir>/workflow/converge_multi.mjs",
350
+ args: {
351
+ convergeScriptPath: "<this skill dir>/workflow/converge.mjs",
352
+ prs: [
353
+ { owner: "<O>", repo: "<R>", prNumber: <N1>, repoPath: "<abs worktree 1>", bugbotDisabled: false },
354
+ { owner: "<O>", repo: "<R>", prNumber: <N2>, repoPath: "<abs worktree 2>", bugbotDisabled: false }
355
+ ]
356
+ }
357
+ })
358
+ ```
359
+
360
+ `convergeScriptPath` is the absolute path to `workflow/converge.mjs` in this same
361
+ skill directory; each `repoPath` is the absolute path of the worktree that PR is
362
+ checked out in. The workflow runs in the background and notifies this session on
363
+ completion; watch live progress with `/workflows`, where each PR's child run
364
+ appears under its own group.
365
+
366
+ The workflow returns `{ converged, prCount, convergedCount, results, blocker }`,
367
+ where `results` is one record per PR carrying
368
+ `{ owner, repo, prNumber, converged, rounds, finalSha, blocker }`. The top-level
369
+ `converged` is true only when every PR converged.
370
+
371
+ ### Multi-PR teardown (on workflow completion)
372
+
373
+ Run the single-PR [Teardown](#teardown-on-workflow-completion) once per entry in
374
+ `results`, using that PR's `owner`, `repo`, `prNumber`, and `finalSha`, and its
375
+ own worktree as the working directory. Build and publish a PR's closing report
376
+ only for a PR whose `converged` is true; for a PR that returned a blocker, skip
377
+ its report and carry the blocker into the final summary. Revoke project
378
+ permissions once per repository after every PR's teardown. Then print one summary
379
+ report — a line per PR as
380
+ `#<prNumber>: <converged | blocked> — rounds <N>, final <finalSha>[, blocker <blocker>]`.
381
+
273
382
  ## Folder map
274
383
 
275
384
  - `SKILL.md` — this hub.
276
385
  - `workflow/converge.mjs` — the convergence workflow script.
386
+ - `workflow/converge_multi.mjs` — the multi-PR fan-out driver: one `converge.mjs` child run per PR in parallel, each pinned to its PR worktree via `repoPath`.
277
387
  - `workflow/aggregate_runs.py` — merges every autoconverge journal for a PR into one journal and returns its deduped findings, fix summaries, round count, and final SHA.
278
388
  - `workflow/convergence_summary.py` — builds the convergence-summary agent prompt over a PR's merged findings.
279
389
  - `workflow/render_report.py` — builds the closing convergence insights HTML report, taking the summary from `--summary-file`.
package/skills/autoconverge/workflow/converge.contract.test.mjs CHANGED
@@ -10,6 +10,10 @@ const gotchasSource = readFileSync(
10
10
  join(workflowDirectory, '..', 'reference', 'gotchas.md'),
11
11
  'utf8',
12
12
  );
13
+ const skillSource = readFileSync(
14
+ join(workflowDirectory, '..', 'SKILL.md'),
15
+ 'utf8',
16
+ );
13
17
 
14
18
  function lensPromptBody(builderName) {
15
19
  const builderStart = convergeSource.indexOf(`function ${builderName}(`);
@@ -547,3 +551,105 @@ for (const builderName of editStepBuilders) {
547
551
  );
548
552
  });
549
553
  }
554
+
555
+ function preambleText() {
556
+ const preambleStart = convergeSource.indexOf('const HEADLESS_SAFETY_PREAMBLE =');
557
+ assert.notEqual(preambleStart, -1, 'expected HEADLESS_SAFETY_PREAMBLE to exist');
558
+ const preambleEnd = convergeSource.indexOf('\n\nlet ', preambleStart);
559
+ return convergeSource.slice(preambleStart, preambleEnd === -1 ? undefined : preambleEnd);
560
+ }
561
+
562
+ test('preamble prescribes authoring a Python helper for variable-built or multi-step sandboxes', () => {
563
+ assert.match(
564
+ preambleText(),
565
+ /python\s+<file>\.py|python\s+<.*>\.py|author.*python.*helper|python.*helper.*sandbox|sandbox.*python.*helper/i,
566
+ 'expected the preamble to prescribe running a Python helper file for multi-step sandbox teardown',
567
+ );
568
+ });
569
+
570
+ test('preamble does not claim any $ in the rm target makes the gate fail closed', () => {
571
+ assert.doesNotMatch(
572
+ preambleText(),
573
+ /any\s+\$[^\n]*fail closed/i,
574
+ 'the hook resolves known temp variables (TEMP/TMP/TMPDIR/CLAUDE_JOB_DIR), so a bare $ does not always fail closed',
575
+ );
576
+ });
577
+
578
+ test('preamble does not claim $CLAUDE_JOB_DIR/tmp is blocked', () => {
579
+ assert.doesNotMatch(
580
+ preambleText(),
581
+ /CLAUDE_JOB_DIR\/tmp is NOT auto-allowed/i,
582
+ 'under an ephemeral cwd the hook auto-allows rm targeting $CLAUDE_JOB_DIR/tmp',
583
+ );
584
+ });
585
+
586
+ test('preamble scopes its rm-shape claim to the narrowest auto-allow path, not the full set', () => {
587
+ assert.doesNotMatch(
588
+ preambleText(),
589
+ /auto-allows rm only when ALL of these hold/i,
590
+ 'the hook has three rm auto-allow paths, so the preamble must not assert one narrow shape is the complete set',
591
+ );
592
+ });
593
+
594
+ test('SKILL.md does not claim any $ in the rm target makes the gate fail closed', () => {
595
+ assert.doesNotMatch(
596
+ skillSource,
597
+ /any\s+`?\$`?[^\n]*fail closed/i,
598
+ 'the hook resolves known temp variables (TEMP/TMP/TMPDIR/CLAUDE_JOB_DIR), so a bare $ does not always fail closed',
599
+ );
600
+ });
601
+
602
+ test('SKILL.md does not claim it enforces the exact rm shape the hook auto-allows', () => {
603
+ assert.doesNotMatch(
604
+ skillSource,
605
+ /exact rm shape the hook auto-allows/i,
606
+ 'the hook has multiple rm auto-allow paths, so SKILL.md must not assert one narrow shape is the exact set',
607
+ );
608
+ });
609
+
610
+ test('preamble does not attribute the known-temp-var resolution to the standalone or compound paths', () => {
611
+ assert.doesNotMatch(
612
+ preambleText().replace(/\s+/g, ' '),
613
+ /Across these paths[\s\S]*?CLAUDE_JOB_DIR/i,
614
+ 'the temp-var resolution lives only in the broad cwd-scoped path; the standalone and compound paths fail closed on any $',
615
+ );
616
+ });
617
+
618
+ test('preamble attributes the known-temp-var resolution to a third cwd-scoped auto-allow path', () => {
619
+ const text = preambleText().replace(/\s+/g, ' ');
620
+ const tempVarSentenceMatch =
621
+ /[^.]*\bTMPDIR\b[^.]*CLAUDE_JOB_DIR[^.]*\./i.exec(text);
622
+ assert.notEqual(
623
+ tempVarSentenceMatch,
624
+ null,
625
+ 'expected a sentence describing the TEMP/TMP/TMPDIR/CLAUDE_JOB_DIR resolution',
626
+ );
627
+ assert.match(
628
+ tempVarSentenceMatch[0],
629
+ /declares? an ephemeral cwd|declared ephemeral cwd|ephemeral-cwd path|third (?:auto-allow )?path|cwd-scoped path/i,
630
+ 'expected the temp-var resolution to be tied to the cwd-scoped path that declares an ephemeral working directory, not the standalone or compound paths',
631
+ );
632
+ });
633
+
634
+ test('SKILL.md does not attribute the known-temp-var resolution to the standalone or compound paths', () => {
635
+ assert.doesNotMatch(
636
+ skillSource.replace(/\s+/g, ' '),
637
+ /Across those paths[\s\S]*?CLAUDE_JOB_DIR/i,
638
+ 'the temp-var resolution lives only in the broad cwd-scoped path; the standalone and compound paths fail closed on any $',
639
+ );
640
+ });
641
+
642
+ test('SKILL.md attributes the known-temp-var resolution to the cwd-scoped auto-allow path', () => {
643
+ const tempVarSentenceMatch =
644
+ /[^.]*\bTMPDIR\b[^.]*CLAUDE_JOB_DIR[^.]*\./i.exec(skillSource.replace(/\s+/g, ' '));
645
+ assert.notEqual(
646
+ tempVarSentenceMatch,
647
+ null,
648
+ 'expected a sentence describing the TEMP/TMP/TMPDIR/CLAUDE_JOB_DIR resolution',
649
+ );
650
+ assert.match(
651
+ tempVarSentenceMatch[0],
652
+ /declares? an ephemeral cwd|declared ephemeral cwd|ephemeral-cwd path|third (?:auto-allow )?path|cwd-scoped path/i,
653
+ 'expected the temp-var resolution to be tied to the cwd-scoped path that declares an ephemeral working directory, not the standalone or compound paths',
654
+ );
655
+ });
@@ -33,19 +33,44 @@ const HEADLESS_SAFETY_PREAMBLE =
33
33
  'HEADLESS RUN — you run unattended: no human can answer a permission or confirmation prompt, and any such prompt stalls the entire convergence run. The destructive_command_blocker hook matches dangerous patterns (rm -rf, git reset --hard, dd, mkfs, chmod -R, fork bombs) as raw text anywhere in a Bash command, with no quote-awareness — so a destructive string stalls you even when it is only data you never execute. Therefore:\n' +
34
34
  '- Never place a destructive-command literal inside a Bash command — not in echo, not in a heredoc, and not as an argument to python -c, node -e, or awk. To exercise or verify destructive_command_blocker (or any hook) behavior, run the committed test suite, e.g. python -m pytest <test_file>, which passes the command strings as in-language data rather than as a shell command.\n' +
35
35
  '- When a commit message, or a PR / issue / review-comment body, must describe destructive-command behavior, write that text to a file and pass it by path (git commit -F <file>, gh ... --body-file <file>); never inline it with git commit -m or gh ... -b, where the literal lands in the Bash command and stalls you.\n' +
36
- '- Keep scratch files and cleanup inside the OS temp dir or $CLAUDE_JOB_DIR/tmp (auto-allowed as ephemeral); never target a repository or worktree path with rm -rf.\n' +
36
+ '- Keep scratch files and cleanup inside the OS temp dir; never target a repository or worktree path.\n' +
37
+ '- rm shape rules — the hook grants several rm auto-allow paths. The simplest one accepts a standalone Bash call whose target resolves inside the ephemeral namespace (/tmp, /temp, the OS temp root, or the run worktree); a compound path accepts an rm joined with benign reporting segments when every rm target is an absolute ephemeral path. Both of those paths fail closed on $(...) command substitution, on backtick subshells, and on any $ in the target — including $CLAUDE_JOB_DIR — so neither resolves an environment variable. A third, broad path matches only when the command itself declares an ephemeral working directory (it cds into one, or runs under one): that cwd-scoped path resolves the target against the declared cwd, fails closed on $(...) , backticks, and unknown variables, and resolves the known temporary variables TEMP, TMP, TMPDIR, and CLAUDE_JOB_DIR to the OS temp root, so under that declared ephemeral cwd a bare $CLAUDE_JOB_DIR/tmp/<name> target and a relative target after a cd are auto-allowed. Even so, prefer a Python helper for any cleanup whose path is variable-built or whose setup/teardown spans multiple steps: author the helper file and run it as python <file>.py, which keeps every destructive literal out of a Bash command string entirely and never depends on which auto-allow path matches.\n' +
37
38
  '- If a step appears to require a real destructive command, use a non-destructive equivalent or report it as a blocker instead of running it.\n\n'
38
39
 
40
// Worktree path for a path-scoped run; stays null until the run assigns it from input.repoPath.
let activeRepoPath = null

/**
 * Produce the directive that pins an agent to the worktree one PR is checked out in.
 *
 * A multi-PR parent run launches several converge children from a single shared
 * working directory. Each child therefore tells its own agents which worktree
 * holds its PR, so their git, gh, diff, edit, commit, and test commands run in
 * that checkout instead of the shared launch directory. The parent supplies the
 * path as input.repoPath, which is stored in activeRepoPath. A single-PR run
 * supplies no repoPath, so the directive is empty and each agent simply keeps
 * its own working directory.
 * @param {string|null} repoPath the PR worktree absolute path, or null for the single-PR default
 * @returns {string} the worktree directive to prepend, or an empty string when repoPath is null
 */
const worktreeDirective = (repoPath) => {
  // No path scoping requested: contribute nothing to the prompt.
  if (!repoPath) {
    return ''
  }
  return `WORKTREE — this PR is checked out at ${repoPath}. Unless a step explicitly names a different repository directory (for example an environment-hardening repo checkout, which you cd into exactly as that step directs), run every git, gh, diff, edit, commit, push, and test command for this PR in that worktree: cd "${repoPath}" before any such command, and resolve repository roots from there.\n\n`
}
60
+
39
61
  /**
40
62
  * Spawn a workflow agent with the headless-safety preamble prepended to its
41
63
  * prompt. Every agent in this convergence loop runs unattended, so each one is
42
- * routed through here to inherit the same no-confirmation-prompt guidance.
64
+ * routed through here to inherit the same no-confirmation-prompt guidance. On a
65
+ * path-scoped run the worktree directive is prepended too, so every agent runs
66
+ * in the PR's own worktree (activeRepoPath); on a single-PR run that directive
67
+ * is empty and the agent keeps its own working directory.
43
68
  * @param {string} prompt the agent's role-specific instruction body
44
69
  * @param {object} options the agent() options (label, phase, schema, agentType, model)
45
70
  * @returns {Promise<*>} the agent() result
46
71
  */
47
72
  const convergeAgent = (prompt, options) =>
48
- agent(`${HEADLESS_SAFETY_PREAMBLE}${prompt}`, options)
73
+ agent(`${HEADLESS_SAFETY_PREAMBLE}${worktreeDirective(activeRepoPath)}${prompt}`, options)
49
74
 
50
75
  const PRE_COMMIT_GATE_STEP =
51
76
  `\n\nFINAL STEP — pre-commit gate check (do NOT commit): before your turn ends, prove your working-tree changes CAN be committed by dry-running the CODE_RULES commit gate that gates git commit (precommit_code_rules_gate). From inside the checkout that holds your changes, resolve its root with git rev-parse --show-toplevel, stage your changes with git add -A, then run exactly:\n` +
@@ -696,6 +721,7 @@ if (runInput.blocker) {
696
721
  return { converged: false, rounds: 0, finalSha: null, blocker: runInput.blocker }
697
722
  }
698
723
  const input = runInput.input
724
+ activeRepoPath = typeof input.repoPath === 'string' && input.repoPath ? input.repoPath : null
699
725
  const prCoordinates = `owner=${input.owner} repo=${input.repo} PR #${input.prNumber} (https://github.com/${input.owner}/${input.repo}/pull/${input.prNumber})`
700
726
 
701
727
  /**
@@ -0,0 +1,47 @@
1
+ import { test } from 'node:test';
2
+ import { strict as assert } from 'node:assert';
3
+ import { readFileSync } from 'node:fs';
4
+ import { fileURLToPath } from 'node:url';
5
+ import { dirname, join } from 'node:path';
6
+
7
// Read converge.mjs, located in the same directory as this test file, as plain text.
const thisTestFilePath = fileURLToPath(import.meta.url);
const workflowDirectory = dirname(thisTestFilePath);
const convergeSourcePath = join(workflowDirectory, 'converge.mjs');
const convergeSource = readFileSync(convergeSourcePath, 'utf8');
9
+
10
/**
 * Cut a region out of the converge.mjs source text.
 *
 * The region begins at the first occurrence of startNeedle and stops just
 * before the first occurrence of endNeedle found after startNeedle ends.
 * A missing needle fails the assertion instead of returning a partial slice.
 * @param {string} startNeedle text marking the first character of the region
 * @param {string} endNeedle text marking where the region stops (excluded)
 * @returns {string} the source text from startNeedle up to, not including, endNeedle
 */
function sliceBetween(startNeedle, endNeedle) {
  const startIndex = convergeSource.indexOf(startNeedle);
  assert.notEqual(startIndex, -1, `expected ${startNeedle} to exist`);

  // Search for the end marker only past the start marker itself.
  const searchFrom = startIndex + startNeedle.length;
  const endIndex = convergeSource.indexOf(endNeedle, searchFrom);
  assert.notEqual(endIndex, -1, `expected ${endNeedle} to exist after ${startNeedle}`);

  return convergeSource.substring(startIndex, endIndex);
}
17
+
18
// Evaluate only the worktreeDirective definition, sliced from the converge.mjs text,
// so the tests below call the production implementation rather than a copy.
const worktreeDirectiveSource = sliceBetween('const worktreeDirective =', '\nconst convergeAgent =');
const productionModule = new Function(
  `${worktreeDirectiveSource}\nreturn { worktreeDirective };`,
)();
const worktreeDirective = productionModule.worktreeDirective;
23
+
24
test('a single-PR run (no repoPath) produces an empty worktree directive', () => {
  const directive = worktreeDirective(null);
  assert.equal(directive, '');
});

test('a path-scoped run pins every agent to the PR worktree by absolute path', () => {
  const directive = worktreeDirective('/worktrees/pr-398');
  // The directive must carry the path, a cd instruction, and the full command list.
  const requiredPatterns = [
    /\/worktrees\/pr-398/,
    /cd /,
    /git, gh, diff, edit, commit, push, and test/,
  ];
  for (const eachPattern of requiredPatterns) {
    assert.match(directive, eachPattern);
  }
});

test('a path-scoped run defers to a step that names a different repository directory', () => {
  const directive = worktreeDirective('/worktrees/pr-398');
  assert.match(directive, /different repository directory/i);
});

test('convergeAgent prepends the worktree directive for the active repo path', () => {
  // Inspect the convergeAgent definition as text; it is not called here.
  const agentDefinition = sliceBetween('const convergeAgent =', '\nconst PRE_COMMIT_GATE_STEP');
  const requiredPatterns = [/worktreeDirective\(activeRepoPath\)/, /HEADLESS_SAFETY_PREAMBLE/];
  for (const eachPattern of requiredPatterns) {
    assert.match(agentDefinition, eachPattern);
  }
});

test('the run binds activeRepoPath from input.repoPath after the input is parsed', () => {
  const bindingPattern = /activeRepoPath = typeof input\.repoPath === 'string'/;
  assert.match(convergeSource, bindingPattern);
});
@@ -0,0 +1,161 @@
1
+ /**
2
+ * Autoconverge multi-PR fan-out workflow driver.
3
+ *
4
+ * SINGLE-FILE CONTRACT — keep this file self-contained. The Workflow runtime
5
+ * wraps this body in a function (so top-level await and return work) and rejects
6
+ * static import statements, and `export const meta` must be the first statement.
7
+ * This driver fans out one converge.mjs child run per PR with parallel(); the
8
+ * converge.mjs child uses only agent()/parallel() (never workflow()), so the
9
+ * one-level workflow() nesting limit holds.
10
+ */
11
+
12
// Workflow metadata: the driver's name, a description, when to use it, and its single phase.
export const meta = {
  name: 'autoconverge-multi',
  description:
    "Drive several draft PRs to convergence in one run: fan out one autoconverge converge.mjs child per PR in parallel, each pinned to its own checked-out worktree via repoPath, then report every PR's outcome together.",
  whenToUse:
    'Launched by the /autoconverge skill when the user names more than one PR to converge at once; the single-PR path launches workflow/converge.mjs directly.',
  phases: [
    {
      title: 'Converge all',
      detail:
        'One converge.mjs child run per PR, all in parallel; each child is pinned to its own PR worktree through repoPath',
    },
  ],
}
20
+
21
/**
 * Normalize the workflow args global into a parsed object.
 *
 * The Workflow runtime may deliver args as a JSON-encoded string or as an
 * object; a string is parsed and an object passes through unchanged. Anything
 * that does not end up as a non-array object yields null: a non-JSON or empty
 * string, a JSON string that encodes a primitive, null, or an array, and a
 * non-string value that is not an object (undefined, a number, a boolean).
 * Returning null lets the caller turn a malformed payload into a structured
 * blocker rather than aborting the run or misreporting which field is missing.
 * @param {string|object} rawArgs the workflow args global (JSON string or object)
 * @returns {object|null} the parsed args object, or null when the payload is not a non-array object
 */
function normalizeMultiInput(rawArgs) {
  let parsedArgs = rawArgs
  if (typeof rawArgs === 'string') {
    try {
      parsedArgs = JSON.parse(rawArgs)
    } catch {
      // Deliberate: a parse failure is reported as null, never thrown.
      return null
    }
  }
  // typeof null is 'object' and arrays are objects, so both are excluded explicitly.
  const isArgsObject =
    typeof parsedArgs === 'object' && parsedArgs !== null && !Array.isArray(parsedArgs)
  return isArgsObject ? parsedArgs : null
}
39
+
40
/**
 * Decide whether one PR entry carries every coordinate a child run needs.
 *
 * A child converge run needs the PR's owner, repo, and number to address its
 * GitHub calls, and the absolute worktree path the PR is checked out in to pin
 * its agents there. Each coordinate is checked for type as well as presence,
 * so a value such as prNumber: true or owner: {} is rejected here instead of
 * being forwarded to the child run.
 * @param {object} prEntry one element of the args.prs array
 * @returns {boolean} true when owner, repo, and repoPath are non-empty strings and prNumber is a positive integer (a number or a digit-only string)
 */
function isUsablePrEntry(prEntry) {
  if (prEntry == null || typeof prEntry !== 'object') return false

  const isNonEmptyString = (value) => typeof value === 'string' && value.length > 0
  const { owner, repo, prNumber, repoPath } = prEntry

  // Accept 398 and '398'; reject 0, negatives, fractions, booleans, and non-numeric text.
  const numericPrNumber =
    typeof prNumber === 'string' && /^\d+$/.test(prNumber) ? Number(prNumber) : prNumber
  const hasPositivePrNumber = Number.isSafeInteger(numericPrNumber) && numericPrNumber > 0

  return (
    isNonEmptyString(owner) &&
    isNonEmptyString(repo) &&
    hasPositivePrNumber &&
    isNonEmptyString(repoPath)
  )
}
59
+
60
/**
 * Turn the raw workflow args into either usable fan-out input or a blocker.
 *
 * A fan-out run requires the absolute converge.mjs script path plus a non-empty
 * list of PR entries, each carrying owner, repo, prNumber, and the absolute
 * worktree path the PR is checked out in. When the payload fails to parse,
 * convergeScriptPath is not a non-empty string, prs is missing or empty, or any
 * entry lacks a coordinate, the result carries a blocker message that the
 * top-level run reports as {converged:false, blocker} instead of throwing on a
 * missing field.
 * @param {string|object} rawArgs the workflow args global (JSON string or object)
 * @returns {{input: object|null, blocker: string|null}} usable coordinates or a blocker
 */
function classifyMultiInput(rawArgs) {
  // Every rejection has the same shape; only the message differs.
  const rejectWith = (blocker) => ({ input: null, blocker })

  const candidate = normalizeMultiInput(rawArgs)
  if (candidate == null) {
    return rejectWith('invalid run coordinates: the workflow args did not parse into an object')
  }

  const scriptPath = candidate.convergeScriptPath
  const hasScriptPath = typeof scriptPath === 'string' && scriptPath.length > 0
  if (!hasScriptPath) {
    return rejectWith(
      'invalid run coordinates: convergeScriptPath (absolute path to converge.mjs) is required',
    )
  }

  const prEntries = candidate.prs
  const hasPrEntries = Array.isArray(prEntries) && prEntries.length !== 0
  if (!hasPrEntries) {
    return rejectWith('invalid run coordinates: prs must be a non-empty array of PR entries')
  }

  // Count the entries a child run could not use; a single one blocks the whole run.
  const unusableEntryCount = prEntries.reduce(
    (runningCount, eachEntry) => (isUsablePrEntry(eachEntry) ? runningCount : runningCount + 1),
    0,
  )
  if (unusableEntryCount > 0) {
    return rejectWith(
      `invalid run coordinates: ${unusableEntryCount} PR entry/entries missing owner, repo, prNumber, or repoPath`,
    )
  }

  return { input: candidate, blocker: null }
}
104
+
105
+ const multiInput = classifyMultiInput(args)
106
+ if (multiInput.blocker) {
107
+ return { converged: false, prCount: 0, convergedCount: 0, results: [], blocker: multiInput.blocker }
108
+ }
109
+ const input = multiInput.input
110
+
111
+ phase('Converge all')
112
+ log(`autoconverge multi-PR: driving ${input.prs.length} PR(s) to ready in parallel`)
113
+
114
+ const childResults = await parallel(
115
+ input.prs.map((eachPr) => async () => {
116
+ const childOutcome = await workflow(
117
+ { scriptPath: input.convergeScriptPath },
118
+ {
119
+ owner: eachPr.owner,
120
+ repo: eachPr.repo,
121
+ prNumber: eachPr.prNumber,
122
+ repoPath: eachPr.repoPath,
123
+ bugbotDisabled: Boolean(eachPr.bugbotDisabled),
124
+ },
125
+ )
126
+ return {
127
+ owner: eachPr.owner,
128
+ repo: eachPr.repo,
129
+ prNumber: eachPr.prNumber,
130
+ converged: Boolean(childOutcome && childOutcome.converged),
131
+ rounds: childOutcome && childOutcome.rounds !== undefined ? childOutcome.rounds : null,
132
+ finalSha: childOutcome && childOutcome.finalSha !== undefined ? childOutcome.finalSha : null,
133
+ blocker: childOutcome && childOutcome.blocker !== undefined ? childOutcome.blocker : null,
134
+ }
135
+ }),
136
+ )
137
+
138
+ const results = childResults.map((eachResult, eachIndex) =>
139
+ eachResult === null
140
+ ? {
141
+ owner: input.prs[eachIndex].owner,
142
+ repo: input.prs[eachIndex].repo,
143
+ prNumber: input.prs[eachIndex].prNumber,
144
+ converged: false,
145
+ rounds: null,
146
+ finalSha: null,
147
+ blocker: 'child run threw or was skipped before returning an outcome',
148
+ }
149
+ : eachResult,
150
+ )
151
+
152
+ const convergedCount = results.filter((eachResult) => eachResult.converged).length
153
+ log(`autoconverge multi-PR done: ${convergedCount}/${results.length} PR(s) converged`)
154
+
155
+ return {
156
+ converged: convergedCount === results.length,
157
+ prCount: results.length,
158
+ convergedCount,
159
+ results,
160
+ blocker: null,
161
+ }