claude-dev-env 1.58.0 → 1.59.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +2 -2
- package/_shared/pr-loop/scripts/code_rules_gate.py +36 -3
- package/_shared/pr-loop/scripts/pr_loop_shared_constants/code_rules_gate_constants.py +6 -0
- package/_shared/pr-loop/scripts/pr_loop_shared_constants/reviews_disabled_constants.py +1 -0
- package/_shared/pr-loop/scripts/reviews_disabled.py +12 -0
- package/_shared/pr-loop/scripts/tests/test_code_rules_gate.py +265 -0
- package/_shared/pr-loop/scripts/tests/test_reviews_disabled.py +29 -0
- package/audit-rubrics/category_rubrics/category-o-docstring-vs-impl-drift.md +1 -1
- package/bin/install.mjs +100 -27
- package/bin/install.test.mjs +133 -1
- package/docs/CODE_RULES.md +3 -3
- package/hooks/blocking/code_rules_annotations_length.py +153 -0
- package/hooks/blocking/code_rules_dead_dataclass_field.py +319 -0
- package/hooks/blocking/code_rules_duplicate_body.py +287 -0
- package/hooks/blocking/code_rules_enforcer.py +175 -21
- package/hooks/blocking/code_rules_magic_values.py +98 -0
- package/hooks/blocking/code_rules_shared.py +41 -0
- package/hooks/blocking/destructive_command_blocker.py +1027 -12
- package/hooks/blocking/hook_prose_detector_consistency.py +150 -0
- package/hooks/blocking/subprocess_budget_completeness.py +380 -0
- package/hooks/blocking/test_code_rules_enforcer_annotations.py +225 -0
- package/hooks/blocking/test_code_rules_enforcer_cap_meta.py +1 -0
- package/hooks/blocking/test_code_rules_enforcer_dead_dataclass_field.py +467 -0
- package/hooks/blocking/test_code_rules_enforcer_duplicate_body.py +330 -0
- package/hooks/blocking/test_code_rules_enforcer_duplicate_body_hook_routing.py +179 -0
- package/hooks/blocking/test_code_rules_enforcer_magic_slice_bounds.py +133 -0
- package/hooks/blocking/test_destructive_command_blocker.py +622 -3
- package/hooks/blocking/test_hook_prose_detector_consistency.py +265 -0
- package/hooks/blocking/test_subprocess_budget_completeness.py +588 -0
- package/hooks/blocking/test_workflow_substitution_slot_blocker.py +242 -0
- package/hooks/blocking/workflow_substitution_slot_blocker.py +159 -0
- package/hooks/hooks.json +15 -0
- package/hooks/hooks_constants/code_rules_enforcer_constants.py +16 -0
- package/hooks/hooks_constants/dead_dataclass_field_constants.py +25 -0
- package/hooks/hooks_constants/destructive_command_segment_constants.py +178 -0
- package/hooks/hooks_constants/duplicate_function_body_constants.py +17 -0
- package/hooks/hooks_constants/hook_prose_detector_consistency_constants.py +30 -0
- package/hooks/hooks_constants/subprocess_budget_completeness_constants.py +5 -0
- package/hooks/hooks_constants/workflow_substitution_slot_blocker_constants.py +22 -0
- package/package.json +1 -1
- package/rules/docstring-prose-matches-implementation.md +43 -0
- package/rules/hook-prose-matches-detector.md +26 -0
- package/rules/no-inline-destructive-literals.md +11 -0
- package/rules/workflow-substitution-slots.md +7 -0
- package/skills/autoconverge/SKILL.md +13 -2
- package/skills/autoconverge/reference/convergence.md +7 -3
- package/skills/autoconverge/reference/stop-conditions.md +7 -2
- package/skills/autoconverge/workflow/converge.copilot-gate.test.mjs +265 -0
- package/skills/autoconverge/workflow/converge.mjs +106 -36
- package/skills/pr-converge/scripts/check_convergence.py +195 -64
- package/skills/pr-converge/scripts/test_check_convergence.py +173 -2
- package/skills/update/SKILL.md +37 -5
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Configuration constants for the hook_prose_detector_consistency PreToolUse hook."""
|
|
2
|
+
|
|
3
|
+
WRITE_TOOL_NAME: str = "Write"
|
|
4
|
+
EDIT_TOOL_NAME: str = "Edit"
|
|
5
|
+
|
|
6
|
+
HOOK_MODULE_PATH_SEGMENT: str = "/hooks/"
|
|
7
|
+
PYTHON_FILE_SUFFIX: str = ".py"
|
|
8
|
+
CONSTANTS_MODULE_SUFFIX: str = "_constants.py"
|
|
9
|
+
TEST_MODULE_PREFIX: str = "test_"
|
|
10
|
+
|
|
11
|
+
PATH_SEPARATOR_CLASS_PATTERN: str = (
|
|
12
|
+
r"\[[^\]/]*\\\\[^\]/]*\]|\[[^\]]*\\\\?/[^\]]*\]|\[[^\]]*/\\\\?[^\]]*\]"
|
|
13
|
+
)
|
|
14
|
+
OVERSTATED_OUTPUT_KEY_PHRASE_PATTERN: str = r"output[- ]key\s+segment"
|
|
15
|
+
|
|
16
|
+
CORRECTIVE_MESSAGE: str = (
|
|
17
|
+
"BLOCKED [hook-prose-detector-consistency]: A hook module's user-facing prose "
|
|
18
|
+
"(its docstring lead narrative or CORRECTIVE_MESSAGE) claims the hook blocks an "
|
|
19
|
+
"'output-key segment', but the module's detector keys off a path separator only "
|
|
20
|
+
"(it matches a token next to `\\` or `/`). A quoted structured-output key alone "
|
|
21
|
+
"never triggers a block, so the prose overstates the contract: an author whose "
|
|
22
|
+
"only per-iteration token is an output key would never see this message, and an "
|
|
23
|
+
"author who does see it is told an output key caused a block it cannot cause.\n\n"
|
|
24
|
+
"Describe only the trigger the detector implements: a per-iteration path segment "
|
|
25
|
+
"next to a path separator. Drop 'or output-key segment' (or restate it as 'a "
|
|
26
|
+
"per-iteration path segment') so the message and docstring match what the regex "
|
|
27
|
+
"catches.\n\n"
|
|
28
|
+
"Invariant: a hook's docstring and corrective message describe exactly the shapes "
|
|
29
|
+
"its detector flags -- no broader trigger surface than the regex enforces."
|
|
30
|
+
)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Configuration constants for the workflow_substitution_slot_blocker PreToolUse hook."""
|
|
2
|
+
|
|
3
|
+
WRITE_TOOL_NAME: str = "Write"
|
|
4
|
+
EDIT_TOOL_NAME: str = "Edit"
|
|
5
|
+
MULTI_EDIT_TOOL_NAME: str = "MultiEdit"
|
|
6
|
+
|
|
7
|
+
WORKFLOW_FILE_SUFFIX: str = ".workflow.js"
|
|
8
|
+
|
|
9
|
+
CORRECTIVE_MESSAGE: str = (
|
|
10
|
+
"BLOCKED [workflow-substitution-slot]: A bare per-iteration index token "
|
|
11
|
+
"(for example `cand_i`) appears as a per-iteration path segment inside a "
|
|
12
|
+
".workflow.js agent-prompt block that loops over an index. A bare `_i` "
|
|
13
|
+
"token reads as a fixed literal, so an agent can create one literal "
|
|
14
|
+
"directory and overwrite it across every iteration -- collapsing an "
|
|
15
|
+
"N-iteration gate into one.\n\n"
|
|
16
|
+
"Mark the index as a substitution slot with the angle-bracket convention "
|
|
17
|
+
"this template already uses for per-call values (`<plate.svg>`, `<glow_hex>`): "
|
|
18
|
+
"write `cand_<i>` instead of `cand_i`, or spell out 'replace <i> with the "
|
|
19
|
+
"iteration index 0, 1, 2' in the step text.\n\n"
|
|
20
|
+
"Convention: every per-call substitution slot in a .workflow.js template is "
|
|
21
|
+
"marked with angle brackets, so an agent fills in a fresh value per call."
|
|
22
|
+
)
|
package/package.json
CHANGED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Docstring Prose Matches Implementation
|
|
2
|
+
|
|
3
|
+
**When this applies:** Any Write or Edit to a public function, method, class, or module whose docstring prose makes an enumerable claim about behavior — a list of inputs the code handles, the conditions it treats as a match, the cases it skips, or the order of its steps.
|
|
4
|
+
|
|
5
|
+
## Rule
|
|
6
|
+
|
|
7
|
+
When a docstring enumerates the behaviors a body applies, the enumeration covers every behavior the body applies. A reader trusts the list to be complete: an item the code applies but the prose omits is a silent gap that misleads every future reader and reviewer.
|
|
8
|
+
|
|
9
|
+
The gate validator `check_docstring_args_match_signature` covers the `Args:` section parameter names. Free-form prose — `"a field counts as read when ..."`, `"resolves to shared temp only"`, `"strip ceremony, then drop blockquotes"` — has no signature to compare against, so the gate cannot catch its drift. This rule is the judgment standard for that prose. It carries documented-but-pending hook coverage; the audit lane below is the enforcement until a deterministic gate check exists.
|
|
10
|
+
|
|
11
|
+
## What to check before you write the docstring
|
|
12
|
+
|
|
13
|
+
Read the body and the docstring side by side:
|
|
14
|
+
|
|
15
|
+
- **Read-source / match-source unions.** A body that computes `read_names = a | b | c` (or any union of "what counts") names each union member in the prose enumeration. A union member the code applies but the prose omits is a gap.
|
|
16
|
+
- **Suppressor / skip lists.** A body with several early returns that suppress the check names each suppressor in the prose.
|
|
17
|
+
- **Step order.** A docstring that says `A then B then C` matches the call order in the body.
|
|
18
|
+
- **Predicate breadth.** A boolean helper whose prose promises a narrow check accepts only the inputs the prose names — no broader input class the name and prose do not mention.
|
|
19
|
+
|
|
20
|
+
When the body changes the set of behaviors it applies, the same edit updates the prose enumeration. The two move together in one commit.
|
|
21
|
+
|
|
22
|
+
## Worked example
|
|
23
|
+
|
|
24
|
+
A `@dataclass` dead-field check builds its set of "field counts as read" sources by union:
|
|
25
|
+
|
|
26
|
+
```python
|
|
27
|
+
read_names = (
|
|
28
|
+
attribute_read_names
|
|
29
|
+
| dynamic_literal_names
|
|
30
|
+
| _match_pattern_attribute_names(tree)
|
|
31
|
+
| _exported_names(tree)
|
|
32
|
+
)
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
A docstring that enumerates "attribute read, augmented-assignment target, class-pattern keyword, literal `getattr`/`attrgetter`" but omits the `__all__` source (`_exported_names`) is drifted: a field whose name appears in `__all__` is treated as read, and the prose hides that. The fix adds the missing source to the enumeration so the list matches the union.
|
|
36
|
+
|
|
37
|
+
## Enforcement (audit lane)
|
|
38
|
+
|
|
39
|
+
This drift class is sub-bucket **O6** in `packages/claude-dev-env/audit-rubrics/category_rubrics/category-o-docstring-vs-impl-drift.md` (free-form `Note:` / `Returns:` / responsibility-list claims). The audit teammate lists every prose enumeration in a changed docstring and verifies each item against the body, and lists every union member / suppressor / step in the body and verifies each appears in the prose. A union member or suppressor in the body that the prose omits is an O6 finding.
|
|
40
|
+
|
|
41
|
+
## Why
|
|
42
|
+
|
|
43
|
+
A docstring enumeration earns its place by being trustworthy. A complete list lets a reader reason about the function without scanning the body; a list missing one item is worse than no list, because it asserts completeness it does not have. Naming this standard makes the gap a first-class finding at write time and at audit, rather than a surprise a reader hits months later.
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
---
|
|
2
|
+
paths: **/hooks/**/*.py
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Hook Prose Matches Its Detector
|
|
6
|
+
|
|
7
|
+
**When this applies:** Any Write or Edit to a hook module (`.py` under `hooks/`) or its `*_constants.py` companion.
|
|
8
|
+
|
|
9
|
+
**Hook enforcement:** `hook-prose-detector-consistency` (PreToolUse on Write|Edit) blocks a hook whose user-facing prose claims a trigger its detector never fires on. See `hooks.json` for registration.
|
|
10
|
+
|
|
11
|
+
## Rule
|
|
12
|
+
|
|
13
|
+
A hook's docstring lead narrative and its `CORRECTIVE_MESSAGE` describe exactly the shapes the detector flags — no broader trigger surface than the regex enforces. An author reads the corrective message to learn what they did wrong; an author reads the docstring to learn what the hook guards. When either claims a trigger the detector cannot fire on, both audiences are misled: an author whose only token is that shape never sees the block, and an author who does see the block is told the wrong cause.
|
|
14
|
+
|
|
15
|
+
## The path-shape blocker case
|
|
16
|
+
|
|
17
|
+
A path-shape blocker detects a per-iteration token only when the token sits next to a path separator (its detection regex keys off a `[\\/]`-style character class). Such a hook must not claim it blocks an "output-key segment": a quoted structured-output key alone, with no looped path, is never flagged. The `*_constants.py` companion holds the corrective message and not the detector, so the phrase "output-key segment" describing a blocked trigger is itself the violation there, regardless of which file holds the regex.
|
|
18
|
+
|
|
19
|
+
| Prohibited claim | Why it overstates | Correct phrasing |
|
|
20
|
+
|---|---|---|
|
|
21
|
+
| "appears as a path or output-key segment" | the detector keys off a path separator only | "appears as a per-iteration path segment" |
|
|
22
|
+
| docstring: "blocks a bare token like `cand_i`" | a bare prose token next to no separator is not flagged | "blocks a per-iteration path like `${work}\cand_i\plate.svg`" |
|
|
23
|
+
|
|
24
|
+
## The test
|
|
25
|
+
|
|
26
|
+
After writing a hook, ask: **would a token that matches every word of this message actually trip the detector?** When the message names a shape the regex skips, rewrite the message to name only what the regex catches.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# No Inline Destructive-Command Literals in Bash
|
|
2
|
+
|
|
3
|
+
The `destructive_command_blocker` PreToolUse hook matches destructive patterns (`rm -rf`, `git reset --hard`, `dd`, `mkfs`, `chmod -R`, fork bombs) as raw text anywhere in a Bash-tool command, with no quote-awareness — so a destructive literal carried only as DATA (a commit message, a PR/issue/review-comment body, an echoed string, a `python -c`/`node -e`/`awk` argument, a heredoc) trips the confirmation prompt even though the shell never executes it. In a background or auto-mode run no human can answer that prompt, so the call stalls.
|
|
4
|
+
|
|
5
|
+
Keep destructive literals out of the Bash command string:
|
|
6
|
+
|
|
7
|
+
- Commit messages and PR/issue/review-comment bodies that describe destructive-command behavior go in a file passed by path — `git commit -F <file>`, `gh ... --body-file <file>` (see [`gh-body-file`](gh-body-file.md)) — never `git commit -m` / `gh ... -b`.
|
|
8
|
+
- To exercise or verify `destructive_command_blocker` (or any hook) behavior, run the committed test suite (`python -m pytest <test_file>`), which passes the command strings as in-language data, not as a shell command — never an inline `python -c` harness.
|
|
9
|
+
- Genuine cleanup targets the OS temp dir or `$CLAUDE_JOB_DIR/tmp` (auto-allowed as ephemeral), never a repository or worktree path.
|
|
10
|
+
|
|
11
|
+
The `destructive_command_blocker` hook is the enforcement surface; this rule is how to keep a non-executing mention from tripping it.
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# Workflow Substitution Slot Rule
|
|
2
|
+
|
|
3
|
+
In a `.workflow.js` agent-prompt template, every per-call or per-iteration value an agent must fill in is marked with the angle-bracket convention — `<plate.svg>`, `<object.svg>`, `<glow_hex>`, `cand_<i>`. A bare token such as `cand_i` reads as a fixed literal, so an agent can create one literal directory named `cand_i` and overwrite it across every iteration of a loop, collapsing an N-iteration gate into a single run.
|
|
4
|
+
|
|
5
|
+
When a loop builds a per-iteration path or output key, write the index as a slot — `cand_<i>` — or spell out `replace <i> with the iteration index 0, 1, 2` in the step text. Every per-call value in a `.workflow.js` template carries angle brackets so an agent fills in a fresh value per call.
|
|
6
|
+
|
|
7
|
+
`workflow_substitution_slot_blocker.py` (PreToolUse on Write/Edit) blocks a `.workflow.js` write whose looped content carries a bare `<word>_<i|j|k>` token as a per-iteration path segment, and returns the corrective message.
|
|
@@ -66,7 +66,7 @@ own. The workflow runs in the background and notifies this session on
|
|
|
66
66
|
completion. Watch live progress with `/workflows`.
|
|
67
67
|
|
|
68
68
|
The workflow returns
|
|
69
|
-
`{ converged, rounds, finalSha, blocker, standardsNote }`.
|
|
69
|
+
`{ converged, rounds, finalSha, blocker, standardsNote, copilotNote }`.
|
|
70
70
|
|
|
71
71
|
## Budget-aware round boundaries
|
|
72
72
|
|
|
@@ -146,6 +146,7 @@ round records nothing resumable and replays dirty.
|
|
|
146
146
|
Final commit: <finalSha>
|
|
147
147
|
Blocker: <blocker> # only when blocked
|
|
148
148
|
Standards: <standardsNote> # only when a round deferred code-standard findings
|
|
149
|
+
Copilot: <copilotNote> # only when Copilot was down or out of quota
|
|
149
150
|
```
|
|
150
151
|
|
|
151
152
|
## What the workflow does each round
|
|
@@ -156,6 +157,13 @@ shape and the exact convergence definition, and
|
|
|
156
157
|
run ends short of ready. Hard-won failure lessons live in
|
|
157
158
|
[`reference/gotchas.md`](reference/gotchas.md).
|
|
158
159
|
|
|
160
|
+
Every agent prompt carries a headless-safety preamble: the run is unattended, so
|
|
161
|
+
agents never inline a destructive-command literal (`rm -rf`, `git reset --hard`,
|
|
162
|
+
`dd`) into a Bash command — the `destructive_command_blocker` hook matches those
|
|
163
|
+
patterns as raw text, and a confirmation prompt no human can answer would stall
|
|
164
|
+
the run. Agents verify destructive-blocker behavior through the committed test
|
|
165
|
+
suite (`python -m pytest`) and keep scratch work in ephemeral temp dirs.
|
|
166
|
+
|
|
159
167
|
- **Converge:** `parallel([Bugbot lens, code-review lens, bug-audit lens])` on
|
|
160
168
|
the current HEAD, full `origin/main...HEAD` diff. Dedup findings; one
|
|
161
169
|
`clean-coder` applies all fixes in a single commit, pushes, replies to and
|
|
@@ -168,7 +176,10 @@ run ends short of ready. Hard-won failure lessons live in
|
|
|
168
176
|
any bot threads with a deferral note, and reports the deferral in
|
|
169
177
|
`standardsNote`.
|
|
170
178
|
- **Copilot gate:** request a Copilot review, poll up to three times; findings
|
|
171
|
-
route back into Converge
|
|
179
|
+
route back into Converge. When Copilot is down or out of quota — it posts an
|
|
180
|
+
out-of-usage notice (the requester hit their quota) on the HEAD, or surfaces no
|
|
181
|
+
review at all after the cap — the gate logs a notice and the run marks the PR
|
|
182
|
+
ready with the Copilot gate bypassed. `copilotNote` records the bypass.
|
|
172
183
|
- **Convergence check:** `check_convergence.py` is the authoritative gate; on a
|
|
173
184
|
full pass the workflow marks `draft=false`.
|
|
174
185
|
|
|
@@ -42,7 +42,9 @@ tracks CONVERGE passes only and is never the cap.
|
|
|
42
42
|
to three times, 360 seconds apart.
|
|
43
43
|
- Copilot findings → fix them and return to CONVERGE on the new HEAD.
|
|
44
44
|
- Copilot clean or approved → move to the convergence check.
|
|
45
|
-
-
|
|
45
|
+
- Copilot down or out of quota (an out-of-usage notice, or no review after three
|
|
46
|
+
polls) → log a notice and move to the convergence check with the Copilot gate
|
|
47
|
+
bypassed.
|
|
46
48
|
|
|
47
49
|
**Convergence check**:
|
|
48
50
|
|
|
@@ -67,13 +69,15 @@ the current HEAD:
|
|
|
67
69
|
2. The Bugbot review body on HEAD reports no findings (checked when a Bugbot
|
|
68
70
|
review is present).
|
|
69
71
|
3. A CLEAN bugteam audit review sits on HEAD.
|
|
70
|
-
4. The Copilot review on HEAD is clean or approved
|
|
72
|
+
4. The Copilot review on HEAD is clean or approved (bypassed when Copilot is down
|
|
73
|
+
or out of quota this run).
|
|
71
74
|
5. Zero unresolved bot review threads anywhere on the PR — counting Cursor,
|
|
72
75
|
Claude, and Copilot authored threads where `isResolved` is false (`isOutdated`
|
|
73
76
|
threads are excluded by the gate, but the fix lens still verifies and resolves
|
|
74
77
|
them during the round).
|
|
75
78
|
6. The PR is mergeable (`mergeable` true and `mergeable_state` clean).
|
|
76
|
-
7. No requested reviewers are still pending
|
|
79
|
+
7. No requested reviewers are still pending (bypassed when Copilot is down or out
|
|
80
|
+
of quota this run).
|
|
77
81
|
|
|
78
82
|
## Audit-trail design
|
|
79
83
|
|
|
@@ -6,8 +6,6 @@ skill still runs teardown (revoke permissions, final report).
|
|
|
6
6
|
|
|
7
7
|
## Blockers (end the run short of ready)
|
|
8
8
|
|
|
9
|
-
- **Copilot no-show** — Copilot surfaces no review on the current HEAD after
|
|
10
|
-
three polls (360 seconds apart). `blocker` names the Copilot timeout.
|
|
11
9
|
- **Iteration cap** — 20 loop iterations pass without a full convergence-check
|
|
12
10
|
pass. The iteration counter increments on every pass through any phase, so a
|
|
13
11
|
convergence-check gate that no round can clear (for example a `mergeable_state`
|
|
@@ -40,6 +38,13 @@ skill still runs teardown (revoke permissions, final report).
|
|
|
40
38
|
run or review after the lens poll budget, the Bugbot lens returns `down: true`.
|
|
41
39
|
The run continues, and the convergence check runs with `--bugbot-down` so its
|
|
42
40
|
Bugbot gate is bypassed.
|
|
41
|
+
- **Copilot down or out of quota** — when Copilot posts an out-of-usage notice on
|
|
42
|
+
the current HEAD (the user who requested the review reached their quota limit)
|
|
43
|
+
rather than a code review, or surfaces no review at all after three polls, the
|
|
44
|
+
Copilot gate returns `down: true`. The run logs a notice, runs the convergence
|
|
45
|
+
check with `--copilot-down` (the Copilot review gate and the
|
|
46
|
+
pending-requested-reviews gate bypassed), and marks the PR ready. `copilotNote`
|
|
47
|
+
records the bypass for the final report.
|
|
43
48
|
- **A lens agent dies** — when one parallel lens returns null (a terminal agent
|
|
44
49
|
failure), the round proceeds on the surviving lenses. A real defect it would
|
|
45
50
|
have caught surfaces in a later round or at the convergence check. A dead
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
import { test } from 'node:test';
|
|
2
|
+
import { strict as assert } from 'node:assert';
|
|
3
|
+
import { readFileSync } from 'node:fs';
|
|
4
|
+
import { fileURLToPath } from 'node:url';
|
|
5
|
+
import { dirname, join } from 'node:path';
|
|
6
|
+
|
|
7
|
+
const workflowDirectory = dirname(fileURLToPath(import.meta.url));
|
|
8
|
+
const convergeSource = readFileSync(join(workflowDirectory, 'converge.mjs'), 'utf8');
|
|
9
|
+
|
|
10
|
+
function functionBody(functionName) {
|
|
11
|
+
const functionStart = convergeSource.indexOf(`function ${functionName}(`);
|
|
12
|
+
assert.notEqual(functionStart, -1, `expected ${functionName} to exist`);
|
|
13
|
+
const nextFunctionStart = convergeSource.indexOf('\nfunction ', functionStart + 1);
|
|
14
|
+
const functionEnd = nextFunctionStart === -1 ? convergeSource.length : nextFunctionStart;
|
|
15
|
+
return convergeSource.slice(functionStart, functionEnd);
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
const productionModule = new Function(
|
|
19
|
+
`${functionBody('classifyCopilotOutcome')}\n` +
|
|
20
|
+
`${functionBody('resolveCopilotDown')}\n` +
|
|
21
|
+
'return { classifyCopilotOutcome, resolveCopilotDown };',
|
|
22
|
+
)();
|
|
23
|
+
const { classifyCopilotOutcome, resolveCopilotDown } = productionModule;
|
|
24
|
+
|
|
25
|
+
/**
 * Builds a Copilot gate result fixture for the classifier tests.
 *
 * The baseline is a result with a fixed sha, no clean verdict, no outage, and
 * an empty findings list; any key present in `overrides` replaces the baseline
 * value of the same name.
 *
 * @param {object} [overrides] Fields to replace on the baseline result.
 * @returns {object} A fresh result object (with a fresh `findings` array unless overridden).
 */
function copilotResult(overrides) {
  const baselineResult = {
    sha: 'abcdef0',
    clean: false,
    down: false,
    findings: [],
  };
  return { ...baselineResult, ...overrides };
}
|
|
34
|
+
|
|
35
|
+
test('an out-of-usage Copilot result (down) routes to the down kind', () => {
|
|
36
|
+
const outcome = classifyCopilotOutcome(copilotResult({ clean: true, down: true }));
|
|
37
|
+
assert.equal(outcome.kind, 'down');
|
|
38
|
+
});
|
|
39
|
+
|
|
40
|
+
test('a down Copilot result routes to down even when clean is false', () => {
|
|
41
|
+
const outcome = classifyCopilotOutcome(copilotResult({ clean: false, down: true }));
|
|
42
|
+
assert.equal(outcome.kind, 'down');
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
test('a dead Copilot gate agent retries rather than passing', () => {
|
|
46
|
+
assert.equal(classifyCopilotOutcome(null).kind, 'retry');
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
test('a reachable Copilot gate with no findings and no clean verdict retries', () => {
|
|
50
|
+
const outcome = classifyCopilotOutcome(copilotResult({ clean: false, down: false }));
|
|
51
|
+
assert.equal(outcome.kind, 'retry');
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
test('Copilot findings route to a fix when Copilot is reachable and not down', () => {
|
|
55
|
+
const outcome = classifyCopilotOutcome(
|
|
56
|
+
copilotResult({
|
|
57
|
+
findings: [
|
|
58
|
+
{
|
|
59
|
+
file: 'a.py',
|
|
60
|
+
line: 1,
|
|
61
|
+
severity: 'P1',
|
|
62
|
+
category: 'bug',
|
|
63
|
+
title: 't',
|
|
64
|
+
detail: 'd',
|
|
65
|
+
replyToCommentId: null,
|
|
66
|
+
},
|
|
67
|
+
],
|
|
68
|
+
}),
|
|
69
|
+
);
|
|
70
|
+
assert.equal(outcome.kind, 'fix');
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
// Checks the COPILOT_SCHEMA source text: it declares a boolean `down` property,
// lists 'down' as required, and no longer declares a `blocker` property.
test('COPILOT_SCHEMA carries a required down field', () => {
  const schemaStart = convergeSource.indexOf('const COPILOT_SCHEMA =');
  assert.notEqual(schemaStart, -1, 'expected COPILOT_SCHEMA to exist');
  // Search forward from the schema and fail loudly when the end marker is gone:
  // an unchecked -1 (or an earlier match) would slice nearly the whole file or
  // nothing at all, so the assertions below would inspect the wrong text.
  const schemaEnd = convergeSource.indexOf('const HEAD_SCHEMA =', schemaStart);
  assert.notEqual(schemaEnd, -1, 'expected HEAD_SCHEMA to follow COPILOT_SCHEMA');
  const schemaSource = convergeSource.slice(schemaStart, schemaEnd);
  assert.match(schemaSource, /down:\s*\{\s*type:\s*'boolean'/);
  assert.match(schemaSource, /required:\s*\[[^\]]*'down'[^\]]*\]/);
  assert.doesNotMatch(
    schemaSource,
    /blocker:/,
    'the Copilot gate no longer surfaces a blocker; a down result carries the outage',
  );
});
|
|
86
|
+
|
|
87
|
+
test('the Copilot gate prompt detects an out-of-usage notice and returns a down result', () => {
|
|
88
|
+
const copilotPrompt = functionBody('runCopilotGate');
|
|
89
|
+
assert.match(
|
|
90
|
+
copilotPrompt,
|
|
91
|
+
/quota|out of usage|out-of-usage/i,
|
|
92
|
+
'expected the gate to name the out-of-usage / quota signal',
|
|
93
|
+
);
|
|
94
|
+
assert.match(
|
|
95
|
+
copilotPrompt,
|
|
96
|
+
/down:\s*true/,
|
|
97
|
+
'expected the gate to return down:true on an out-of-usage notice',
|
|
98
|
+
);
|
|
99
|
+
});
|
|
100
|
+
|
|
101
|
+
test('the step-1 out-of-usage down-detection requires the notice commit_id to start with HEAD', () => {
|
|
102
|
+
const copilotPrompt = functionBody('runCopilotGate');
|
|
103
|
+
const stepOneStart = copilotPrompt.indexOf('`1.');
|
|
104
|
+
assert.notEqual(stepOneStart, -1, 'expected a step-1 instruction in the gate prompt');
|
|
105
|
+
const stepTwoStart = copilotPrompt.indexOf('`2.', stepOneStart);
|
|
106
|
+
assert.notEqual(stepTwoStart, -1, 'expected a step-2 instruction in the gate prompt');
|
|
107
|
+
const stepOneText = copilotPrompt.slice(stepOneStart, stepTwoStart);
|
|
108
|
+
assert.match(
|
|
109
|
+
stepOneText,
|
|
110
|
+
/commit_id starts with \$\{head\}/,
|
|
111
|
+
'expected step 1 to scope the out-of-usage notice to reviews whose commit_id starts with HEAD, matching step 2 and the convergence gate',
|
|
112
|
+
);
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
test('a Copilot no-show after the poll cap returns a down result rather than a blocker', () => {
|
|
116
|
+
const copilotPrompt = functionBody('runCopilotGate');
|
|
117
|
+
const noReviewStart = copilotPrompt.indexOf('No review after');
|
|
118
|
+
assert.notEqual(noReviewStart, -1, 'expected a no-show branch in the gate prompt');
|
|
119
|
+
const noReviewBranch = copilotPrompt.slice(noReviewStart, noReviewStart + 200);
|
|
120
|
+
assert.match(
|
|
121
|
+
noReviewBranch,
|
|
122
|
+
/down:\s*true/,
|
|
123
|
+
'expected a Copilot no-show after the poll cap to return down:true',
|
|
124
|
+
);
|
|
125
|
+
assert.doesNotMatch(
|
|
126
|
+
noReviewBranch,
|
|
127
|
+
/blocker:/,
|
|
128
|
+
'expected the no-show branch to carry a down result, not a blocker',
|
|
129
|
+
);
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
test('checkConvergence wires the --copilot-down flag from a copilotDown argument', () => {
|
|
133
|
+
const checkConvergenceBody = functionBody('checkConvergence');
|
|
134
|
+
assert.match(
|
|
135
|
+
checkConvergenceBody,
|
|
136
|
+
/copilotDown \? ' --copilot-down' : ''/,
|
|
137
|
+
'expected checkConvergence to append --copilot-down when copilotDown is set',
|
|
138
|
+
);
|
|
139
|
+
assert.match(
|
|
140
|
+
checkConvergenceBody,
|
|
141
|
+
/\$\{copilotDownFlag\}/,
|
|
142
|
+
'expected the --copilot-down flag to be interpolated into the script invocation',
|
|
143
|
+
);
|
|
144
|
+
});
|
|
145
|
+
|
|
146
|
+
test('the COPILOT phase routes a down outcome to FINALIZE with the gate bypassed', () => {
|
|
147
|
+
const copilotPhaseStart = convergeSource.indexOf("if (phase === 'COPILOT') {");
|
|
148
|
+
assert.notEqual(copilotPhaseStart, -1, 'expected a COPILOT phase block');
|
|
149
|
+
const downBranchStart = convergeSource.indexOf("copilotOutcome.kind === 'down'", copilotPhaseStart);
|
|
150
|
+
assert.notEqual(downBranchStart, -1, 'expected the COPILOT phase to handle a down outcome');
|
|
151
|
+
const downBranch = convergeSource.slice(downBranchStart, downBranchStart + 400);
|
|
152
|
+
assert.match(downBranch, /copilotDown = true/);
|
|
153
|
+
assert.match(downBranch, /copilotNote =/);
|
|
154
|
+
assert.match(downBranch, /phase = 'FINALIZE'/);
|
|
155
|
+
});
|
|
156
|
+
|
|
157
|
+
test('resolveCopilotDown reports down only for a down outcome', () => {
|
|
158
|
+
assert.equal(resolveCopilotDown({ kind: 'down' }), true);
|
|
159
|
+
});
|
|
160
|
+
|
|
161
|
+
test('resolveCopilotDown clears the bypass for an approved outcome', () => {
|
|
162
|
+
assert.equal(resolveCopilotDown({ kind: 'approved' }), false);
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
test('resolveCopilotDown clears the bypass for a fix outcome carrying findings', () => {
|
|
166
|
+
assert.equal(
|
|
167
|
+
resolveCopilotDown({
|
|
168
|
+
kind: 'fix',
|
|
169
|
+
findings: [
|
|
170
|
+
{
|
|
171
|
+
file: 'a.py',
|
|
172
|
+
line: 1,
|
|
173
|
+
severity: 'P1',
|
|
174
|
+
category: 'bug',
|
|
175
|
+
title: 't',
|
|
176
|
+
detail: 'd',
|
|
177
|
+
replyToCommentId: null,
|
|
178
|
+
},
|
|
179
|
+
],
|
|
180
|
+
}),
|
|
181
|
+
false,
|
|
182
|
+
);
|
|
183
|
+
});
|
|
184
|
+
|
|
185
|
+
test('resolveCopilotDown clears the bypass for a retry outcome', () => {
|
|
186
|
+
assert.equal(resolveCopilotDown({ kind: 'retry' }), false);
|
|
187
|
+
});
|
|
188
|
+
|
|
189
|
+
test('the standards-only Copilot sub-path resets copilotDown before FINALIZE', () => {
|
|
190
|
+
const standardsBranchStart = convergeSource.indexOf(
|
|
191
|
+
'isStandardsOnlyRound(copilotOutcome.findings)',
|
|
192
|
+
);
|
|
193
|
+
assert.notEqual(
|
|
194
|
+
standardsBranchStart,
|
|
195
|
+
-1,
|
|
196
|
+
'expected the COPILOT phase to handle a standards-only Copilot fix outcome',
|
|
197
|
+
);
|
|
198
|
+
const standardsBranch = convergeSource.slice(standardsBranchStart, standardsBranchStart + 600);
|
|
199
|
+
const resetIndex = standardsBranch.indexOf('copilotDown = false');
|
|
200
|
+
const finalizeIndex = standardsBranch.indexOf("phase = 'FINALIZE'");
|
|
201
|
+
assert.notEqual(
|
|
202
|
+
resetIndex,
|
|
203
|
+
-1,
|
|
204
|
+
'expected the standards-only sub-path to reset copilotDown so a recovered Copilot is not bypassed',
|
|
205
|
+
);
|
|
206
|
+
assert.notEqual(finalizeIndex, -1, 'expected the standards-only sub-path to reach FINALIZE');
|
|
207
|
+
assert.ok(
|
|
208
|
+
resetIndex < finalizeIndex,
|
|
209
|
+
'expected copilotDown to be cleared before the transition to FINALIZE',
|
|
210
|
+
);
|
|
211
|
+
assert.match(
|
|
212
|
+
standardsBranch.slice(0, finalizeIndex),
|
|
213
|
+
/copilotNote = null/,
|
|
214
|
+
'expected the standards-only sub-path to clear the stale copilotNote alongside copilotDown',
|
|
215
|
+
);
|
|
216
|
+
});
|
|
217
|
+
|
|
218
|
+
test('the COPILOT phase recomputes copilotDown from each gate outcome via resolveCopilotDown', () => {
|
|
219
|
+
const copilotPhaseStart = convergeSource.indexOf("if (phase === 'COPILOT') {");
|
|
220
|
+
assert.notEqual(copilotPhaseStart, -1, 'expected a COPILOT phase block');
|
|
221
|
+
const finalizePhaseStart = convergeSource.indexOf(
|
|
222
|
+
"if (phase === 'FINALIZE') {",
|
|
223
|
+
copilotPhaseStart,
|
|
224
|
+
);
|
|
225
|
+
assert.notEqual(finalizePhaseStart, -1, 'expected a FINALIZE phase block after COPILOT');
|
|
226
|
+
const copilotPhase = convergeSource.slice(copilotPhaseStart, finalizePhaseStart);
|
|
227
|
+
assert.match(
|
|
228
|
+
copilotPhase,
|
|
229
|
+
/copilotDown = resolveCopilotDown\(copilotOutcome\)/,
|
|
230
|
+
'expected the COPILOT phase to recompute copilotDown from the current outcome so a recovered Copilot is never bypassed',
|
|
231
|
+
);
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
test('markReady receives copilotDown so it can opt the unflagged hook out of the Copilot gate', () => {
|
|
235
|
+
const finalizeStart = convergeSource.indexOf("if (phase === 'FINALIZE') {");
|
|
236
|
+
assert.notEqual(finalizeStart, -1, 'expected a FINALIZE phase block');
|
|
237
|
+
const markReadyCall = convergeSource.indexOf('await markReady(', finalizeStart);
|
|
238
|
+
assert.notEqual(markReadyCall, -1, 'expected the FINALIZE phase to call markReady');
|
|
239
|
+
const callSlice = convergeSource.slice(markReadyCall, markReadyCall + 40);
|
|
240
|
+
assert.match(
|
|
241
|
+
callSlice,
|
|
242
|
+
/markReady\(head,\s*copilotDown\)/,
|
|
243
|
+
'expected markReady to receive copilotDown so the mark-ready agent can opt the unflagged hook out of the Copilot gate',
|
|
244
|
+
);
|
|
245
|
+
});
|
|
246
|
+
|
|
247
|
+
// Checks the markReady source text: it references copilotDown, names the
// CLAUDE_REVIEWS_DISABLED variable, and names `copilot` as a standalone token.
test('the markReady prompt opts the unflagged convergence hook out of Copilot when copilotDown', () => {
  const markReadyBody = functionBody('markReady');
  assert.match(
    markReadyBody,
    /copilotDown/,
    'expected markReady to branch on copilotDown',
  );
  assert.match(
    markReadyBody,
    /CLAUDE_REVIEWS_DISABLED/,
    'expected the markReady prompt to set CLAUDE_REVIEWS_DISABLED so the unflagged hook re-derives the Copilot bypass',
  );
  // A bare /copilot/ is already satisfied by the `copilotDown` identifier matched
  // above, so it can never fail on its own. Require `copilot` as a standalone
  // token: not part of a longer identifier and not part of a hyphenated flag.
  assert.match(
    markReadyBody,
    /(?<![\w-])copilot(?![\w-])/,
    'expected the markReady opt-out to name the copilot token',
  );
});
|
|
265
|
+
|