cclaw-cli 0.47.0 → 0.48.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -1
- package/dist/artifact-linter.d.ts +9 -2
- package/dist/artifact-linter.js +45 -2
- package/dist/config.d.ts +6 -6
- package/dist/config.js +22 -0
- package/dist/constants.d.ts +10 -1
- package/dist/constants.js +19 -10
- package/dist/content/contracts.d.ts +1 -1
- package/dist/content/contracts.js +1 -1
- package/dist/content/core-agents.d.ts +53 -1
- package/dist/content/core-agents.js +6 -0
- package/dist/content/{harnesses-doc.js → harness-doc.js} +32 -1
- package/dist/content/harness-playbooks.js +4 -4
- package/dist/content/ideate-command.js +19 -19
- package/dist/content/observe.js +22 -1
- package/dist/content/opencode-plugin.js +5 -1
- package/dist/content/skills.js +2 -2
- package/dist/content/stage-schema.js +36 -8
- package/dist/content/stages/design.js +2 -2
- package/dist/content/stages/review.js +1 -1
- package/dist/content/stages/ship.js +2 -0
- package/dist/content/stages/tdd.js +8 -4
- package/dist/content/templates.js +15 -13
- package/dist/content/utility-skills.d.ts +7 -1
- package/dist/content/utility-skills.js +5 -0
- package/dist/delegation.d.ts +10 -0
- package/dist/delegation.js +111 -33
- package/dist/doctor.js +80 -12
- package/dist/flow-state.d.ts +9 -1
- package/dist/flow-state.js +26 -9
- package/dist/fs-utils.d.ts +9 -0
- package/dist/fs-utils.js +35 -1
- package/dist/gate-evidence.js +21 -2
- package/dist/gitignore.js +6 -3
- package/dist/harness-adapters.d.ts +2 -2
- package/dist/harness-adapters.js +13 -3
- package/dist/install.js +68 -10
- package/dist/internal/detect-public-api-changes.d.ts +5 -0
- package/dist/internal/detect-public-api-changes.js +45 -0
- package/dist/knowledge-store.js +2 -2
- package/dist/policy.js +3 -2
- package/dist/retro-gate.js +41 -15
- package/dist/run-archive.js +63 -33
- package/dist/run-persistence.js +12 -4
- package/dist/tdd-cycle.js +6 -1
- package/dist/types.d.ts +6 -1
- package/package.json +4 -1
- /package/dist/content/{harnesses-doc.d.ts → harness-doc.d.ts} +0 -0
|
@@ -1,7 +1,18 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { FLOW_STAGES, FLOW_TRACKS, TRACK_STAGES } from "../types.js";
|
|
2
|
+
import { STAGE_TO_SKILL_FOLDER } from "../constants.js";
|
|
3
3
|
import { BRAINSTORM, SCOPE, DESIGN, SPEC, PLAN, TDD, REVIEW, SHIP } from "./stages/index.js";
|
|
4
4
|
import { tddStageForTrack } from "./stages/tdd.js";
|
|
5
|
+
const ARTIFACT_STAGE_BY_PATH = {
|
|
6
|
+
".cclaw/artifacts/01-brainstorm.md": "brainstorm",
|
|
7
|
+
".cclaw/artifacts/02-scope.md": "scope",
|
|
8
|
+
".cclaw/artifacts/02a-research.md": "design",
|
|
9
|
+
".cclaw/artifacts/03-design.md": "design",
|
|
10
|
+
".cclaw/artifacts/04-spec.md": "spec",
|
|
11
|
+
".cclaw/artifacts/05-plan.md": "plan",
|
|
12
|
+
".cclaw/artifacts/06-tdd.md": "tdd",
|
|
13
|
+
".cclaw/artifacts/07-review.md": "review",
|
|
14
|
+
".cclaw/artifacts/08-ship.md": "ship"
|
|
15
|
+
};
|
|
5
16
|
const REQUIRED_GATE_IDS = {
|
|
6
17
|
brainstorm: [
|
|
7
18
|
"brainstorm_approaches_compared",
|
|
@@ -36,6 +47,7 @@ const REQUIRED_GATE_IDS = {
|
|
|
36
47
|
"tdd_green_full_suite",
|
|
37
48
|
"tdd_refactor_completed",
|
|
38
49
|
"tdd_verified_before_complete",
|
|
50
|
+
"tdd_docs_drift_check",
|
|
39
51
|
...(track === "quick" ? [] : ["tdd_traceable_to_plan"])
|
|
40
52
|
],
|
|
41
53
|
review: (track) => [
|
|
@@ -92,6 +104,16 @@ function tieredArtifactValidation(stage, rows) {
|
|
|
92
104
|
};
|
|
93
105
|
});
|
|
94
106
|
}
|
|
107
|
+
function readsFromForTrack(readsFrom, track) {
|
|
108
|
+
const stageSet = new Set(TRACK_STAGES[track]);
|
|
109
|
+
return readsFrom.filter((artifactPath) => {
|
|
110
|
+
const stage = ARTIFACT_STAGE_BY_PATH[artifactPath];
|
|
111
|
+
if (!stage) {
|
|
112
|
+
return true;
|
|
113
|
+
}
|
|
114
|
+
return stageSet.has(stage);
|
|
115
|
+
});
|
|
116
|
+
}
|
|
95
117
|
// ---------------------------------------------------------------------------
|
|
96
118
|
// Stage map and accessors
|
|
97
119
|
// ---------------------------------------------------------------------------
|
|
@@ -199,8 +221,8 @@ const STAGE_AUTO_SUBAGENT_DISPATCH = {
|
|
|
199
221
|
},
|
|
200
222
|
{
|
|
201
223
|
agent: "reviewer",
|
|
202
|
-
mode: "
|
|
203
|
-
when: "
|
|
224
|
+
mode: "mandatory",
|
|
225
|
+
when: "Mandatory when the diff exceeds 100 changed lines, touches more than 10 files, or modifies trust boundaries — dispatch a SECOND, independent reviewer with the adversarial-review skill loaded so the review army has at least two voices on a high-blast-radius change.",
|
|
204
226
|
purpose: "Adversarial second-opinion review on large or trust-sensitive diffs. The second reviewer treats the implementation as hostile and tries to break it (hostile-user, future-maintainer, competitor lenses) instead of sympathetically explaining it.",
|
|
205
227
|
requiresUserGate: false,
|
|
206
228
|
skill: "adversarial-review"
|
|
@@ -233,23 +255,29 @@ const STAGE_AUTO_SUBAGENT_DISPATCH = {
|
|
|
233
255
|
};
|
|
234
256
|
/** Transition guard: agents with `mode: "mandatory"` in auto-subagent dispatch for this stage. */
|
|
235
257
|
export function mandatoryDelegationsForStage(stage) {
|
|
236
|
-
return STAGE_AUTO_SUBAGENT_DISPATCH[stage]
|
|
237
|
-
|
|
238
|
-
|
|
258
|
+
return [...new Set(STAGE_AUTO_SUBAGENT_DISPATCH[stage]
|
|
259
|
+
.filter((d) => d.mode === "mandatory")
|
|
260
|
+
.map((d) => d.agent))];
|
|
239
261
|
}
|
|
240
262
|
export function stageSchema(stage, track = "standard") {
|
|
241
263
|
const base = stage === "tdd" ? tddStageForTrack(track) : STAGE_SCHEMA_MAP[stage];
|
|
242
264
|
const tieredGates = tieredStageGates(stage, base.requiredGates, track);
|
|
243
265
|
const tieredValidation = tieredArtifactValidation(stage, base.artifactValidation);
|
|
266
|
+
const crossStageTrace = {
|
|
267
|
+
...base.crossStageTrace,
|
|
268
|
+
readsFrom: readsFromForTrack(base.crossStageTrace.readsFrom, track)
|
|
269
|
+
};
|
|
244
270
|
return {
|
|
245
271
|
...base,
|
|
272
|
+
skillFolder: STAGE_TO_SKILL_FOLDER[stage],
|
|
273
|
+
crossStageTrace,
|
|
246
274
|
requiredGates: tieredGates,
|
|
247
275
|
artifactValidation: tieredValidation,
|
|
248
276
|
mandatoryDelegations: mandatoryDelegationsForStage(stage)
|
|
249
277
|
};
|
|
250
278
|
}
|
|
251
279
|
export function orderedStageSchemas(track = "standard") {
|
|
252
|
-
return
|
|
280
|
+
return FLOW_STAGES.map((stage) => stageSchema(stage, track));
|
|
253
281
|
}
|
|
254
282
|
export function stageGateIds(stage, track = "standard") {
|
|
255
283
|
return stageSchema(stage, track).requiredGates
|
|
@@ -10,7 +10,7 @@ export const DESIGN = {
|
|
|
10
10
|
ironLaw: "NO DESIGN DECISION WITHOUT A LABELED DIAGRAM, A REJECTED ALTERNATIVE, AND A NAMED FAILURE MODE.",
|
|
11
11
|
purpose: "Lock architecture, data flow, failure modes, and test/performance expectations through rigorous interactive review.",
|
|
12
12
|
whenToUse: [
|
|
13
|
-
"After scope
|
|
13
|
+
"After scope agreement approval",
|
|
14
14
|
"Before writing final spec and execution plan",
|
|
15
15
|
"When architecture risks need explicit treatment"
|
|
16
16
|
],
|
|
@@ -79,7 +79,7 @@ export const DESIGN = {
|
|
|
79
79
|
"What-already-exists section produced.",
|
|
80
80
|
"Completion dashboard lists every review section status, decision count, and unresolved items (or 'None')."
|
|
81
81
|
],
|
|
82
|
-
inputs: ["scope
|
|
82
|
+
inputs: ["scope agreement artifact", "system constraints", "non-functional requirements"],
|
|
83
83
|
requiredContext: [
|
|
84
84
|
"parallel research synthesis from `.cclaw/artifacts/02a-research.md`",
|
|
85
85
|
"existing architecture and boundaries",
|
|
@@ -201,7 +201,7 @@ export const REVIEW = {
|
|
|
201
201
|
},
|
|
202
202
|
artifactValidation: [
|
|
203
203
|
{ section: "Layer 1 Verdict", required: true, validationRule: "Per-criterion pass/fail with references." },
|
|
204
|
-
{ section: "Layer 2 Findings", required: false, validationRule: "Each finding has severity, description, and resolution status." },
|
|
204
|
+
{ section: "Layer 2 Findings", required: false, validationRule: "Each finding has severity, description, and resolution status. Security coverage must include either explicit security findings or `NO_CHANGE_ATTESTATION: <reason>` when no security-relevant changes were found." },
|
|
205
205
|
{ section: "Review Army Contract", required: true, validationRule: "Structured findings include id/severity/confidence/fingerprint/reportedBy/status with dedup reconciliation summary." },
|
|
206
206
|
{ section: "Review Readiness Dashboard", required: false, validationRule: "Includes a per-pass table (Layer 1 / Layer 2 / Adversarial / Schema) with a 'Completed at' column, a Delegation log snapshot block (path .cclaw/state/delegation-log.json with required/completed/waived/pending), a Staleness signal block (commit at last review pass and current commit), and a Headline with open critical blockers + ship recommendation. At minimum, the section text must contain the substrings 'Completed at', 'delegation-log.json', 'commit at last review pass', and 'Ship recommendation'." },
|
|
207
207
|
{ section: "Completeness Score", required: false, validationRule: "Records AC coverage, task coverage, test-slice coverage, and adversarial-review pass status as numeric or boolean values. At minimum, a line like 'AC coverage: N/M' or 'AC coverage: 100%'." },
|
|
@@ -100,6 +100,8 @@ export const SHIP = {
|
|
|
100
100
|
"FINALIZE_NO_VCS"
|
|
101
101
|
],
|
|
102
102
|
artifactFile: "08-ship.md",
|
|
103
|
+
// `done` exits the stage pipeline. Archive semantics are handled by the
|
|
104
|
+
// closeout substate machine (`idle` -> ... -> `archived`) in flow-state.
|
|
103
105
|
next: "done",
|
|
104
106
|
reviewSections: [
|
|
105
107
|
{
|
|
@@ -63,7 +63,8 @@ export const TDD = {
|
|
|
63
63
|
{ id: "tdd_green_full_suite", description: "Full relevant suite passes in GREEN state." },
|
|
64
64
|
{ id: "tdd_refactor_completed", description: "Refactor pass completed with behavior preservation verified." },
|
|
65
65
|
{ id: "tdd_verified_before_complete", description: "Fresh verification evidence includes test command, commit SHA, and explicit pass/fail status." },
|
|
66
|
-
{ id: "tdd_traceable_to_plan", description: "Change traceability to plan slice is explicit." }
|
|
66
|
+
{ id: "tdd_traceable_to_plan", description: "Change traceability to plan slice is explicit." },
|
|
67
|
+
{ id: "tdd_docs_drift_check", description: "When public API/config/CLI surfaces change, docs drift is addressed via a completed doc-updater pass." }
|
|
67
68
|
],
|
|
68
69
|
requiredEvidence: [
|
|
69
70
|
"Artifact updated at `.cclaw/artifacts/06-tdd.md` with RED, GREEN, and REFACTOR sections.",
|
|
@@ -206,9 +207,12 @@ function tddQuickTrackVariant() {
|
|
|
206
207
|
checklist: TDD.checklist.map(quickTrackText),
|
|
207
208
|
interactionProtocol: TDD.interactionProtocol.map(quickTrackText),
|
|
208
209
|
process: TDD.process.map(quickTrackText),
|
|
209
|
-
requiredGates: TDD.requiredGates
|
|
210
|
-
|
|
211
|
-
|
|
210
|
+
requiredGates: TDD.requiredGates
|
|
211
|
+
.filter((gate) => gate.id !== "tdd_traceable_to_plan")
|
|
212
|
+
.map((gate) => ({
|
|
213
|
+
...gate,
|
|
214
|
+
description: quickTrackText(gate.description)
|
|
215
|
+
})),
|
|
212
216
|
requiredEvidence: TDD.requiredEvidence.map(quickTrackText),
|
|
213
217
|
inputs: TDD.inputs.map(quickTrackText),
|
|
214
218
|
requiredContext: ["spec artifact", "existing test patterns"],
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { CCLAW_VERSION } from "../constants.js";
|
|
2
2
|
import { orderedStageSchemas } from "./stage-schema.js";
|
|
3
|
+
import { FLOW_STAGES } from "../types.js";
|
|
3
4
|
export const ARTIFACT_TEMPLATES = {
|
|
4
5
|
"01-brainstorm.md": `---
|
|
5
6
|
stage: brainstorm
|
|
6
7
|
schema_version: 1
|
|
7
|
-
version:
|
|
8
|
+
version: ${CCLAW_VERSION}
|
|
8
9
|
feature: <feature-id>
|
|
9
10
|
locked_decisions: []
|
|
10
11
|
inputs_hash: sha256:pending
|
|
@@ -52,7 +53,7 @@ inputs_hash: sha256:pending
|
|
|
52
53
|
"02-scope.md": `---
|
|
53
54
|
stage: scope
|
|
54
55
|
schema_version: 1
|
|
55
|
-
version:
|
|
56
|
+
version: ${CCLAW_VERSION}
|
|
56
57
|
feature: <feature-id>
|
|
57
58
|
locked_decisions: []
|
|
58
59
|
inputs_hash: sha256:pending
|
|
@@ -158,7 +159,7 @@ inputs_hash: sha256:pending
|
|
|
158
159
|
"02a-research.md": `---
|
|
159
160
|
stage: design
|
|
160
161
|
schema_version: 1
|
|
161
|
-
version:
|
|
162
|
+
version: ${CCLAW_VERSION}
|
|
162
163
|
feature: <feature-id>
|
|
163
164
|
locked_decisions: []
|
|
164
165
|
inputs_hash: sha256:pending
|
|
@@ -199,7 +200,7 @@ inputs_hash: sha256:pending
|
|
|
199
200
|
"03-design.md": `---
|
|
200
201
|
stage: design
|
|
201
202
|
schema_version: 1
|
|
202
|
-
version:
|
|
203
|
+
version: ${CCLAW_VERSION}
|
|
203
204
|
feature: <feature-id>
|
|
204
205
|
locked_decisions: []
|
|
205
206
|
inputs_hash: sha256:pending
|
|
@@ -303,7 +304,7 @@ inputs_hash: sha256:pending
|
|
|
303
304
|
"04-spec.md": `---
|
|
304
305
|
stage: spec
|
|
305
306
|
schema_version: 1
|
|
306
|
-
version:
|
|
307
|
+
version: ${CCLAW_VERSION}
|
|
307
308
|
feature: <feature-id>
|
|
308
309
|
locked_decisions: []
|
|
309
310
|
inputs_hash: sha256:pending
|
|
@@ -359,7 +360,7 @@ inputs_hash: sha256:pending
|
|
|
359
360
|
"05-plan.md": `---
|
|
360
361
|
stage: plan
|
|
361
362
|
schema_version: 1
|
|
362
|
-
version:
|
|
363
|
+
version: ${CCLAW_VERSION}
|
|
363
364
|
feature: <feature-id>
|
|
364
365
|
locked_decisions: []
|
|
365
366
|
inputs_hash: sha256:pending
|
|
@@ -438,7 +439,7 @@ Execution rule: complete and verify each batch before starting the next batch.
|
|
|
438
439
|
"06-tdd.md": `---
|
|
439
440
|
stage: tdd
|
|
440
441
|
schema_version: 1
|
|
441
|
-
version:
|
|
442
|
+
version: ${CCLAW_VERSION}
|
|
442
443
|
feature: <feature-id>
|
|
443
444
|
locked_decisions: []
|
|
444
445
|
inputs_hash: sha256:pending
|
|
@@ -505,7 +506,7 @@ inputs_hash: sha256:pending
|
|
|
505
506
|
"07-review.md": `---
|
|
506
507
|
stage: review
|
|
507
508
|
schema_version: 1
|
|
508
|
-
version:
|
|
509
|
+
version: ${CCLAW_VERSION}
|
|
509
510
|
feature: <feature-id>
|
|
510
511
|
locked_decisions: []
|
|
511
512
|
inputs_hash: sha256:pending
|
|
@@ -522,6 +523,7 @@ inputs_hash: sha256:pending
|
|
|
522
523
|
| ID | Severity | Category | Description | Status |
|
|
523
524
|
|---|---|---|---|---|
|
|
524
525
|
| R-1 | Critical/Important/Suggestion | correctness/security/performance/architecture | | open/resolved |
|
|
526
|
+
- NO_CHANGE_ATTESTATION: <required when Category=security has no entries; explain why no security-relevant changes were detected>
|
|
525
527
|
|
|
526
528
|
## Incoming Feedback Queue
|
|
527
529
|
| ID | Source | Severity | File:line | Request | Status | Evidence |
|
|
@@ -613,7 +615,7 @@ inputs_hash: sha256:pending
|
|
|
613
615
|
"08-ship.md": `---
|
|
614
616
|
stage: ship
|
|
615
617
|
schema_version: 1
|
|
616
|
-
version:
|
|
618
|
+
version: ${CCLAW_VERSION}
|
|
617
619
|
feature: <feature-id>
|
|
618
620
|
locked_decisions: []
|
|
619
621
|
inputs_hash: sha256:pending
|
|
@@ -668,7 +670,7 @@ inputs_hash: sha256:pending
|
|
|
668
670
|
"09-retro.md": `---
|
|
669
671
|
stage: retro
|
|
670
672
|
schema_version: 1
|
|
671
|
-
version:
|
|
673
|
+
version: ${CCLAW_VERSION}
|
|
672
674
|
feature: <feature-id>
|
|
673
675
|
locked_decisions: []
|
|
674
676
|
inputs_hash: sha256:pending
|
|
@@ -802,7 +804,7 @@ Track-specific skips are allowed only when \`flow-state.track\` + \`skippedStage
|
|
|
802
804
|
export function buildRulesJson() {
|
|
803
805
|
return {
|
|
804
806
|
version: 1,
|
|
805
|
-
stage_order:
|
|
807
|
+
stage_order: FLOW_STAGES,
|
|
806
808
|
stage_gates: Object.fromEntries(orderedStageSchemas().map((schema) => [
|
|
807
809
|
schema.stage,
|
|
808
810
|
schema.requiredGates.map((gate) => gate.id)
|
|
@@ -820,7 +822,7 @@ export function buildRulesJson() {
|
|
|
820
822
|
"conventional_commits"
|
|
821
823
|
],
|
|
822
824
|
MUST_NEVER: [
|
|
823
|
-
"
|
|
825
|
+
"skip_tdd_stage",
|
|
824
826
|
"ship_with_critical_findings",
|
|
825
827
|
"implement_in_brainstorm",
|
|
826
828
|
"manual_edit_generated",
|
|
@@ -48,4 +48,10 @@ export declare const LANGUAGE_RULE_PACK_GENERATORS: Record<string, () => string>
|
|
|
48
48
|
*/
|
|
49
49
|
export declare const LEGACY_LANGUAGE_RULE_PACK_FOLDERS: readonly ["language-typescript", "language-python", "language-go"];
|
|
50
50
|
export declare const UTILITY_SKILL_FOLDERS: readonly ["security", "debugging", "performance", "ci-cd", "docs", "executing-plans", "verification-before-completion", "finishing-a-development-branch", "context-engineering", "source-driven-development", "frontend-accessibility", "landscape-check", "adversarial-review", "security-audit", "knowledge-curation", "retrospective", "document-review", "receiving-code-review"];
|
|
51
|
-
export
|
|
51
|
+
export type UtilitySkillFolder = (typeof UTILITY_SKILL_FOLDERS)[number];
|
|
52
|
+
/**
|
|
53
|
+
* One entry per `UTILITY_SKILL_FOLDERS` slot. Typed via the tuple so that
|
|
54
|
+
* adding a folder without a generator (or vice versa) is a TypeScript
|
|
55
|
+
* error — keeps the two sources of truth in lockstep at compile time.
|
|
56
|
+
*/
|
|
57
|
+
export declare const UTILITY_SKILL_MAP: Record<UtilitySkillFolder, () => string>;
|
|
@@ -1735,6 +1735,11 @@ export const UTILITY_SKILL_FOLDERS = [
|
|
|
1735
1735
|
"document-review",
|
|
1736
1736
|
"receiving-code-review"
|
|
1737
1737
|
];
|
|
1738
|
+
/**
|
|
1739
|
+
* One entry per `UTILITY_SKILL_FOLDERS` slot. Typed via the tuple so that
|
|
1740
|
+
* adding a folder without a generator (or vice versa) is a TypeScript
|
|
1741
|
+
* error — keeps the two sources of truth in lockstep at compile time.
|
|
1742
|
+
*/
|
|
1738
1743
|
export const UTILITY_SKILL_MAP = {
|
|
1739
1744
|
security: securityReviewSkill,
|
|
1740
1745
|
debugging: debuggingSkill,
|
package/dist/delegation.d.ts
CHANGED
|
@@ -66,6 +66,16 @@ export type DelegationLedger = {
|
|
|
66
66
|
runId: string;
|
|
67
67
|
entries: DelegationEntry[];
|
|
68
68
|
};
|
|
69
|
+
/**
|
|
70
|
+
* Heuristic: does a changed file path strongly imply a trust-boundary
|
|
71
|
+
* surface? Used to gate adversarial-reviewer requirements on review.
|
|
72
|
+
*
|
|
73
|
+
* Matches authN/Z, credentials, crypto, policy, or explicit sanitization
|
|
74
|
+
* or injection handling. Intentionally excludes broad terms like `input`
|
|
75
|
+
* and `validation` because they match innocuous paths such as
|
|
76
|
+
* `form-input.ts` or `number-validation.ts` and produce false positives.
|
|
77
|
+
*/
|
|
78
|
+
export declare function isTrustBoundaryPath(filePath: string): boolean;
|
|
69
79
|
export declare function readDelegationLedger(projectRoot: string): Promise<DelegationLedger>;
|
|
70
80
|
export declare function appendDelegation(projectRoot: string, entry: DelegationEntry): Promise<void>;
|
|
71
81
|
/**
|
package/dist/delegation.js
CHANGED
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
import fs from "node:fs/promises";
|
|
2
2
|
import path from "node:path";
|
|
3
|
+
import { execFile } from "node:child_process";
|
|
4
|
+
import { promisify } from "node:util";
|
|
3
5
|
import { RUNTIME_ROOT } from "./constants.js";
|
|
4
6
|
import { readConfig } from "./config.js";
|
|
5
7
|
import { exists, withDirectoryLock, writeFileSafe } from "./fs-utils.js";
|
|
6
8
|
import { HARNESS_ADAPTERS } from "./harness-adapters.js";
|
|
7
9
|
import { readFlowState } from "./runs.js";
|
|
8
10
|
import { stageSchema } from "./content/stage-schema.js";
|
|
11
|
+
const execFileAsync = promisify(execFile);
|
|
9
12
|
function delegationLogPath(projectRoot) {
|
|
10
13
|
return path.join(projectRoot, RUNTIME_ROOT, "state", "delegation-log.json");
|
|
11
14
|
}
|
|
@@ -15,6 +18,94 @@ function delegationLockPath(projectRoot) {
|
|
|
15
18
|
function createSpanId() {
|
|
16
19
|
return `dspan-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 10)}`;
|
|
17
20
|
}
|
|
21
|
+
async function resolveReviewDiffBase(projectRoot) {
|
|
22
|
+
let head = "";
|
|
23
|
+
try {
|
|
24
|
+
head = (await execFileAsync("git", ["rev-parse", "HEAD"], { cwd: projectRoot })).stdout.trim();
|
|
25
|
+
}
|
|
26
|
+
catch {
|
|
27
|
+
return null;
|
|
28
|
+
}
|
|
29
|
+
const candidates = ["origin/main", "origin/master", "main", "master"];
|
|
30
|
+
for (const candidate of candidates) {
|
|
31
|
+
try {
|
|
32
|
+
await execFileAsync("git", ["rev-parse", "--verify", candidate], { cwd: projectRoot });
|
|
33
|
+
const { stdout } = await execFileAsync("git", ["merge-base", "HEAD", candidate], {
|
|
34
|
+
cwd: projectRoot
|
|
35
|
+
});
|
|
36
|
+
const base = stdout.trim();
|
|
37
|
+
if (base.length > 0 && base !== head) {
|
|
38
|
+
return base;
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
catch {
|
|
42
|
+
continue;
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
try {
|
|
46
|
+
const { stdout } = await execFileAsync("git", ["rev-parse", "HEAD~1"], {
|
|
47
|
+
cwd: projectRoot
|
|
48
|
+
});
|
|
49
|
+
const base = stdout.trim();
|
|
50
|
+
return base.length > 0 ? base : null;
|
|
51
|
+
}
|
|
52
|
+
catch {
|
|
53
|
+
return null;
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* Heuristic: does a changed file path strongly imply a trust-boundary
|
|
58
|
+
* surface? Used to gate adversarial-reviewer requirements on review.
|
|
59
|
+
*
|
|
60
|
+
* Matches authN/Z, credentials, crypto, policy, or explicit sanitization
|
|
61
|
+
* or injection handling. Intentionally excludes broad terms like `input`
|
|
62
|
+
* and `validation` because they match innocuous paths such as
|
|
63
|
+
* `form-input.ts` or `number-validation.ts` and produce false positives.
|
|
64
|
+
*/
|
|
65
|
+
export function isTrustBoundaryPath(filePath) {
|
|
66
|
+
return /(auth|security|secret|token|credential|permission|acl|policy|oauth|session|encrypt|decrypt|sanitize|untrusted|csrf|xss|injection|taint)/iu.test(filePath);
|
|
67
|
+
}
|
|
68
|
+
async function detectReviewTriggers(projectRoot) {
|
|
69
|
+
const empty = {
|
|
70
|
+
changedFiles: 0,
|
|
71
|
+
changedLines: 0,
|
|
72
|
+
trustBoundaryChanged: false,
|
|
73
|
+
requireAdversarialReviewer: false
|
|
74
|
+
};
|
|
75
|
+
const base = await resolveReviewDiffBase(projectRoot);
|
|
76
|
+
if (!base) {
|
|
77
|
+
return empty;
|
|
78
|
+
}
|
|
79
|
+
try {
|
|
80
|
+
const range = `${base}..HEAD`;
|
|
81
|
+
const shortstat = await execFileAsync("git", ["diff", "--shortstat", range], {
|
|
82
|
+
cwd: projectRoot
|
|
83
|
+
});
|
|
84
|
+
const short = shortstat.stdout.trim();
|
|
85
|
+
const changedFiles = Number((/(\d+)\s+files?\s+changed/u.exec(short)?.[1] ?? "0"));
|
|
86
|
+
const insertions = Number((/(\d+)\s+insertions?\(\+\)/u.exec(short)?.[1] ?? "0"));
|
|
87
|
+
const deletions = Number((/(\d+)\s+deletions?\(-\)/u.exec(short)?.[1] ?? "0"));
|
|
88
|
+
const changedLines = insertions + deletions;
|
|
89
|
+
const names = await execFileAsync("git", ["diff", "--name-only", range], {
|
|
90
|
+
cwd: projectRoot
|
|
91
|
+
});
|
|
92
|
+
const changedPaths = names.stdout
|
|
93
|
+
.split(/\r?\n/gu)
|
|
94
|
+
.map((line) => line.trim())
|
|
95
|
+
.filter((line) => line.length > 0);
|
|
96
|
+
const trustBoundaryChanged = changedPaths.some((p) => isTrustBoundaryPath(p));
|
|
97
|
+
const requireAdversarialReviewer = changedLines > 100 || changedFiles > 10 || trustBoundaryChanged;
|
|
98
|
+
return {
|
|
99
|
+
changedFiles,
|
|
100
|
+
changedLines,
|
|
101
|
+
trustBoundaryChanged,
|
|
102
|
+
requireAdversarialReviewer
|
|
103
|
+
};
|
|
104
|
+
}
|
|
105
|
+
catch {
|
|
106
|
+
return empty;
|
|
107
|
+
}
|
|
108
|
+
}
|
|
18
109
|
function isDelegationTokenUsage(value) {
|
|
19
110
|
if (!value || typeof value !== "object" || Array.isArray(value))
|
|
20
111
|
return false;
|
|
@@ -76,6 +167,8 @@ function parseLedger(raw, runId) {
|
|
|
76
167
|
for (const item of entriesRaw) {
|
|
77
168
|
if (isDelegationEntry(item)) {
|
|
78
169
|
const ts = item.startTs ?? item.ts ?? new Date().toISOString();
|
|
170
|
+
const inferredFulfillmentMode = item.fulfillmentMode
|
|
171
|
+
?? (item.status === "completed" ? "isolated" : undefined);
|
|
79
172
|
entries.push({
|
|
80
173
|
...item,
|
|
81
174
|
spanId: item.spanId ?? createSpanId(),
|
|
@@ -85,6 +178,7 @@ function parseLedger(raw, runId) {
|
|
|
85
178
|
? item.retryCount
|
|
86
179
|
: 0,
|
|
87
180
|
evidenceRefs: Array.isArray(item.evidenceRefs) ? item.evidenceRefs : [],
|
|
181
|
+
fulfillmentMode: inferredFulfillmentMode,
|
|
88
182
|
schemaVersion: 1
|
|
89
183
|
});
|
|
90
184
|
}
|
|
@@ -126,6 +220,12 @@ export async function appendDelegation(projectRoot, entry) {
|
|
|
126
220
|
if (!Array.isArray(stamped.evidenceRefs)) {
|
|
127
221
|
stamped.evidenceRefs = [];
|
|
128
222
|
}
|
|
223
|
+
if (stamped.status === "completed" && stamped.fulfillmentMode === undefined) {
|
|
224
|
+
const config = await readConfig(projectRoot).catch(() => null);
|
|
225
|
+
const harnesses = config?.harnesses ?? [];
|
|
226
|
+
const fallbacks = harnesses.map((h) => HARNESS_ADAPTERS[h].capabilities.subagentFallback);
|
|
227
|
+
stamped.fulfillmentMode = expectedFulfillmentMode(fallbacks);
|
|
228
|
+
}
|
|
129
229
|
// Idempotency: if a caller (or a retried hook) tries to append a row
|
|
130
230
|
// with a spanId that already exists in the ledger, treat it as a no-op
|
|
131
231
|
// instead of growing the log with duplicate entries that subsequent
|
|
@@ -174,51 +274,29 @@ export async function checkMandatoryDelegations(projectRoot, stage) {
|
|
|
174
274
|
const harnesses = config?.harnesses ?? [];
|
|
175
275
|
const fallbacks = harnesses.map((h) => HARNESS_ADAPTERS[h].capabilities.subagentFallback);
|
|
176
276
|
const expectedMode = expectedFulfillmentMode(fallbacks);
|
|
177
|
-
const
|
|
277
|
+
const reviewTriggers = stage === "review" ? await detectReviewTriggers(projectRoot) : null;
|
|
178
278
|
for (const agent of mandatory) {
|
|
179
279
|
const rows = forRun.filter((e) => e.agent === agent);
|
|
180
280
|
const completedRows = rows.filter((e) => e.status === "completed");
|
|
181
281
|
const waivedRows = rows.filter((e) => e.status === "waived");
|
|
182
|
-
const
|
|
282
|
+
const requiredCompletedCount = stage === "review" &&
|
|
283
|
+
agent === "reviewer" &&
|
|
284
|
+
reviewTriggers?.requireAdversarialReviewer
|
|
285
|
+
? 2
|
|
286
|
+
: 1;
|
|
287
|
+
const hasCompleted = completedRows.length >= requiredCompletedCount;
|
|
183
288
|
const hasWaived = waivedRows.length > 0;
|
|
184
289
|
const ok = hasCompleted || hasWaived;
|
|
185
290
|
if (!ok) {
|
|
186
|
-
|
|
187
|
-
const existingHarnessWaiver = rows.some((e) => e.status === "waived" && e.waiverReason === "harness_limitation");
|
|
188
|
-
if (!existingHarnessWaiver) {
|
|
189
|
-
await appendDelegation(projectRoot, {
|
|
190
|
-
stage,
|
|
191
|
-
agent,
|
|
192
|
-
mode: "mandatory",
|
|
193
|
-
status: "waived",
|
|
194
|
-
waiverReason: "harness_limitation",
|
|
195
|
-
fulfillmentMode: "harness-waiver",
|
|
196
|
-
ts: new Date().toISOString(),
|
|
197
|
-
runId: activeRunId
|
|
198
|
-
});
|
|
199
|
-
}
|
|
200
|
-
waived.push(agent);
|
|
201
|
-
autoWaived.push(agent);
|
|
202
|
-
}
|
|
203
|
-
else {
|
|
204
|
-
missing.push(agent);
|
|
205
|
-
}
|
|
291
|
+
missing.push(agent);
|
|
206
292
|
continue;
|
|
207
293
|
}
|
|
208
294
|
if (hasWaived) {
|
|
209
295
|
waived.push(agent);
|
|
210
296
|
}
|
|
211
|
-
// Evidence
|
|
212
|
-
//
|
|
213
|
-
|
|
214
|
-
// 2. Any completed row is explicitly stamped `fulfillmentMode:
|
|
215
|
-
// "role-switch"` — even in a mixed install. This closes the loop
|
|
216
|
-
// where a Codex session logs a role-switch completion inside a
|
|
217
|
-
// claude+codex project: the aggregate expectedMode is "isolated"
|
|
218
|
-
// (claude wins), so the role-switch row would previously sail
|
|
219
|
-
// through without evidenceRefs.
|
|
220
|
-
const hasExplicitRoleSwitchRow = completedRows.some((e) => e.fulfillmentMode === "role-switch");
|
|
221
|
-
const evidenceRequired = expectedMode === "role-switch" || hasExplicitRoleSwitchRow;
|
|
297
|
+
// Evidence is required for any non-isolated completion mode. Legacy rows
|
|
298
|
+
// without fulfillmentMode are inferred to `isolated` during parse.
|
|
299
|
+
const evidenceRequired = completedRows.some((e) => (e.fulfillmentMode ?? "isolated") !== "isolated");
|
|
222
300
|
if (hasCompleted &&
|
|
223
301
|
evidenceRequired &&
|
|
224
302
|
!completedRows.some((e) => Array.isArray(e.evidenceRefs) && e.evidenceRefs.length > 0)) {
|