cclaw-cli 1.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/artifact-linter/brainstorm.js +15 -1
- package/dist/artifact-linter/design.js +14 -0
- package/dist/artifact-linter/scope.js +14 -0
- package/dist/artifact-linter/shared.d.ts +1 -0
- package/dist/artifact-linter/shared.js +32 -0
- package/dist/artifact-linter.js +13 -5
- package/dist/cli.js +2 -9
- package/dist/config.d.ts +11 -67
- package/dist/config.js +59 -649
- package/dist/content/hook-events.js +1 -5
- package/dist/content/hook-manifest.d.ts +6 -4
- package/dist/content/hook-manifest.js +16 -65
- package/dist/content/hooks.js +54 -14
- package/dist/content/meta-skill.js +4 -3
- package/dist/content/node-hooks.d.ts +0 -26
- package/dist/content/node-hooks.js +459 -157
- package/dist/content/observe.js +5 -4
- package/dist/content/opencode-plugin.js +1 -78
- package/dist/content/skills-elicitation.d.ts +1 -0
- package/dist/content/skills-elicitation.js +123 -0
- package/dist/content/skills.js +6 -4
- package/dist/content/stages/brainstorm.js +7 -3
- package/dist/content/stages/design.js +6 -2
- package/dist/content/stages/plan.js +2 -2
- package/dist/content/stages/scope.js +9 -5
- package/dist/content/stages/tdd.js +11 -11
- package/dist/content/start-command.js +4 -4
- package/dist/content/templates.js +21 -0
- package/dist/flow-state.d.ts +7 -0
- package/dist/flow-state.js +1 -0
- package/dist/gate-evidence.js +1 -5
- package/dist/hook-schema.js +3 -0
- package/dist/hook-schemas/claude-hooks.v1.json +2 -5
- package/dist/hook-schemas/codex-hooks.v1.json +1 -4
- package/dist/hook-schemas/cursor-hooks.v1.json +1 -3
- package/dist/install.d.ts +2 -7
- package/dist/install.js +32 -123
- package/dist/internal/advance-stage/advance.js +22 -1
- package/dist/internal/advance-stage/parsers.d.ts +1 -0
- package/dist/internal/advance-stage/parsers.js +6 -0
- package/dist/internal/compound-readiness.js +1 -16
- package/dist/internal/early-loop-status.js +1 -3
- package/dist/internal/runtime-integrity.js +0 -20
- package/dist/policy.js +6 -9
- package/dist/run-persistence.d.ts +1 -1
- package/dist/run-persistence.js +29 -2
- package/dist/runtime/run-hook.mjs +459 -265
- package/dist/tdd-verification-evidence.js +6 -18
- package/dist/track-heuristics.d.ts +7 -1
- package/dist/track-heuristics.js +12 -0
- package/dist/types.d.ts +0 -56
- package/package.json +1 -1
package/dist/content/observe.js
CHANGED
|
@@ -31,7 +31,8 @@ function buildClaudeLikeEvents(harness) {
|
|
|
31
31
|
const hookEntry = {
|
|
32
32
|
type: "command",
|
|
33
33
|
command: hookDispatcherCommand(entry.handler),
|
|
34
|
-
...(entry.timeout !== undefined ? { timeout: entry.timeout } : {})
|
|
34
|
+
...(entry.timeout !== undefined ? { timeout: entry.timeout } : {}),
|
|
35
|
+
...(entry.statusMessage !== undefined ? { statusMessage: entry.statusMessage } : {})
|
|
35
36
|
};
|
|
36
37
|
bucket.hooks.push(hookEntry);
|
|
37
38
|
}
|
|
@@ -56,20 +57,20 @@ function buildCursorEvents() {
|
|
|
56
57
|
}
|
|
57
58
|
export function claudeHooksJsonWithObservation() {
|
|
58
59
|
return JSON.stringify({
|
|
59
|
-
cclawHookSchemaVersion:
|
|
60
|
+
cclawHookSchemaVersion: 2,
|
|
60
61
|
hooks: buildClaudeLikeEvents("claude")
|
|
61
62
|
}, null, 2);
|
|
62
63
|
}
|
|
63
64
|
export function cursorHooksJsonWithObservation() {
|
|
64
65
|
return JSON.stringify({
|
|
65
|
-
cclawHookSchemaVersion:
|
|
66
|
+
cclawHookSchemaVersion: 2,
|
|
66
67
|
version: 1,
|
|
67
68
|
hooks: buildCursorEvents()
|
|
68
69
|
}, null, 2);
|
|
69
70
|
}
|
|
70
71
|
export function codexHooksJsonWithObservation() {
|
|
71
72
|
return JSON.stringify({
|
|
72
|
-
cclawHookSchemaVersion:
|
|
73
|
+
cclawHookSchemaVersion: 2,
|
|
73
74
|
hooks: buildClaudeLikeEvents("codex")
|
|
74
75
|
}, null, 2);
|
|
75
76
|
}
|
|
package/dist/content/opencode-plugin.js
CHANGED
|
@@ -607,18 +607,9 @@ export default function cclawPlugin(ctx) {
|
|
|
607
607
|
eventType === "session.compacted" ||
|
|
608
608
|
eventType === "session.cleared" ||
|
|
609
609
|
eventType === "session.updated";
|
|
610
|
-
// session.compacted must run pre-compact BEFORE canonical rehydration,
|
|
611
|
-
// otherwise the injected system prompt can show the pre-compact
|
|
612
|
-
// digest/state until the next lifecycle event.
|
|
613
|
-
if (eventType === "session.compacted") {
|
|
614
|
-
await runHookScript("pre-compact", eventData ?? {});
|
|
615
|
-
}
|
|
616
610
|
if (isSessionLifecycle) {
|
|
617
611
|
// Keep OpenCode aligned with Claude/Cursor/Codex: session-start is
|
|
618
|
-
// the canonical rehydrate path
|
|
619
|
-
// Ralph Loop, compound readiness, and hook-error breadcrumbs. The
|
|
620
|
-
// plugin refreshes its local bootstrap cache afterwards so the system
|
|
621
|
-
// transform sees the side effects from the hook runtime.
|
|
612
|
+
// the canonical rehydrate path.
|
|
622
613
|
await runHookScript("session-start", eventData ?? {});
|
|
623
614
|
await refreshBootstrapCache(true);
|
|
624
615
|
}
|
|
@@ -626,74 +617,6 @@ export default function cclawPlugin(ctx) {
|
|
|
626
617
|
await runHookScript("stop-handoff", { loop_count: 0 });
|
|
627
618
|
}
|
|
628
619
|
},
|
|
629
|
-
"tool.execute.before": async (input, output) => {
|
|
630
|
-
const disabled = isCclawDisabled();
|
|
631
|
-
if (disabled.disabled) {
|
|
632
|
-
// Explicit user override (CCLAW_DISABLE=1 et al): stay fully out
|
|
633
|
-
// of the way. Any real problem with the guard chain should not
|
|
634
|
-
// prevent the user from unblocking themselves.
|
|
635
|
-
noteDisabled(disabled);
|
|
636
|
-
return;
|
|
637
|
-
}
|
|
638
|
-
const payload = normalizeToolPayload(input, output);
|
|
639
|
-
if (isSafeReadOnlyTool(payload)) {
|
|
640
|
-
// Read-only tools bypass guards — they cannot mutate state and
|
|
641
|
-
// blocking them gives users an unusable session when guards are
|
|
642
|
-
// misconfigured or cclaw isn't fully initialized.
|
|
643
|
-
return;
|
|
644
|
-
}
|
|
645
|
-
if (!isCclawInitialized()) {
|
|
646
|
-
// Project has no flow-state or hook runtime: cclaw isn't in use
|
|
647
|
-
// here. Never block the user's tools because of setup they didn't
|
|
648
|
-
// ask for. Surface a single advisory so they can notice.
|
|
649
|
-
noteNotInitialized();
|
|
650
|
-
return;
|
|
651
|
-
}
|
|
652
|
-
const [promptOk, workflowOk] = await Promise.all([
|
|
653
|
-
runHookScript("prompt-guard", payload),
|
|
654
|
-
runHookScript("workflow-guard", payload)
|
|
655
|
-
]);
|
|
656
|
-
if (!promptOk || !workflowOk) {
|
|
657
|
-
const failed = !promptOk ? "prompt-guard" : "workflow-guard";
|
|
658
|
-
const rawDetail = lastHookStderr.get(failed) || "";
|
|
659
|
-
const detail = rawDetail.length > 0 ? rawDetail.slice(-400) : "(no stderr captured)";
|
|
660
|
-
if (looksLikeInfrastructureFailure(rawDetail)) {
|
|
661
|
-
// Never let a broken hook runtime or misrouted child-process
|
|
662
|
-
// stderr (yargs help, Node crash, ENOENT, timeout) masquerade
|
|
663
|
-
// as a policy block. Log the infra hit and let the user keep
|
|
664
|
-
// working regardless of strictness.
|
|
665
|
-
logToFile(
|
|
666
|
-
"infra: " + failed + " non-zero exit with non-guard stderr — treated as infrastructure failure, tool allowed. " +
|
|
667
|
-
"stderr=" + detail.replace(/\\s+/g, " ").slice(0, 300)
|
|
668
|
-
);
|
|
669
|
-
return;
|
|
670
|
-
}
|
|
671
|
-
const strictness = await resolveStrictness();
|
|
672
|
-
if (strictness !== "strict") {
|
|
673
|
-
// Advisory mode (the default) — every guard refusal is a hint,
|
|
674
|
-
// not a hard stop. Users report the "failure" as a log line
|
|
675
|
-
// and keep working. Only \`strictness: strict\` in config.yaml
|
|
676
|
-
// or CCLAW_STRICTNESS=strict upgrades this to a thrown block.
|
|
677
|
-
logToFile(
|
|
678
|
-
"advisory: " + failed + " flagged tool.execute.before (strictness=" +
|
|
679
|
-
strictness + "). detail=" + detail.replace(/\\s+/g, " ").slice(0, 300)
|
|
680
|
-
);
|
|
681
|
-
return;
|
|
682
|
-
}
|
|
683
|
-
throw new Error(
|
|
684
|
-
"cclaw " + failed + " blocked tool.execute.before.\\n" +
|
|
685
|
-
"Reason: " + detail + "\\n" +
|
|
686
|
-
"Diagnose: run \`npx cclaw-cli sync\` in project root.\\n" +
|
|
687
|
-
"Bypass (temporary): export CCLAW_DISABLE=1 before starting OpenCode,\\n" +
|
|
688
|
-
"or set \`strictness: advisory\` in .cclaw/config.yaml."
|
|
689
|
-
);
|
|
690
|
-
}
|
|
691
|
-
},
|
|
692
|
-
"tool.execute.after": async (input, output) => {
|
|
693
|
-
const payload = normalizeToolPayload(input, output);
|
|
694
|
-
await runHookScript("context-monitor", payload);
|
|
695
|
-
void refreshBootstrapCache(false);
|
|
696
|
-
},
|
|
697
620
|
"experimental.chat.system.transform": (payload) => {
|
|
698
621
|
const bootstrap = getBootstrap();
|
|
699
622
|
if (!bootstrap) return payload;
|
|
package/dist/content/skills-elicitation.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function adaptiveElicitationSkillMarkdown(): string;
|
|
package/dist/content/skills-elicitation.js
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { RUNTIME_ROOT } from "../constants.js";
|
|
2
|
+
import { questionBudgetHint } from "../track-heuristics.js";
|
|
3
|
+
import { FLOW_TRACKS } from "../types.js";
|
|
4
|
+
const ELICITATION_STAGES = ["brainstorm", "scope", "design"];
|
|
5
|
+
function renderQuestionBudgetHintTable() {
|
|
6
|
+
const rows = [];
|
|
7
|
+
for (const track of FLOW_TRACKS) {
|
|
8
|
+
for (const stage of ELICITATION_STAGES) {
|
|
9
|
+
const hint = questionBudgetHint(track, stage);
|
|
10
|
+
rows.push(`| \`${track}\` | \`${stage}\` | ${hint.min} | ${hint.recommended} | ${hint.hardCapWarning} |`);
|
|
11
|
+
}
|
|
12
|
+
}
|
|
13
|
+
return `| Track | Stage | Min | Recommended | Hard cap warning |
|
|
14
|
+
|---|---|---|---|---|
|
|
15
|
+
${rows.join("\n")}`;
|
|
16
|
+
}
|
|
17
|
+
export function adaptiveElicitationSkillMarkdown() {
|
|
18
|
+
const budgetTable = renderQuestionBudgetHintTable();
|
|
19
|
+
return `---
|
|
20
|
+
name: adaptive-elicitation
|
|
21
|
+
description: "Harness-native one-question-at-a-time dialogue for brainstorm/scope/design with stop signals, smart-skip, and append-only Q&A logging."
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
# Adaptive Elicitation
|
|
25
|
+
|
|
26
|
+
Pinned anchor: "Don't tell it what to do, give it success criteria and watch it go."
|
|
27
|
+
|
|
28
|
+
## HARD-GATE
|
|
29
|
+
- User does not run cclaw manually. Do not tell the user to run CLI commands for answers.
|
|
30
|
+
- Ask exactly one question per turn and wait for the answer before asking the next one.
|
|
31
|
+
- Use harness-native question tools first; prose fallback is allowed only when the tool is unavailable.
|
|
32
|
+
- Keep a running Q&A trace in the active artifact under \`## Q&A Log\` in \`${RUNTIME_ROOT}/artifacts/\` as append-only rows.
|
|
33
|
+
|
|
34
|
+
## Harness Question Surface
|
|
35
|
+
|
|
36
|
+
Preferred native tool names:
|
|
37
|
+
- Claude Code: \`AskUserQuestion\`
|
|
38
|
+
- Codex: \`request_user_input\`
|
|
39
|
+
- Gemini: \`ask_user\`
|
|
40
|
+
- Cursor: \`AskQuestion\`
|
|
41
|
+
|
|
42
|
+
If unavailable, ask one concise prose question and explicitly wait for chat answer.
|
|
43
|
+
|
|
44
|
+
## Core Protocol
|
|
45
|
+
|
|
46
|
+
1. Ask one decision-changing question.
|
|
47
|
+
2. Wait for the answer.
|
|
48
|
+
3. Append one row to \`## Q&A Log\`: \`Turn | Question | User answer (1-line) | Decision impact\`.
|
|
49
|
+
4. Self-evaluate:
|
|
50
|
+
- What did I learn?
|
|
51
|
+
- Is context enough to draft now? (yes/no + reason)
|
|
52
|
+
- If no, what is the next most decision-changing question?
|
|
53
|
+
5. Repeat until context is clear OR user asks to proceed.
|
|
54
|
+
|
|
55
|
+
## Question Shape Rules
|
|
56
|
+
|
|
57
|
+
- Prefer single-select multiple choice when one direction/priority/next step must be chosen.
|
|
58
|
+
- Use multi-select only for compatible sets (goals, constraints, non-goals).
|
|
59
|
+
- Smart-skip questions already answered earlier (directly or implicitly) and log "skipped (already covered)" when relevant.
|
|
60
|
+
|
|
61
|
+
## Stop Signals (Natural Language)
|
|
62
|
+
|
|
63
|
+
Treat these as stop-and-draft signals:
|
|
64
|
+
- RU: "достаточно", "хватит", "давай драфт"
|
|
65
|
+
- EN: "enough", "skip", "just draft it", "stop asking", "move on"
|
|
66
|
+
- UA: "досить", "вистачить", "давай драфт", "рухаємось далі"
|
|
67
|
+
|
|
68
|
+
When detected:
|
|
69
|
+
- Do not ask another question in this stage loop.
|
|
70
|
+
- Move to drafting with available context.
|
|
71
|
+
- For internal agent calls only, pass \`--skip-questions\` on the next advance helper call.
|
|
72
|
+
|
|
73
|
+
## Conditional Grilling (Only On Risk Triggers)
|
|
74
|
+
|
|
75
|
+
Ask an extra 3-5 sharp questions only when one of these triggers appears:
|
|
76
|
+
- Irreversibility (data deletion, schema migration, breaking API/contract)
|
|
77
|
+
- Security/auth boundary changes
|
|
78
|
+
- Domain-model ambiguity with multiple plausible invariants
|
|
79
|
+
|
|
80
|
+
Do not ask extra questions "for theater" on simple low-risk work.
|
|
81
|
+
|
|
82
|
+
## Question Budget Hint (Soft Guidance)
|
|
83
|
+
|
|
84
|
+
Use as orientation, never as a hard stop. Source of truth is \`questionBudgetHint(track, stage)\`:
|
|
85
|
+
|
|
86
|
+
${budgetTable}
|
|
87
|
+
|
|
88
|
+
Track mapping note: \`quick\` ~= lightweight, \`medium\` ~= standard, \`standard\` ~= deep.
|
|
89
|
+
Stop based on clarity/user signal, not raw count.
|
|
90
|
+
|
|
91
|
+
## Stage Forcing Questions
|
|
92
|
+
|
|
93
|
+
Always keep at least one unresolved forcing question in play until answered or explicitly waived:
|
|
94
|
+
|
|
95
|
+
- Brainstorm:
|
|
96
|
+
- What pain are we solving?
|
|
97
|
+
- What is the most direct path?
|
|
98
|
+
- What happens if we do nothing?
|
|
99
|
+
- Who is the operator/user impacted first?
|
|
100
|
+
- What are non-negotiable no-go boundaries?
|
|
101
|
+
- Scope:
|
|
102
|
+
- What is definitely in and definitely out?
|
|
103
|
+
- Which decisions are already locked upstream?
|
|
104
|
+
- What is the rollback path if this fails?
|
|
105
|
+
- What are the top failure modes we must design for?
|
|
106
|
+
- Design:
|
|
107
|
+
- What is the data flow end-to-end?
|
|
108
|
+
- Where are the seams/interfaces and ownership boundaries?
|
|
109
|
+
- Which invariants must always hold?
|
|
110
|
+
- What will we explicitly NOT refactor now?
|
|
111
|
+
|
|
112
|
+
## One-Way Override (Irreversible Decisions)
|
|
113
|
+
|
|
114
|
+
For irreversible moves (deletion, schema migration, breaking API):
|
|
115
|
+
- Ask for explicit confirmation even if user asked to stop questions.
|
|
116
|
+
- Proceed only after explicit override ("I understand the irreversible risk; proceed").
|
|
117
|
+
- Record the override in \`## Q&A Log\` and in the stage artifact decision section.
|
|
118
|
+
|
|
119
|
+
## Completion Rule
|
|
120
|
+
|
|
121
|
+
"Continue until clear OR user wants to proceed."
|
|
122
|
+
Never force a fixed N-question script.`;
|
|
123
|
+
}
|
package/dist/content/skills.js
CHANGED
|
@@ -148,11 +148,12 @@ function contextLoadingBlock(stage, trace, executionModel) {
|
|
|
148
148
|
|
|
149
149
|
Before execution:
|
|
150
150
|
1. Read \`.cclaw/state/flow-state.json\`.
|
|
151
|
+
- If the file is missing, do **not** invent an active run — this is normal for fresh init. Route to \`/cc <idea>\` first.
|
|
151
152
|
2. Load active artifacts from \`.cclaw/artifacts/\`.
|
|
152
153
|
3. Load upstream artifacts required by this stage:
|
|
153
154
|
${readLines}
|
|
154
155
|
4. Read the state contract from \`.cclaw/templates/state-contracts/<stage>.json\` for required fields, taxonomies, and derived markdown path.
|
|
155
|
-
5. Read the canonical artifact template at \`${artifactTemplatePath}\`
|
|
156
|
+
5. Read the canonical artifact template at \`${artifactTemplatePath}\` to preserve heading/per-row tables contracts (stable section names and column order) plus calibrated review block scaffolding. Preserve existing substantive bullets/rows already in the artifact; never overwrite the artifact wholesale from the template — patch only sections you author this turn.
|
|
156
157
|
6. Extract upstream decisions, constraints, and open questions into the current artifact's \`Upstream Handoff\` section when present.
|
|
157
158
|
7. Confirm context readiness: upstream artifact freshness, required context, canonical template shape, relevant in-repo/reference patterns, and unresolved blockers are known. If any item is missing, load it or stop before drafting.
|
|
158
159
|
8. Before doing stage work, give a compact user-facing drift preamble: "Carrying forward: <1-3 bullets>. Drift since upstream: None / <specific drift>. Recommendation: continue / re-scope."
|
|
@@ -376,7 +377,7 @@ function completionParametersBlock(schema, track) {
|
|
|
376
377
|
- \`delegation lifecycle proof\`: use the delegation helper recipe in this section with explicit lifecycle rows: \`--status=scheduled\` -> \`--status=launched\` -> \`--status=acknowledged\` -> \`--status=completed\` (completed isolated/generic requires prior ACK for the same span or \`--ack-ts=<iso>\`).
|
|
377
378
|
- Fill \`## Learnings\` before closeout: either \`- None this stage.\` or JSON bullets with required keys \`type\`, \`trigger\`, \`action\`, \`confidence\` (knowledge-schema compatible).
|
|
378
379
|
- Record mandatory delegation lifecycle in \`${RUNTIME_ROOT}/state/delegation-log.json\` and append proof events to \`${RUNTIME_ROOT}/state/delegation-events.jsonl\`; the ledger is current state, the event log is audit proof.${mandatoryAgents.length > 0 ? ` If a mandatory delegation cannot run in this harness, use \`--waive-delegation=${mandatoryAgents.join(",")} --waiver-reason="<why safe>"\` on the completion helper.` : ""} If proactive delegations were intentionally skipped, rerun only with \`--accept-proactive-waiver\` (optionally \`--accept-proactive-waiver-reason="<why safe>"\`) after explicit user approval.
|
|
379
|
-
- Never edit raw \`flow-state.json\` to complete a stage, even in advisory mode; that bypasses validation, gate evidence, and Learnings harvest. If
|
|
380
|
+
- Never edit raw \`flow-state.json\` to complete a stage, even in advisory mode; that bypasses validation, gate evidence, and Learnings harvest. If a helper fails, report a one-line human-readable failure plus fenced JSON diagnostics; never echo the invoking command line or apply a manual state workaround.
|
|
380
381
|
- Completion protocol: verify required gates, update the artifact, then use the completion helper with \`--evidence-json\` and \`--passed\` for every satisfied gate.
|
|
381
382
|
`;
|
|
382
383
|
}
|
|
@@ -636,10 +637,11 @@ CLI commands, using existing \`cclaw run resume\` and \`internal verify-current-
|
|
|
636
637
|
## Process
|
|
637
638
|
|
|
638
639
|
1. **Wave Start**: author wave plan as \`.cclaw/wave-plans/<wave-n>.md\` referencing previous wave's ship artifact.
|
|
639
|
-
2. **Carry-forward Audit**: at brainstorm of the next wave, re-read previous wave ship artifact and explicitly record:
|
|
640
|
-
- Carrying forward: <
|
|
640
|
+
2. **Carry-forward Audit**: at brainstorm of the next wave, re-read previous wave ship artifact and explicitly record in the existing \`## Wave Carry-forward\` section:
|
|
641
|
+
- Carrying forward: <scope LD# hash references still valid>
|
|
641
642
|
- Drift detected: <decisions no longer valid + reason>
|
|
642
643
|
- Re-scope needed: <yes/no>
|
|
644
|
+
- Never create a second \`## Locked Decisions\` heading in brainstorm; reference prior LD# hashes inline.
|
|
643
645
|
3. **Resume Path**: if a wave was interrupted mid-stage, \`cclaw run resume\` restores state. Run \`internal verify-current-state\` before continuing.
|
|
644
646
|
4. **Wave End**: at ship, architect cross-stage verification runs from dispatch matrix. If \`DRIFT_DETECTED\`, fix before ship.
|
|
645
647
|
5. **Next Wave Trigger**: launch new \`/cc <topic>\` for next wave and reference previous wave ship artifact in upstream handoff.
|
|
package/dist/content/stages/brainstorm.js
CHANGED
|
@@ -37,14 +37,16 @@ export const BRAINSTORM = {
|
|
|
37
37
|
executionModel: {
|
|
38
38
|
checklist: [
|
|
39
39
|
"**Explore project context** — inspect existing files/docs/recent activity before asking what to build; capture matching files/patterns/seeds in `Context > Discovered context` so downstream stages don't redo discovery.",
|
|
40
|
+
"**Adaptive elicitation loop (shared skill)** — load `.cclaw/skills/adaptive-elicitation/SKILL.md` and run one decision-changing question at a time via harness-native question tools. After each answer, append one row to `## Q&A Log` (`Turn | Question | User answer (1-line) | Decision impact`). Continue until context is clear or user signals to proceed.",
|
|
41
|
+
"**Brainstorm forcing questions (must be covered or explicitly waived)** — what pain are we solving, what is the direct path, what happens if we do nothing, who is the first operator/user affected, and what no-go boundaries are non-negotiable.",
|
|
40
42
|
"**Classify stage depth** — choose `lite` for clear low-risk tasks, `standard` for normal engineering/product changes, or `deep` for ambiguity, architecture, external dependency, security/data risk, or explicit think-bigger requests.",
|
|
41
43
|
"**Write the Problem Decision Record** — pick a free-form `Frame type` label that names how this work is framed (examples: product, technical-maintenance, research-spike, ops-incident, infrastructure), then fill the universal Framing fields: affected user/role/operator, current state/failure mode/opportunity, desired observable outcome, evidence/signal, why now, do-nothing consequence, and non-goals.",
|
|
42
44
|
"**Premise check (one pass)** — answer the three gstack-style questions in the artifact body: *Right problem? Direct path? What if we do nothing?* Take a position; do not hedge.",
|
|
43
45
|
"**Reframe with How Might We** — write a single `How Might We …?` line that names the user/operator, the desired outcome, and the constraint. This is the altitude check before approaches.",
|
|
44
46
|
"**Run Clarity Gate** — record ambiguity score (0.00-1.00), decision boundaries, reaffirmed non-goals, and residual-risk handoff before locking recommendations. If ambiguity remains high (>0.40), ask one decision-changing question before recommending.",
|
|
45
47
|
"**Sharpening question discipline** — ask one decision-changing question at a time. Do not default to 3-5 batched questions; record only questions that changed the direction or a critical stop decision.",
|
|
46
|
-
"**Use compact discovery for low-risk asks** — for concrete bounded requests, do one context pass, compare one baseline and one challenger,
|
|
47
|
-
"**Early-exit concrete asks** — for unambiguous implementation-only requests, write a compact Problem Decision Record plus short-circuit handoff (context, approved intent, constraints, assumptions, next-stage risks) and
|
|
48
|
+
"**Use compact discovery for low-risk asks** — for concrete bounded requests, do one context pass, compare one baseline and one challenger, and move to draft once context is sufficient; do not drag the user through a full workshop.",
|
|
49
|
+
"**Early-exit concrete asks** — for unambiguous implementation-only requests, write a compact Problem Decision Record plus short-circuit handoff (context, approved intent, constraints, assumptions, next-stage risks) and request explicit approval when the draft is ready.",
|
|
48
50
|
"**Ask only decision-changing questions** — one at a time; if answers would not change approach and are non-critical preference/default assumptions, state the assumption and continue; STOP on scope, architecture, security, data loss, public API, migration, auth/pricing, or user approval uncertainty.",
|
|
49
51
|
"**Compare 2-3 distinct approaches with stable Role/Upside columns** — Role values are `baseline` | `challenger` | `wild-card`; Upside is `low` | `modest` | `high` | `higher`; include real trade-offs, reuse notes, and reference-pattern source/disposition when a known pattern influenced the option; include exactly one challenger with explicit `high` or `higher` upside.",
|
|
50
52
|
"**Collect reaction before recommending** — ask which option feels closest and what concern remains, then recommend based on that reaction.",
|
|
@@ -52,14 +54,16 @@ export const BRAINSTORM = {
|
|
|
52
54
|
"**Run early Ralph loop discipline** — after each producer iteration, append a `Critic Pass` JSONL row to `.cclaw/state/early-loop-log.jsonl`, refresh `.cclaw/state/early-loop.json`, and iterate until open concerns clear or convergence guard escalates.",
|
|
53
55
|
"**Embedded Grill (post-pick)** — after `Selected Direction` is named, run 3-5 sharp checks on hidden constraints, reversibility/rollback, scope boundaries, existing-pattern conformance, and domain-language fit; record each question with recommended answer and disposition (accept/refine/reject).",
|
|
54
56
|
"**Self-review before user approval** — re-read the artifact and patch contradictions, weak trade-offs, placeholders, ambiguity, and weak handoff language. Record the result in `Self-Review Notes` using the calibrated review format: `- Status: Approved` (or `Issues Found`), `- Patches applied:` with inline note or sub-bullets, `- Remaining concerns:` with inline note or sub-bullets. Use `Patches applied: None` and `Remaining concerns: None` when there is nothing to record.",
|
|
55
|
-
"**Request explicit approval** — state exactly what direction is being approved; do not advance without approval and artifact review.",
|
|
57
|
+
"**Request explicit approval to close the stage** — state exactly what direction is being approved after the adaptive elicitation loop converges; do not advance without approval and artifact review.",
|
|
56
58
|
"**Handoff packet** — only after approval, produce a scope handoff packet with selected direction, why rejected options were rejected, explicit non-goals, unresolved questions, risk hints, and explicit drift from the initial ask so scope starts from locked upstream decisions instead of rediscovering intent."
|
|
57
59
|
],
|
|
58
60
|
interactionProtocol: [
|
|
61
|
+
"\"If something is unclear, stop. Name what's confusing. Ask.\"",
|
|
59
62
|
"Start from observed project context; if the idea is vague, first narrow the project type with **one** structured question, then keep going.",
|
|
60
63
|
"Select depth explicitly: `lite`, `standard`, or `deep`; keep lite concise, but escalate when risk/ambiguity changes decisions.",
|
|
61
64
|
"Lead with the premise check (right problem / direct path / what if nothing) and the `How Might We` reframing before approaches; both go in the artifact, not just the chat.",
|
|
62
65
|
"Ask at most one question per turn, only when decision-changing; if using a structured question tool, send exactly one question object, not a multi-question form.",
|
|
66
|
+
"Run the shared adaptive elicitation cycle from `.cclaw/skills/adaptive-elicitation/SKILL.md`, including stop-signal handling (RU/EN/UA), smart-skip, conditional grilling triggers, and append-only `## Q&A Log` updates.",
|
|
63
67
|
"Only non-critical preference/default assumptions may continue inline. STOP and ask when uncertainty affects scope, architecture, security, data loss, public API, migration, auth/pricing, or user approval.",
|
|
64
68
|
"For simple low-risk greenfield work, present a compact A/B choice with one recommended path and one higher-upside challenger; keep the artifact concise but structurally complete (Context, Premise, How Might We, Sharpening Questions, Approaches, Reaction, Selected Direction, Not Doing).",
|
|
65
69
|
"Show approaches before the recommendation; include a higher-upside challenger and gather reaction first.",
|
|
package/dist/content/stages/design.js
CHANGED
|
@@ -40,6 +40,8 @@ export const DESIGN = {
|
|
|
40
40
|
},
|
|
41
41
|
executionModel: {
|
|
42
42
|
checklist: [
|
|
43
|
+
"**Adaptive elicitation loop (shared skill)** — load `.cclaw/skills/adaptive-elicitation/SKILL.md` and run one decision-changing question per turn via harness-native tools. After each answer, append one row to `## Q&A Log` (`Turn | Question | User answer (1-line) | Decision impact`). Continue until architecture context is clear or user signals to proceed.",
|
|
44
|
+
"**Design forcing questions (must be covered or explicitly waived)** — what is the end-to-end data flow, where are seams/ownership boundaries, which invariants must hold, and what will explicitly NOT be refactored now.",
|
|
43
45
|
"Compact design lock — design does not decide what to build; it decides how the approved scope works. For simple slices, produce a tight lock: upstream handoff, existing fit, architecture boundary, one labeled diagram, data/state flow, critical path, failure/rescue, trust boundaries, test/perf expectations, rollout/rollback, rejected alternative, and spec handoff.",
|
|
44
46
|
"Trivial-Change Escape Hatch — for <=3 files, no new interfaces, and no cross-module data flow, produce a mini-design (rationale, changed files, one risk) and proceed to spec.",
|
|
45
47
|
"Tiered Research — for simple/medium work, do compact inline codebase/research synthesis in `Research Fleet Synthesis`; write `.cclaw/artifacts/02a-research.md` and run the full fleet only for deep/high-risk work or when external framework/architecture uncertainty exists.",
|
|
@@ -55,8 +57,10 @@ export const DESIGN = {
|
|
|
55
57
|
"Capture leftovers — seed high-upside deferred ideas, list unresolved decisions with defaults, document distribution for new artifact types, and cross-reference deferred items to scope or unresolved decisions."
|
|
56
58
|
],
|
|
57
59
|
interactionProtocol: [
|
|
60
|
+
"\"Constrain, don't micromanage - enforce invariants, separate the doer from the checker.\"",
|
|
58
61
|
"Review section-by-section: investigator first, critic second, then reconcile. For simple apps, collapse this into one compact design lock with explicit risks and a single approval stop.",
|
|
59
62
|
"Present each issue one at a time; do not batch issues or move sections until current issues are resolved.",
|
|
63
|
+
"Run the shared adaptive elicitation cycle from `.cclaw/skills/adaptive-elicitation/SKILL.md`, including stop-signal handling (RU/EN/UA), smart-skip, conditional grilling triggers, and append-only `## Q&A Log` updates.",
|
|
60
64
|
decisionProtocolInstruction("each issue", "describe concretely with file/line references, present labeled options (A/B/C) with trade-offs, effort estimate (S/M/L/XL), risk level (Low/Med/High), and mark one as (recommended)", "recommend the option that closes the issue with the smallest blast radius and clearest verification path"),
|
|
61
65
|
"If a section has no issues, say 'No issues found' and move on.",
|
|
62
66
|
"Do not skip failure-mode mapping; use Method/Exception/Rescue/UserSees and treat silent user impact without rescue as critical.",
|
|
@@ -90,7 +94,7 @@ export const DESIGN = {
|
|
|
90
94
|
"Artifact written to `.cclaw/artifacts/03-design-<slug>.md`.",
|
|
91
95
|
"Failure-mode table exists in Method/Exception/Rescue/UserSees format.",
|
|
92
96
|
"Tier-required diagram markers are present: architecture (all tiers). Standard/Deep add-ons (shadow/error) and Deep add-ons (state-machine/rollback/deployment-sequence) are included only when risk warrants them.",
|
|
93
|
-
"Stale diagram audit finding is clear
|
|
97
|
+
"Stale diagram audit finding is clear: no blast-radius file newer than diagram markers without explicit update.",
|
|
94
98
|
"Security & threat model findings are documented with mitigations.",
|
|
95
99
|
"Observability and deployment plans are explicit for critical flows.",
|
|
96
100
|
"Outside-voice findings and dispositions are recorded (accept/reject/defer).",
|
|
@@ -161,7 +165,7 @@ export const DESIGN = {
|
|
|
161
165
|
{ section: "Data-Flow Shadow Paths", required: false, validationRule: "Standard/Deep add-on: include `<!-- diagram: data-flow-shadow-paths -->` marker plus a table for high-risk choices: chosen path, shadow alternative, switch trigger, failure/rescue/degraded behavior, and verification evidence." },
|
|
162
166
|
{ section: "Error Flow Diagram", required: false, validationRule: "Standard/Deep add-on: include `<!-- diagram: error-flow -->` marker and failure-detection -> rescue -> user-visible outcome flow." },
|
|
163
167
|
{ section: "Data Flow", required: false, validationRule: "Must include data/state flow, happy path, nil input, empty input, upstream error paths, plus Interaction Edge Case matrix rows for double-click, nav-away-mid-request, 10K-result dataset, background-job abandonment, zombie connection. Each row declares handled yes/no and deferred item when not handled." },
|
|
164
|
-
{ section: "Stale Diagram Audit", required: false, validationRule: "
|
|
168
|
+
{ section: "Stale Diagram Audit", required: false, validationRule: "Blast-radius files from Codebase Investigation must not be newer than the current design diagram-marker baseline unless explicitly refreshed." },
|
|
165
169
|
{ section: "Failure Mode Table", required: true, validationRule: "Use Method/Exception/Rescue/UserSees columns and treat silent user impact without rescue as critical." },
|
|
166
170
|
{ section: "Pre-mortem", required: false, validationRule: "Recommended: list top failure scenarios, early warning signal, mitigation owner, and containment action before implementation." },
|
|
167
171
|
{ section: "Security & Threat Model", required: true, validationRule: "Must list trust boundaries, abuse/failure scenarios, mitigations, and residual risks." },
|
|
@@ -45,7 +45,7 @@ export const PLAN = {
|
|
|
45
45
|
"Group tasks into dependency batches — batch N+1 cannot start until batch N has verification evidence.",
|
|
46
46
|
"Slice into vertical tasks — each task targets 2-5 minutes, produces one testable outcome, and touches one coherent area.",
|
|
47
47
|
"Task Contract — every task has one coherent outcome, AC mapping, exact verification command/manual step, and expected evidence snippet or pass condition. Avoid vague `run tests` wording.",
|
|
48
|
-
"Annotate slice-review metadata —
|
|
48
|
+
"Annotate slice-review metadata — task rows may carry `touchCount` (rough number of files expected to change), `touchPaths` (glob hints, e.g. `migrations/**`, `src/auth/**`), and optional `highRisk: true` to force a review pass. These fields feed the TDD stage's Per-Slice Review point.",
|
|
49
49
|
"Map scope Locked Decisions — every LD#hash anchor from scope is referenced by at least one plan task (or explicitly marked deferred with reason).",
|
|
50
50
|
"Run anti-placeholder + anti-scope-reduction scans — block `TODO/TBD/...` and phrasing like `v1`, `for now`, `later` for locked boundaries.",
|
|
51
51
|
"Define validation points — mark where progress must be checked before continuing, with concrete command and expected evidence.",
|
|
@@ -127,7 +127,7 @@ export const PLAN = {
|
|
|
127
127
|
{ section: "Upstream Handoff", required: false, validationRule: "Summarizes spec/design/scope decisions, constraints, open questions, and explicit drift before task breakdown." },
|
|
128
128
|
{ section: "Dependency Graph", required: false, validationRule: "Ordering and parallel opportunities explicit. No circular dependencies." },
|
|
129
129
|
{ section: "Dependency Batches", required: true, validationRule: "Every task belongs to a batch. Each batch has an exit gate and dependency statement." },
|
|
130
|
-
{ section: "Task List", required: true, validationRule: "Each task row includes ID, description, acceptance criterion, exact verification command/manual step, expected evidence/pass condition, and effort estimate (S/M/L). Every task must also carry a minutes estimate within the 2-5 minute budget. When
|
|
130
|
+
{ section: "Task List", required: true, validationRule: "Each task row includes ID, description, acceptance criterion, exact verification command/manual step, expected evidence/pass condition, and effort estimate (S/M/L). Every task must also carry a minutes estimate within the 2-5 minute budget. When present, touchCount/touchPaths/highRisk metadata drives Per-Slice Review escalation in TDD." },
|
|
131
131
|
{ section: "Acceptance Mapping", required: true, validationRule: "Every spec criterion is covered by at least one task." },
|
|
132
132
|
{ section: "Execution Posture", required: true, validationRule: "States sequential/batch/parallel posture, stop conditions, risk triggers, and RED/GREEN/REFACTOR checkpoint or commit expectations for TDD when consistent with the repo workflow." },
|
|
133
133
|
{ section: "Locked Decision Coverage", required: false, validationRule: "Every locked decision ID (D-XX) from scope is listed with linked task IDs or explicit defer rationale." },
|
|
@@ -45,10 +45,12 @@ export const SCOPE = {
|
|
|
45
45
|
},
|
|
46
46
|
executionModel: {
|
|
47
47
|
checklist: [
|
|
48
|
+
"**Adaptive elicitation loop (shared skill)** — load `.cclaw/skills/adaptive-elicitation/SKILL.md` and run one decision-changing question per turn via harness-native tools. After each answer, append one row to `## Q&A Log` (`Turn | Question | User answer (1-line) | Decision impact`). Continue until scope clarity is sufficient or user signals to proceed.",
|
|
49
|
+
"**Scope forcing questions (must be covered or explicitly waived)** — what is definitely in/out, which upstream decisions are locked, and what rollback path protects users if scope assumptions fail.",
|
|
48
50
|
"**Scope contract first** — read brainstorm handoff, name upstream decisions used, explicit drift, confidence, unresolved questions, and next-stage risk hints; draft the in-scope/out-of-scope/deferred/discretion contract before any design choice.",
|
|
49
51
|
"**Premise and leverage check** — answer in the artifact: *Right problem? Direct path? What if nothing? Where can we leverage existing code? What is the reversibility cost?* Take a position; do not hedge.",
|
|
50
52
|
"**Conditional 10-star boundary** — for deep/high-risk/product-strategy work, show what would make the product meaningfully better, then explicitly choose what ships now, what is deferred, and what is excluded without vague `later/for now` placeholders. Skip this for straightforward repair work and record `not needed: compact scope`.",
|
|
51
|
-
"**Pick one operational mode with the user** — HOLD SCOPE preserves focus; SELECTIVE EXPANSION cherry-picks high-leverage reference ideas; SCOPE EXPANSION explores ambitious alternatives; SCOPE REDUCTION cuts to the essential wedge. Recommend one, state why and what signal would change it, then
|
|
53
|
+
"**Pick one operational mode with the user** — HOLD SCOPE preserves focus; SELECTIVE EXPANSION cherry-picks high-leverage reference ideas; SCOPE EXPANSION explores ambitious alternatives; SCOPE REDUCTION cuts to the essential wedge. Recommend one, state why and what signal would change it, then keep elicitation focused until the user either approves or asks to proceed with draft boundaries.",
|
|
52
54
|
"**Run mode-specific analysis only to needed depth** — lite keeps the selected-mode row compact; standard adds requirements/locked decisions/discretion; deep may add Landscape Check, Taste Calibration, Reference Pattern Registry, Reference Pull, Ambitious Alternatives, and Ruthless Minimum Slice evidence when mode/risk warrants it.",
|
|
53
55
|
"**Decision-driver contract** — list weighted decision drivers (value, risk, reversibility, effort, timeline) and score candidate scope moves so the selected mode and boundaries are evidence-backed, not preference-led.",
|
|
54
56
|
"**Compare implementation alternatives** — include minimum viable, product-grade, and ideal architecture options with effort (S/M/L/XL), risk (Low/Med/High), pros, cons, and reuses. Recommend one and tie it to mode.",
|
|
@@ -58,9 +60,11 @@ export const SCOPE = {
|
|
|
58
60
|
"**Write the scope contract after approval** — include selected mode, in scope, out of scope, requirements, locked decisions, discretion areas, deferred ideas, accepted/rejected reference ideas, success definition, design handoff, completion dashboard, and explicit approval evidence."
|
|
59
61
|
],
|
|
60
62
|
interactionProtocol: [
|
|
63
|
+
"\"Strong success criteria let you loop independently. Weak criteria require constant clarification.\"",
|
|
61
64
|
decisionProtocolInstruction("scope mode selection", "present expand/selective/hold/reduce as labeled options with trade-offs and mark one as (recommended)", "recommend the option that best covers the prime-directive failure modes, four data-flow paths, observability, and deferred handling for the in-scope set with the smallest blast radius. Base your recommendation on default heuristics: greenfield -> expand, enhancement -> selective, bugfix/hotfix/refactor -> hold, broad blast radius -> reduce"),
|
|
65
|
+
"Run the shared adaptive elicitation cycle from `.cclaw/skills/adaptive-elicitation/SKILL.md`, including stop-signal handling (RU/EN/UA), smart-skip, conditional grilling triggers, and append-only `## Q&A Log` updates.",
|
|
62
66
|
"Do not walk the full checklist by default. Lead with a proposed scope contract, selected depth (`lite`/`standard`/`deep`), and the one decision that matters most; label the mode as recommended, not selected, until the user answers.",
|
|
63
|
-
"For low-risk concrete asks, keep the proposal compact but still explicit: recommend (do not auto-select) one mode, show exact in/out/deferred boundaries, and
|
|
67
|
+
"For low-risk concrete asks, keep the proposal compact but still explicit: recommend (do not auto-select) one mode, show exact in/out/deferred boundaries, and request explicit approval before finalizing the artifact or completing the stage.",
|
|
64
68
|
"Challenge premise first, take a firm position, and name one concrete condition that would change it.",
|
|
65
69
|
"Push back on weak framing: vague scope needs a specific user/problem, platform vision needs a narrow wedge, social proof needs behavioral evidence.",
|
|
66
70
|
"Resolve one structural scope issue at a time. Only non-critical preference/default assumptions may continue; STOP on uncertainty about scope boundary, architecture commitment, security, data loss, public API, migration, auth/pricing, or required user approval.",
|
|
@@ -70,7 +74,7 @@ export const SCOPE = {
|
|
|
70
74
|
"**STOP BEFORE ADVANCE.** Mandatory delegation `planner` must be completed or explicitly waived for a real blocker. If the active harness cannot isolate a planner, run a role-switch planner pass instead: announce `## cclaw role-switch: scope/planner (mandatory)`, write the planner output/evidence into the scope artifact, and append a completed delegation row with `fulfillmentMode: \"role-switch\"` plus non-empty `evidenceRefs`. Then close with `node .cclaw/hooks/stage-complete.mjs scope --passed=scope_mode_selected,scope_contract_written,scope_user_approved --evidence-json '{\"scope_mode_selected\":\"<user-approved mode + rationale>\",\"scope_contract_written\":\"<artifact path + sections>\",\"scope_user_approved\":\"<explicit user approval quote or summary>\"}'`. `scope_user_approved` must cite the user's approval; review-loop evidence alone is not approval."
|
|
71
75
|
],
|
|
72
76
|
process: [
|
|
73
|
-
"Run
|
|
77
|
+
"Run the Pre-Scope System Audit before the premise challenge.",
|
|
74
78
|
"Run the scope pass scaled to risk: default to job-to-be-done plus explicit scope contract; add premise challenge, 10-star upside, smallest useful wedge, and change conditions only for deep/high-risk scope.",
|
|
75
79
|
"Compare minimum viable, product-grade, and ideal architecture scope alternatives with explicit reuse/effort/risk.",
|
|
76
80
|
"Recommend a scope mode with explicit rationale, then ask for user opt-in before treating it as selected.",
|
|
@@ -85,7 +89,7 @@ export const SCOPE = {
|
|
|
85
89
|
],
|
|
86
90
|
requiredEvidence: [
|
|
87
91
|
"Artifact written to `.cclaw/artifacts/02-scope-<slug>.md`.",
|
|
88
|
-
"
|
|
92
|
+
"Pre-Scope System Audit findings are captured (git log/diff/stash/debt markers).",
|
|
89
93
|
"In-scope and out-of-scope lists are explicit.",
|
|
90
94
|
"Discretion areas are explicit (or marked as `None`).",
|
|
91
95
|
"Selected mode and rationale are documented using HOLD SCOPE, SELECTIVE EXPANSION, SCOPE EXPANSION, or SCOPE REDUCTION.",
|
|
@@ -144,7 +148,7 @@ export const SCOPE = {
|
|
|
144
148
|
},
|
|
145
149
|
artifactValidation: [
|
|
146
150
|
{ section: "Upstream Handoff", required: false, validationRule: "Summarizes brainstorm/idea decisions, constraints, open questions, and explicit drift before scope decisions." },
|
|
147
|
-
{ section: "Pre-Scope System Audit", required:
|
|
151
|
+
{ section: "Pre-Scope System Audit", required: true, validationRule: "Must capture `git log -30`, `git diff --stat`, `git stash list`, and a debt-marker scan (TODO/FIXME/XXX/HACK) before the premise challenge." },
|
|
148
152
|
{ section: "Prime Directives", required: false, validationRule: "For each scoped capability: named failure modes, explicit error surface, four data-flow paths, interaction edge cases, observability expectations, and deferred-item handling." },
|
|
149
153
|
{ section: "Premise Challenge", required: false, validationRule: "Must list at least 3 question/answer rows in a markdown table or bullet list (gstack default trio: right problem? direct path? what if we do nothing? — extend with leverage and reversibility for richer scope). The linter enforces structure, not English wording — answers may be in any language." },
|
|
150
154
|
{ section: "Scope Contract", required: true, validationRule: "Canonical contract: selected mode, in scope, out of scope, requirements, locked decisions, discretion areas, deferred ideas, accepted/rejected reference ideas, success definition, and design handoff." },
|
|
@@ -41,7 +41,7 @@ export const TDD = {
|
|
|
41
41
|
"Map to acceptance criterion — identify the specific spec criterion this test proves.",
|
|
42
42
|
"Discover the test surface — inspect existing tests, fixtures, helpers, test commands, and nearby assertions before authoring RED. Reuse the local test style unless the slice genuinely needs a new pattern.",
|
|
43
43
|
"Run a system-wide impact check — name callbacks, state transitions, interfaces, schemas, CLI/config/API contracts, persistence, or event boundaries that this slice can affect. Add RED coverage for each affected public contract or record why it is out of scope.",
|
|
44
|
-
"Source/test preflight — before production edits, classify planned paths using
|
|
44
|
+
"Source/test preflight — before production edits, classify planned paths using test-path patterns; verify the RED touches a test path and the GREEN touches only source paths needed for the failing behavior.",
|
|
45
45
|
"Set execution posture — record whether this slice is sequential, batch-safe, or blocked; when the existing git workflow permits small commits, checkpoint after RED, GREEN, and REFACTOR (or record why commits are deferred).",
|
|
46
46
|
"Use the mandatory `test-author` delegation for RED — after discovery and impact check, produce failing behavior tests and RED evidence only (no production edits). Set `CCLAW_ACTIVE_AGENT=tdd-red` when the harness supports phase labels.",
|
|
47
47
|
"RED: Capture failure output — copy the exact failure output as RED evidence. Record in artifact.",
|
|
@@ -52,7 +52,7 @@ export const TDD = {
|
|
|
52
52
|
"REFACTOR: continue the `test-author` evidence cycle (or a dedicated refactor mode when available) to improve code quality without behavior changes. Set `CCLAW_ACTIVE_AGENT=tdd-refactor` when the harness supports phase labels.",
|
|
53
53
|
"Record evidence — capture test discovery, system-wide impact check, RED failure, GREEN output, and REFACTOR notes in the TDD artifact. When logging a `green` row, attach the closed acceptance-criterion IDs in `acIds` so Ralph Loop status counts them.",
|
|
54
54
|
"Annotate traceability — link to the active track's source: plan task ID + spec criterion on standard/medium, or spec acceptance item / bug reproduction slice on quick.",
|
|
55
|
-
"Per-Slice Review (conditional) — if
|
|
55
|
+
"Per-Slice Review (conditional) — if the slice meets any trigger (touchCount >= filesChangedThreshold, touchPaths match touchTriggers, or highRisk=true), append a `## Per-Slice Review` entry for this slice before moving on (see the dedicated section below).",
|
|
56
56
|
"Repeat for each slice — return to step 1 for the next plan slice."
|
|
57
57
|
],
|
|
58
58
|
interactionProtocol: [
|
|
@@ -70,7 +70,7 @@ export const TDD = {
|
|
|
70
70
|
"Use incremental RED/GREEN/REFACTOR commits when the repository workflow and working tree make that appropriate; otherwise record the checkpoint boundaries in the artifact.",
|
|
71
71
|
"Stop if regressions appear and fix before proceeding.",
|
|
72
72
|
"If a test passes unexpectedly, investigate: does the behavior already exist, or is the test wrong?",
|
|
73
|
-
"**Per-Slice Review point (conditional
|
|
73
|
+
"**Per-Slice Review point (conditional).** Check every slice against the triggers before declaring it DONE. Triggers: `touchCount >= filesChangedThreshold`, any `touchPaths` match a `touchTriggers` glob, or the plan row declares `highRisk: true`. On a trigger, run two passes on the slice alone — (1) Spec-Compliance: trace RED/GREEN/REFACTOR evidence back to its plan task + spec criterion, noting edge cases the tests skip; (2) Quality: diff-scan for naming, error handling, dead code, simpler alternatives. Record both under `## Per-Slice Review` in `06-tdd.md`, naming the trigger that fired. Dispatch the `reviewer` subagent natively when available (log `fulfillmentMode: \"isolated\"`); otherwise fulfil via in-session role switch (`fulfillmentMode: \"role-switch\"`). Never fabricate an isolated pass from memory."
|
|
74
74
|
],
|
|
75
75
|
process: [
|
|
76
76
|
"Select one vertical slice and map it to acceptance criterion(s).",
|
|
@@ -81,10 +81,10 @@ export const TDD = {
|
|
|
81
81
|
"Run tests and capture failure output.",
|
|
82
82
|
"Use `test-author` in GREEN intent and implement the smallest change needed for GREEN.",
|
|
83
83
|
"Run full tests and build checks.",
|
|
84
|
-
"Run a fresh verification-before-completion check and capture command + PASS/FAIL plus a commit SHA when
|
|
84
|
+
"Run a fresh verification-before-completion check and capture command + PASS/FAIL plus a commit SHA when `.git` is present; otherwise record an explicit no-VCS reason plus a content/artifact hash.",
|
|
85
85
|
"Run the REFACTOR intent preserving behavior.",
|
|
86
86
|
"Record RED, GREEN, and REFACTOR evidence in artifact.",
|
|
87
|
-
"Annotate traceability to plan task and spec criterion; on
|
|
87
|
+
"Annotate traceability to plan task and spec criterion; on per-slice triggers, append a Per-Slice Review entry before closing the slice."
|
|
88
88
|
],
|
|
89
89
|
requiredGates: [
|
|
90
90
|
{ id: "tdd_test_discovery_complete", description: "Relevant existing tests, fixtures, helpers, and runnable commands were discovered before RED tests were written." },
|
|
@@ -92,7 +92,7 @@ export const TDD = {
|
|
|
92
92
|
{ id: "tdd_red_test_written", description: "Failing tests exist before implementation changes." },
|
|
93
93
|
{ id: "tdd_green_full_suite", description: "Full relevant suite passes in GREEN state." },
|
|
94
94
|
{ id: "tdd_refactor_completed", description: "Refactor pass completed with behavior preservation verified." },
|
|
95
|
-
{ id: "tdd_verified_before_complete", description: "Fresh verification evidence includes test command, explicit pass/fail status, and a
|
|
95
|
+
{ id: "tdd_verified_before_complete", description: "Fresh verification evidence includes test command, explicit pass/fail status, and a durable ref: commit SHA when `.git` is present or explicit no-VCS attestation + hash when not." },
|
|
96
96
|
{ id: "tdd_iron_law_acknowledged", description: "Iron Law acknowledgement is explicit (`Acknowledged: yes`) before implementation proceeds." },
|
|
97
97
|
{ id: "tdd_watched_red_observed", description: "Watched-RED Proof records at least one observed failing test with ISO timestamp evidence." },
|
|
98
98
|
{ id: "tdd_slice_cycle_complete", description: "Vertical Slice Cycle records RED, GREEN, and REFACTOR phases per active slice." },
|
|
@@ -106,7 +106,7 @@ export const TDD = {
|
|
|
106
106
|
"Execution posture and vertical-slice RED/GREEN/REFACTOR checkpoint plan recorded, including commit boundaries when the repo workflow supports them.",
|
|
107
107
|
"Failing command output captured (RED).",
|
|
108
108
|
"Full test/build output recorded (GREEN).",
|
|
109
|
-
"Fresh verification evidence recorded with command, PASS/FAIL status, and
|
|
109
|
+
"Fresh verification evidence recorded with command, PASS/FAIL status, and commit SHA or no-VCS reason plus content/artifact hash before completion.",
|
|
110
110
|
"Iron Law Acknowledgement section explicitly states `Acknowledged: yes`.",
|
|
111
111
|
"Watched-RED Proof includes at least one populated row with an ISO timestamp.",
|
|
112
112
|
"Vertical Slice Cycle records RED, GREEN, and REFACTOR per active slice.",
|
|
@@ -125,7 +125,7 @@ export const TDD = {
|
|
|
125
125
|
"behavior changed during refactor",
|
|
126
126
|
"no evidence recorded",
|
|
127
127
|
"RED/GREEN blocked — classify with the managed taxonomy `NO_SOURCE_CONTEXT`, `NO_TEST_SURFACE`, `NO_IMPLEMENTABLE_SLICE`, `RED_NOT_EXPRESSIBLE`, or `NO_VCS_MODE` and capture blockedBecause, missingInputs, recommendedRoute, nextCommand, resumeCriteria, and the repair path: RED needs a failing test surface, GREEN needs full-suite pass evidence, REFACTOR needs behavior-preservation evidence.",
|
|
128
|
-
"no-VCS workspace without explicit
|
|
128
|
+
"no-VCS workspace without an explicit no-VCS reason and content/artifact hash"
|
|
129
129
|
],
|
|
130
130
|
exitCriteria: [
|
|
131
131
|
"test discovery and system-wide impact check are recorded",
|
|
@@ -170,7 +170,7 @@ export const TDD = {
|
|
|
170
170
|
{ section: "Test Pyramid Shape", required: false, validationRule: "If present: per-slice count of Small/Medium/Large tests added, to let reviewers verify the suite is not drifting top-heavy." },
|
|
171
171
|
{ section: "Mock Preference Order", required: false, validationRule: "When mocks/spies appear in Test Discovery or RED Evidence, prefer Real > Fake > Stub > Mock. Mock-heavy slices should include explicit boundary justification (for example network/fs/time/external trust boundaries)." },
|
|
172
172
|
{ section: "Prove-It Reproduction", required: false, validationRule: "Required for bug-fix slices: original failing reproduction test (RED without fix), passing output with fix (GREEN), and a note confirming the test fails again if the fix is reverted." },
|
|
173
|
-
{ section: "Per-Slice Review", required: false, validationRule: "
|
|
173
|
+
{ section: "Per-Slice Review", required: false, validationRule: "Per triggered slice, a two-part record — Spec-Compliance (slice <-> plan task <-> spec criterion trace plus edge-case notes) and Quality (diff-focused review of naming, error handling, dead code, simpler alternatives). Each entry names the trigger (touchCount, touchPaths glob, or highRisk) and the delegation fulfillmentMode (`isolated` when a reviewer subagent was dispatched natively; `role-switch` when fulfilled in-session). Slices that did not meet any trigger may list `not triggered` instead of a full pass." }
|
|
174
174
|
]
|
|
175
175
|
},
|
|
176
176
|
reviewLens: {
|
|
@@ -224,9 +224,9 @@ export const TDD = {
|
|
|
224
224
|
{
|
|
225
225
|
title: "Per-Slice Review Audit (conditional)",
|
|
226
226
|
evaluationPoints: [
|
|
227
|
-
"
|
|
227
|
+
"Does every triggered slice (touchCount >= threshold, touchPaths match, or highRisk=true) carry a Per-Slice Review entry with BOTH a Spec-Compliance pass (plan task <-> spec criterion + edge-case notes) AND a Quality pass (diff-level naming/errors/dead code/simpler alternatives)?",
|
|
228
228
|
"Is the delegation `fulfillmentMode` recorded (`isolated` for a dispatched reviewer subagent, `role-switch` for an in-session pass) and does it match an entry in `.cclaw/state/delegation-log.json`?",
|
|
229
|
-
"
|
|
229
|
+
"When triggers fired, does every triggered slice have a Per-Slice Review entry (zero missed slices)?"
|
|
230
230
|
],
|
|
231
231
|
stopGate: false
|
|
232
232
|
},
|
|
@@ -90,7 +90,7 @@ ${conversationLanguagePolicyMarkdown()}
|
|
|
90
90
|
If the harness's native ask tool is available (\`AskUserQuestion\` / \`AskQuestion\` / \`question\` / \`request_user_input\`), send exactly ONE question; on schema error, fall back to a plain-text lettered list.
|
|
91
91
|
10. Start the tracked flow only through the managed helper:
|
|
92
92
|
\`node .cclaw/hooks/start-flow.mjs --track=<quick|medium|standard> --class=<class> --prompt=<prompt> --stack=<stack> --reason=<matched heuristic>\`
|
|
93
|
-
If this helper fails, STOP and
|
|
93
|
+
If this helper fails, STOP. Report one human-readable failure line from the JSON \`error\` field, include the helper JSON payload in a fenced \`json\` block, and never echo the invoking command line. Do **not** manually edit \`${flowPath}\`.
|
|
94
94
|
11. The helper persists \`${flowPath}\`, computes \`skippedStages\`, sets the first stage for the track, resets the gate catalog, and writes \`.cclaw/artifacts/00-idea.md\`.
|
|
95
95
|
12. Load the **first-stage skill for the chosen track** and its command file:
|
|
96
96
|
- quick → \`.cclaw/skills/spec/SKILL.md\`
|
|
@@ -105,7 +105,7 @@ If during any stage the agent discovers evidence that contradicts the initial Ph
|
|
|
105
105
|
1. Surface the new evidence in plain text.
|
|
106
106
|
2. Propose the updated \`Class\` + \`Track\` with a one-line reason.
|
|
107
107
|
3. Use the Decision Protocol to let the user accept, override, or cancel.
|
|
108
|
-
4. On acceptance: run \`node .cclaw/hooks/start-flow.mjs --reclassify --track=<new-track> --class=<new-class> --reason=<why>\`. The helper appends a \`Reclassification:\` entry to \`00-idea.md\` and updates flow state atomically. If it fails, STOP and report the
|
|
108
|
+
4. On acceptance: run \`node .cclaw/hooks/start-flow.mjs --reclassify --track=<new-track> --class=<new-class> --reason=<why>\`. The helper appends a \`Reclassification:\` entry to \`00-idea.md\` and updates flow state atomically. If it fails, STOP and report one human-readable line plus the helper JSON payload in a fenced \`json\` block; never echo the invoking command line. Do NOT manually edit \`flow-state.json\`.
|
|
109
109
|
|
|
110
110
|
### Without prompt (\`/cc\`)
|
|
111
111
|
|
|
@@ -187,12 +187,12 @@ ${conversationLanguagePolicyMarkdown()}
|
|
|
187
187
|
|
|
188
188
|
- On conflict, prefer \`standard\` over \`medium\`, and \`medium\` over \`quick\`.
|
|
189
189
|
- Always state the recommendation as a one-line reason citing matched triggers and a high/medium/low track selection confidence. Clarify that the heuristic is advisory until the managed helper writes state; after that, \`/cc\` follows the selected track. Include override guidance: switch to standard when architecture, schema, migration, security, or unclear scope appears; switch to medium when product framing is needed but architecture is known.
|
|
190
|
-
8. Run the managed start helper: \`node .cclaw/hooks/start-flow.mjs --track=<quick|medium|standard> --class=<class> --prompt=<prompt> --stack=<stack> --reason=<matched heuristic>\`. The helper writes \`${flowPath}\`, computes \`skippedStages\`, resets the gate catalog, and writes \`${RUNTIME_ROOT}/artifacts/00-idea.md\`. If it fails, STOP and
|
|
190
|
+
8. Run the managed start helper: \`node .cclaw/hooks/start-flow.mjs --track=<quick|medium|standard> --class=<class> --prompt=<prompt> --stack=<stack> --reason=<matched heuristic>\`. The helper writes \`${flowPath}\`, computes \`skippedStages\`, resets the gate catalog, and writes \`${RUNTIME_ROOT}/artifacts/00-idea.md\`. If it fails, STOP, report one human-readable failure line from the JSON \`error\` field, and include the helper JSON payload in a fenced \`json\` block; do not echo the invoking command line, and do not manually edit flow state.
|
|
191
191
|
9. Load and execute the **first stage skill of the chosen track** (\`brainstorm\` for medium/standard, \`spec\` for quick) plus its matching command file.
|
|
192
192
|
|
|
193
193
|
### Reclassification on discovery
|
|
194
194
|
|
|
195
|
-
If mid-stage evidence contradicts the initial Class/Track decision (the "trivial" change needs a migration, the "quick" bug fix needs architecture work, an origin doc multiplies scope), STOP and re-classify using the Decision Protocol. On acceptance, run \`node .cclaw/hooks/start-flow.mjs --reclassify --track=<new-track> --class=<new-class> --reason=<why>\`; the helper records \`Reclassification:\` in \`00-idea.md\` and updates state atomically.
|
|
195
|
+
If mid-stage evidence contradicts the initial Class/Track decision (the "trivial" change needs a migration, the "quick" bug fix needs architecture work, an origin doc multiplies scope), STOP and re-classify using the Decision Protocol. On acceptance, run \`node .cclaw/hooks/start-flow.mjs --reclassify --track=<new-track> --class=<new-class> --reason=<why>\`; the helper records \`Reclassification:\` in \`00-idea.md\` and updates state atomically. If it fails, report one human-readable line plus the helper JSON payload in a fenced \`json\` block, never echo the invoking command line, and do not rewrite prior artifacts or manually edit flow state.
|
|
196
196
|
|
|
197
197
|
### Path B: \`/cc\` (no arguments)
|
|
198
198
|
|