cclaw-cli 0.48.35 → 0.51.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -82
- package/dist/artifact-linter.d.ts +4 -0
- package/dist/artifact-linter.js +24 -3
- package/dist/cli.d.ts +1 -19
- package/dist/cli.js +49 -495
- package/dist/constants.d.ts +2 -13
- package/dist/constants.js +1 -46
- package/dist/content/closeout-guidance.d.ts +14 -0
- package/dist/content/closeout-guidance.js +42 -0
- package/dist/content/core-agents.js +51 -9
- package/dist/content/decision-protocol.d.ts +12 -0
- package/dist/content/decision-protocol.js +20 -0
- package/dist/content/diff-command.d.ts +1 -2
- package/dist/content/diff-command.js +8 -94
- package/dist/content/examples.d.ts +4 -10
- package/dist/content/examples.js +10 -20
- package/dist/content/hook-events.js +2 -2
- package/dist/content/hook-inline-snippets.d.ts +5 -2
- package/dist/content/hook-inline-snippets.js +33 -1
- package/dist/content/hook-manifest.d.ts +3 -4
- package/dist/content/hook-manifest.js +11 -12
- package/dist/content/hooks.js +2 -0
- package/dist/content/ideate-command.d.ts +2 -0
- package/dist/content/ideate-command.js +31 -25
- package/dist/content/iron-laws.d.ts +5 -5
- package/dist/content/iron-laws.js +5 -5
- package/dist/content/learnings.d.ts +3 -4
- package/dist/content/learnings.js +24 -50
- package/dist/content/meta-skill.js +31 -24
- package/dist/content/next-command.js +38 -38
- package/dist/content/node-hooks.js +17 -343
- package/dist/content/opencode-plugin.js +2 -100
- package/dist/content/research-playbooks.js +14 -14
- package/dist/content/review-loop.d.ts +2 -0
- package/dist/content/review-loop.js +8 -0
- package/dist/content/session-hooks.js +14 -46
- package/dist/content/skills.d.ts +0 -5
- package/dist/content/skills.js +53 -128
- package/dist/content/stage-common-guidance.d.ts +0 -1
- package/dist/content/stage-common-guidance.js +15 -14
- package/dist/content/stage-schema.d.ts +26 -1
- package/dist/content/stage-schema.js +121 -40
- package/dist/content/stages/_lint-metadata/index.js +9 -15
- package/dist/content/stages/brainstorm.js +22 -43
- package/dist/content/stages/design.js +37 -57
- package/dist/content/stages/plan.js +22 -13
- package/dist/content/stages/review.js +24 -27
- package/dist/content/stages/scope.js +34 -46
- package/dist/content/stages/ship.js +7 -4
- package/dist/content/stages/spec.js +20 -9
- package/dist/content/stages/tdd.js +64 -44
- package/dist/content/start-command.js +10 -12
- package/dist/content/status-command.d.ts +2 -7
- package/dist/content/status-command.js +19 -146
- package/dist/content/subagents.d.ts +0 -5
- package/dist/content/subagents.js +47 -28
- package/dist/content/templates.d.ts +1 -1
- package/dist/content/templates.js +126 -135
- package/dist/content/track-render-context.d.ts +17 -0
- package/dist/content/track-render-context.js +44 -0
- package/dist/content/tree-command.d.ts +1 -2
- package/dist/content/tree-command.js +4 -87
- package/dist/content/utility-skills.d.ts +2 -29
- package/dist/content/utility-skills.js +2 -1533
- package/dist/content/view-command.js +29 -11
- package/dist/delegation.d.ts +1 -1
- package/dist/delegation.js +5 -15
- package/dist/doctor-registry.js +20 -21
- package/dist/doctor.js +88 -408
- package/dist/flow-state.d.ts +3 -0
- package/dist/flow-state.js +2 -0
- package/dist/harness-adapters.d.ts +1 -1
- package/dist/harness-adapters.js +48 -57
- package/dist/install.js +128 -520
- package/dist/internal/advance-stage.js +3 -9
- package/dist/internal/compound-readiness.d.ts +1 -1
- package/dist/internal/compound-readiness.js +1 -1
- package/dist/internal/tdd-loop-status.d.ts +1 -1
- package/dist/internal/tdd-loop-status.js +1 -1
- package/dist/knowledge-store.d.ts +16 -10
- package/dist/knowledge-store.js +51 -15
- package/dist/policy.js +16 -109
- package/dist/run-archive.d.ts +4 -6
- package/dist/run-archive.js +15 -20
- package/dist/run-persistence.d.ts +2 -2
- package/dist/run-persistence.js +3 -9
- package/package.json +1 -2
- package/dist/content/archive-command.d.ts +0 -2
- package/dist/content/archive-command.js +0 -124
- package/dist/content/compound-command.d.ts +0 -5
- package/dist/content/compound-command.js +0 -193
- package/dist/content/contexts.d.ts +0 -9
- package/dist/content/contexts.js +0 -65
- package/dist/content/contracts.d.ts +0 -2
- package/dist/content/contracts.js +0 -51
- package/dist/content/doctor-references.d.ts +0 -2
- package/dist/content/doctor-references.js +0 -150
- package/dist/content/eval-scaffold.d.ts +0 -15
- package/dist/content/eval-scaffold.js +0 -370
- package/dist/content/feature-command.d.ts +0 -2
- package/dist/content/feature-command.js +0 -123
- package/dist/content/flow-map.d.ts +0 -23
- package/dist/content/flow-map.js +0 -134
- package/dist/content/harness-doc.d.ts +0 -2
- package/dist/content/harness-doc.js +0 -202
- package/dist/content/harness-playbooks.d.ts +0 -24
- package/dist/content/harness-playbooks.js +0 -393
- package/dist/content/harness-tool-refs.d.ts +0 -20
- package/dist/content/harness-tool-refs.js +0 -268
- package/dist/content/ops-command.d.ts +0 -2
- package/dist/content/ops-command.js +0 -71
- package/dist/content/protocols.d.ts +0 -7
- package/dist/content/protocols.js +0 -215
- package/dist/content/retro-command.d.ts +0 -2
- package/dist/content/retro-command.js +0 -165
- package/dist/content/rewind-command.d.ts +0 -2
- package/dist/content/rewind-command.js +0 -106
- package/dist/content/tdd-log-command.d.ts +0 -2
- package/dist/content/tdd-log-command.js +0 -85
- package/dist/eval/agents/single-shot.d.ts +0 -27
- package/dist/eval/agents/single-shot.js +0 -79
- package/dist/eval/agents/with-tools.d.ts +0 -44
- package/dist/eval/agents/with-tools.js +0 -261
- package/dist/eval/agents/workflow.d.ts +0 -31
- package/dist/eval/agents/workflow.js +0 -155
- package/dist/eval/baseline.d.ts +0 -38
- package/dist/eval/baseline.js +0 -282
- package/dist/eval/config-loader.d.ts +0 -14
- package/dist/eval/config-loader.js +0 -395
- package/dist/eval/corpus.d.ts +0 -30
- package/dist/eval/corpus.js +0 -330
- package/dist/eval/cost-guard.d.ts +0 -102
- package/dist/eval/cost-guard.js +0 -190
- package/dist/eval/diff.d.ts +0 -64
- package/dist/eval/diff.js +0 -323
- package/dist/eval/llm-client.d.ts +0 -176
- package/dist/eval/llm-client.js +0 -267
- package/dist/eval/mode.d.ts +0 -28
- package/dist/eval/mode.js +0 -61
- package/dist/eval/progress.d.ts +0 -83
- package/dist/eval/progress.js +0 -59
- package/dist/eval/report.d.ts +0 -11
- package/dist/eval/report.js +0 -181
- package/dist/eval/rubric-loader.d.ts +0 -20
- package/dist/eval/rubric-loader.js +0 -143
- package/dist/eval/runner.d.ts +0 -81
- package/dist/eval/runner.js +0 -746
- package/dist/eval/runs.d.ts +0 -41
- package/dist/eval/runs.js +0 -114
- package/dist/eval/sandbox.d.ts +0 -38
- package/dist/eval/sandbox.js +0 -137
- package/dist/eval/tools/glob.d.ts +0 -2
- package/dist/eval/tools/glob.js +0 -163
- package/dist/eval/tools/grep.d.ts +0 -2
- package/dist/eval/tools/grep.js +0 -152
- package/dist/eval/tools/index.d.ts +0 -7
- package/dist/eval/tools/index.js +0 -35
- package/dist/eval/tools/read.d.ts +0 -2
- package/dist/eval/tools/read.js +0 -122
- package/dist/eval/tools/types.d.ts +0 -49
- package/dist/eval/tools/types.js +0 -41
- package/dist/eval/tools/write.d.ts +0 -2
- package/dist/eval/tools/write.js +0 -92
- package/dist/eval/types.d.ts +0 -561
- package/dist/eval/types.js +0 -47
- package/dist/eval/verifiers/judge.d.ts +0 -40
- package/dist/eval/verifiers/judge.js +0 -256
- package/dist/eval/verifiers/rules.d.ts +0 -24
- package/dist/eval/verifiers/rules.js +0 -218
- package/dist/eval/verifiers/structural.d.ts +0 -14
- package/dist/eval/verifiers/structural.js +0 -171
- package/dist/eval/verifiers/traceability.d.ts +0 -23
- package/dist/eval/verifiers/traceability.js +0 -84
- package/dist/eval/verifiers/workflow-consistency.d.ts +0 -21
- package/dist/eval/verifiers/workflow-consistency.js +0 -225
- package/dist/eval/workflow-corpus.d.ts +0 -7
- package/dist/eval/workflow-corpus.js +0 -207
- package/dist/feature-system.d.ts +0 -42
- package/dist/feature-system.js +0 -432
- package/dist/internal/knowledge-digest.d.ts +0 -7
- package/dist/internal/knowledge-digest.js +0 -93
|
@@ -1,165 +0,0 @@
|
|
|
1
|
-
import { RUNTIME_ROOT } from "../constants.js";
|
|
2
|
-
const RETRO_SKILL_FOLDER = "flow-retro";
|
|
3
|
-
const RETRO_SKILL_NAME = "flow-retro";
|
|
4
|
-
function flowStatePath() {
|
|
5
|
-
return `${RUNTIME_ROOT}/state/flow-state.json`;
|
|
6
|
-
}
|
|
7
|
-
function retroArtifactPath() {
|
|
8
|
-
return `${RUNTIME_ROOT}/artifacts/09-retro.md`;
|
|
9
|
-
}
|
|
10
|
-
function knowledgePath() {
|
|
11
|
-
return `${RUNTIME_ROOT}/knowledge.jsonl`;
|
|
12
|
-
}
|
|
13
|
-
export function retroCommandContract() {
|
|
14
|
-
return `# /cc-ops retro
|
|
15
|
-
|
|
16
|
-
## Purpose
|
|
17
|
-
|
|
18
|
-
Auto-triggered retrospective after ship. \`/cc-next\` drafts \`${retroArtifactPath()}\`
|
|
19
|
-
from run artifacts and knowledge, then asks the user exactly ONE structured
|
|
20
|
-
question: **edit / accept / skip / rewind_for_fix**. Default = accept.
|
|
21
|
-
|
|
22
|
-
This command is normally invoked indirectly by \`/cc-next\` when
|
|
23
|
-
\`closeout.shipSubstate === "retro_review"\`. Invoking it directly is still
|
|
24
|
-
supported for manual re-runs.
|
|
25
|
-
|
|
26
|
-
## HARD-GATE
|
|
27
|
-
|
|
28
|
-
- Do not finalize retro without \`${retroArtifactPath()}\` on disk (or an explicit
|
|
29
|
-
\`retroSkipped: true\` in closeout with a one-line reason).
|
|
30
|
-
- Do not finalize without appending **at least one** \`type=compound\` entry to
|
|
31
|
-
\`${knowledgePath()}\` (skipped runs set \`compoundEntries: 0\` instead).
|
|
32
|
-
- Never advance to compound/archive with \`shipSubstate\` still at
|
|
33
|
-
\`"retro_review"\`.
|
|
34
|
-
|
|
35
|
-
## Inputs
|
|
36
|
-
|
|
37
|
-
\`/cc-ops retro\` (no flags). If the user wants to skip, they answer **skip**
|
|
38
|
-
in the structured ask; there is no \`--skip\` flag.
|
|
39
|
-
|
|
40
|
-
## Algorithm
|
|
41
|
-
|
|
42
|
-
1. Read \`${flowStatePath()}\`; confirm \`completedStages\` contains \`"ship"\`.
|
|
43
|
-
2. If \`closeout.shipSubstate !== "retro_review"\`, and \`retro.completedAt\`
|
|
44
|
-
is already set, report "retro already complete" and stop.
|
|
45
|
-
3. Draft \`${retroArtifactPath()}\` from available evidence:
|
|
46
|
-
- scan \`.cclaw/artifacts/01..08-*.md\` for decisions, blockers, rewinds,
|
|
47
|
-
- scan \`.cclaw/state/delegation-log.json\` for subagent outcomes,
|
|
48
|
-
- scan \`${knowledgePath()}\` for entries recorded during this run,
|
|
49
|
-
- structure the draft as: Outcomes / Slowed / Accelerated / Repeatable rule.
|
|
50
|
-
4. Update \`closeout.retroDraftedAt = <ISO>\` in flow-state.
|
|
51
|
-
5. Present **one** structured ask using the harness's native tool
|
|
52
|
-
(\`AskUserQuestion\` on Claude, \`AskQuestion\` on Cursor, \`question\` on
|
|
53
|
-
OpenCode when \`permission.question: "allow"\` is set,
|
|
54
|
-
\`request_user_input\` on Codex in Plan / Collaboration mode; fall back
|
|
55
|
-
to a plain-text lettered list when the tool is hidden or errors):
|
|
56
|
-
- \`accept\` (default) — keep the draft as-is,
|
|
57
|
-
- \`edit\` — user edits \`${retroArtifactPath()}\` in-place, then re-runs \`/cc-next\`,
|
|
58
|
-
- \`skip\` — record \`retroSkipped: true\` + one-line reason, no compound entry required,
|
|
59
|
-
- \`rewind_for_fix\` — route back to \`plan\` / \`tdd\` / \`review\` with a non-empty reason.
|
|
60
|
-
6. On **accept**:
|
|
61
|
-
- append >=1 strict-schema JSONL line to \`${knowledgePath()}\` with
|
|
62
|
-
\`type: "compound"\`, \`source: "retro"\`, and \`stage: null\`,
|
|
63
|
-
- set \`retro.required = true\`, \`retro.completedAt = <ISO>\`,
|
|
64
|
-
\`retro.compoundEntries = <count>\`,
|
|
65
|
-
- set \`closeout.retroAcceptedAt = <ISO>\`,
|
|
66
|
-
- set \`closeout.shipSubstate = "compound_review"\`.
|
|
67
|
-
7. On **edit**:
|
|
68
|
-
- leave \`shipSubstate = "retro_review"\`,
|
|
69
|
-
- tell user to edit \`${retroArtifactPath()}\` and run \`/cc-next\` again.
|
|
70
|
-
8. On **skip**:
|
|
71
|
-
- require a one-line reason; if empty, re-ask once then escalate,
|
|
72
|
-
- set \`closeout.retroSkipped = true\`, \`closeout.retroSkipReason = <text>\`,
|
|
73
|
-
\`closeout.retroAcceptedAt = <ISO>\`,
|
|
74
|
-
- set \`retro.completedAt = <ISO>\` (marks gate satisfied for archive), and
|
|
75
|
-
\`retro.compoundEntries = 0\`,
|
|
76
|
-
- set \`closeout.shipSubstate = "compound_review"\`.
|
|
77
|
-
9. On **rewind_for_fix**:
|
|
78
|
-
- require \`targetStage\` in \`{ plan, tdd, review }\`,
|
|
79
|
-
- require a concise rationale (min 20 chars),
|
|
80
|
-
- instruct \`/cc-ops rewind <targetStage> "<reason>"\`,
|
|
81
|
-
- reset closeout progression by setting \`closeout.shipSubstate = "idle"\`.
|
|
82
|
-
10. Emit a one-line summary: \`retro: accepted|edited|skipped|rewind_for_fix | next: /cc-next\`.
|
|
83
|
-
|
|
84
|
-
## Primary skill
|
|
85
|
-
|
|
86
|
-
**${RUNTIME_ROOT}/skills/${RETRO_SKILL_FOLDER}/SKILL.md**
|
|
87
|
-
`;
|
|
88
|
-
}
|
|
89
|
-
export function retroCommandSkillMarkdown() {
|
|
90
|
-
return `---
|
|
91
|
-
name: ${RETRO_SKILL_NAME}
|
|
92
|
-
description: "Auto-drafted retrospective with a single structured accept/edit/skip/rewind_for_fix ask. Triggered from /cc-next when shipSubstate=retro_review."
|
|
93
|
-
---
|
|
94
|
-
|
|
95
|
-
# /cc-ops retro
|
|
96
|
-
|
|
97
|
-
## HARD-GATE
|
|
98
|
-
|
|
99
|
-
Archive stays blocked until one of:
|
|
100
|
-
- retro artifact exists **and** one compound knowledge entry was appended, OR
|
|
101
|
-
- retro was explicitly skipped with a one-line reason recorded in closeout.
|
|
102
|
-
|
|
103
|
-
Do not silently skip. Do not finalize without updating \`flow-state.json\`.
|
|
104
|
-
|
|
105
|
-
## Protocol
|
|
106
|
-
|
|
107
|
-
1. Confirm ship completion by reading \`${flowStatePath()}\`.
|
|
108
|
-
2. If retro draft does not yet exist, synthesise \`${retroArtifactPath()}\` using:
|
|
109
|
-
- all \`.cclaw/artifacts/*-*.md\` from the active run (stages 01–08),
|
|
110
|
-
- \`.cclaw/state/delegation-log.json\` entries,
|
|
111
|
-
- \`${knowledgePath()}\` entries written during this run.
|
|
112
|
-
Draft sections:
|
|
113
|
-
- **Outcomes** — what was actually shipped.
|
|
114
|
-
- **Slowed** — concrete friction points (cite artifact line or delegation id).
|
|
115
|
-
- **Accelerated** — patterns/decisions that worked and are worth keeping.
|
|
116
|
-
- **Repeatable rule** — one candidate rule/pattern for next run.
|
|
117
|
-
Record \`closeout.retroDraftedAt\`.
|
|
118
|
-
3. Ask the user **one** structured question via the harness's native
|
|
119
|
-
ask tool (\`AskUserQuestion\` / \`AskQuestion\` / \`question\` /
|
|
120
|
-
\`request_user_input\`; plain-text lettered list as fallback):
|
|
121
|
-
|
|
122
|
-
> Retro draft ready at \`${retroArtifactPath()}\`. How do you want to
|
|
123
|
-
> proceed? (default: accept)
|
|
124
|
-
>
|
|
125
|
-
> - **accept** — keep the draft and continue.
|
|
126
|
-
> - **edit** — I'll edit it, then re-run \`/cc-next\`.
|
|
127
|
-
> - **skip** — no retro this run (requires one-line reason).
|
|
128
|
-
> - **rewind_for_fix** — route back to plan/tdd/review because post-ship issues were found.
|
|
129
|
-
|
|
130
|
-
4. Apply the state transition for the chosen option:
|
|
131
|
-
- \`accept\` → append \`{ "type": "compound", "source": "retro", "stage": null, ... }\` line
|
|
132
|
-
to \`${knowledgePath()}\`; set \`retro.completedAt\`, \`retro.compoundEntries\`,
|
|
133
|
-
\`closeout.retroAcceptedAt\`; set \`closeout.shipSubstate = "compound_review"\`.
|
|
134
|
-
- \`edit\` → leave \`shipSubstate = "retro_review"\`; announce resume path.
|
|
135
|
-
- \`skip\` → set \`closeout.retroSkipped\`, \`closeout.retroSkipReason\`,
|
|
136
|
-
\`closeout.retroAcceptedAt\`, \`retro.completedAt\`,
|
|
137
|
-
\`retro.compoundEntries = 0\`; set \`closeout.shipSubstate = "compound_review"\`.
|
|
138
|
-
- \`rewind_for_fix\` → require \`targetStage ∈ {plan,tdd,review}\` and
|
|
139
|
-
reason (>=20 chars), then instruct \`/cc-ops rewind <targetStage> "<reason>"\`
|
|
140
|
-
and set \`closeout.shipSubstate = "idle"\` to restart closeout after rework.
|
|
141
|
-
|
|
142
|
-
5. Print one-line completion summary:
|
|
143
|
-
- \`retro gate: accepted (<N> compound entries)\`
|
|
144
|
-
- \`retro gate: skipped (reason: <text>)\`
|
|
145
|
-
- \`retro gate: editing (re-run /cc-next when ready)\`
|
|
146
|
-
- \`retro gate: rewind_for_fix (target=<stage>)\`
|
|
147
|
-
|
|
148
|
-
## Resume semantics
|
|
149
|
-
|
|
150
|
-
A new session with \`closeout.shipSubstate === "retro_review"\` resumes
|
|
151
|
-
exactly here. If \`closeout.retroDraftedAt\` is present but
|
|
152
|
-
\`retroAcceptedAt\` is missing, re-ask the same structured question without
|
|
153
|
-
regenerating the draft.
|
|
154
|
-
|
|
155
|
-
## Validation
|
|
156
|
-
|
|
157
|
-
- \`${retroArtifactPath()}\` exists and is non-empty, **or**
|
|
158
|
-
\`closeout.retroSkipped === true\` with a non-empty reason.
|
|
159
|
-
- When accepted: \`${knowledgePath()}\` gained a valid \`compound\` line
|
|
160
|
-
and \`retro.compoundEntries > 0\`.
|
|
161
|
-
- \`retro.completedAt\` is set.
|
|
162
|
-
- \`closeout.shipSubstate\` is \`"compound_review"\` (or still
|
|
163
|
-
\`"retro_review"\` when user picked \`edit\`).
|
|
164
|
-
`;
|
|
165
|
-
}
|
|
@@ -1,106 +0,0 @@
|
|
|
1
|
-
import { RUNTIME_ROOT } from "../constants.js";
|
|
2
|
-
const REWIND_SKILL_FOLDER = "flow-rewind";
|
|
3
|
-
const REWIND_SKILL_NAME = "flow-rewind";
|
|
4
|
-
function flowStatePath() {
|
|
5
|
-
return `${RUNTIME_ROOT}/state/flow-state.json`;
|
|
6
|
-
}
|
|
7
|
-
function artifactsPath() {
|
|
8
|
-
return `${RUNTIME_ROOT}/artifacts`;
|
|
9
|
-
}
|
|
10
|
-
function rewindLogPath() {
|
|
11
|
-
return `${RUNTIME_ROOT}/state/rewind-log.jsonl`;
|
|
12
|
-
}
|
|
13
|
-
export function rewindCommandContract() {
|
|
14
|
-
return `# /cc-ops rewind
|
|
15
|
-
|
|
16
|
-
## Purpose
|
|
17
|
-
|
|
18
|
-
Rewind active flow to an earlier stage, or acknowledge stale markers after
|
|
19
|
-
intentional rework.
|
|
20
|
-
|
|
21
|
-
## HARD-GATE
|
|
22
|
-
|
|
23
|
-
- Never rewind without preserving downstream artifact history.
|
|
24
|
-
- Mark downstream stages as stale; do not leave completedStages pointing to invalidated work.
|
|
25
|
-
- Record a rewind reason in \`${rewindLogPath()}\`.
|
|
26
|
-
|
|
27
|
-
## Inputs
|
|
28
|
-
|
|
29
|
-
\`/cc-ops rewind <target-stage> [reason]\`
|
|
30
|
-
or
|
|
31
|
-
\`/cc-ops rewind --ack <stage>\`
|
|
32
|
-
|
|
33
|
-
## Algorithm
|
|
34
|
-
|
|
35
|
-
### rewind mode
|
|
36
|
-
1. Read \`${flowStatePath()}\` and current track.
|
|
37
|
-
2. Validate \`target-stage\` belongs to the active track and is not ahead of current stage.
|
|
38
|
-
3. Compute downstream stages to invalidate (all stages after target that were completed or current).
|
|
39
|
-
4. Archive downstream artifacts into \`${artifactsPath()}/_rewind-archive/<rewind-id>/\`.
|
|
40
|
-
5. Rename active downstream artifacts to \`*.stale.md\`.
|
|
41
|
-
6. Update flow-state:
|
|
42
|
-
- \`currentStage = target-stage\`
|
|
43
|
-
- trim \`completedStages\` to stages before target-stage
|
|
44
|
-
- clear gate evidence/catalog for target-stage and downstream
|
|
45
|
-
- mark downstream entries in \`staleStages\`
|
|
46
|
-
- append \`rewinds[]\` record
|
|
47
|
-
7. Append JSON line to \`${rewindLogPath()}\`.
|
|
48
|
-
|
|
49
|
-
### acknowledge mode (\`--ack\`)
|
|
50
|
-
1. Read \`${flowStatePath()}\`.
|
|
51
|
-
2. If \`staleStages.<stage>\` is missing, report no-op.
|
|
52
|
-
3. Remove \`staleStages.<stage>\`.
|
|
53
|
-
4. Write updated flow-state.
|
|
54
|
-
5. Print remaining stale stages (if any).
|
|
55
|
-
|
|
56
|
-
## Output
|
|
57
|
-
|
|
58
|
-
- In rewind mode:
|
|
59
|
-
- rewind id
|
|
60
|
-
- from -> to stage
|
|
61
|
-
- invalidated stages list
|
|
62
|
-
- number of stale artifacts
|
|
63
|
-
- In acknowledge mode:
|
|
64
|
-
- acknowledged stage
|
|
65
|
-
- remaining stale stages
|
|
66
|
-
|
|
67
|
-
## Primary skill
|
|
68
|
-
|
|
69
|
-
**${RUNTIME_ROOT}/skills/${REWIND_SKILL_FOLDER}/SKILL.md**
|
|
70
|
-
`;
|
|
71
|
-
}
|
|
72
|
-
export function rewindCommandSkillMarkdown() {
|
|
73
|
-
return `---
|
|
74
|
-
name: ${REWIND_SKILL_NAME}
|
|
75
|
-
description: "Rewind active flow stage safely and acknowledge stale invalidations."
|
|
76
|
-
---
|
|
77
|
-
|
|
78
|
-
# /cc-ops rewind
|
|
79
|
-
|
|
80
|
-
## HARD-GATE
|
|
81
|
-
|
|
82
|
-
Rewind is an atomic state transition. Never leave flow-state half-updated (for example currentStage changed but stale markers/artifact archive missing).
|
|
83
|
-
|
|
84
|
-
## Protocol
|
|
85
|
-
|
|
86
|
-
### rewind
|
|
87
|
-
1. Validate target stage belongs to current track and is upstream.
|
|
88
|
-
2. Archive downstream artifacts under \`${artifactsPath()}/_rewind-archive/<rewind-id>/\`.
|
|
89
|
-
3. Mark downstream artifacts as stale (\`*.stale.md\`).
|
|
90
|
-
4. Reset downstream gate catalog and guard evidence.
|
|
91
|
-
5. Record \`rewinds[]\` and \`staleStages\` in flow-state.
|
|
92
|
-
6. Append rewind entry into \`${rewindLogPath()}\`.
|
|
93
|
-
|
|
94
|
-
### rewind --ack <stage>
|
|
95
|
-
1. Load flow-state stale map.
|
|
96
|
-
2. Remove exactly one stale stage marker.
|
|
97
|
-
3. Report remaining stale stages.
|
|
98
|
-
|
|
99
|
-
## Validation checklist
|
|
100
|
-
|
|
101
|
-
- \`${flowStatePath()}\` remains valid JSON.
|
|
102
|
-
- \`currentStage\` equals requested rewind target.
|
|
103
|
-
- invalidated stages are absent from \`completedStages\`.
|
|
104
|
-
- archived copies exist for each moved artifact.
|
|
105
|
-
`;
|
|
106
|
-
}
|
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
import { RUNTIME_ROOT } from "../constants.js";
|
|
2
|
-
const TDD_LOG_SKILL_FOLDER = "tdd-cycle-log";
|
|
3
|
-
const TDD_LOG_SKILL_NAME = "tdd-cycle-log";
|
|
4
|
-
function logPath() {
|
|
5
|
-
return `${RUNTIME_ROOT}/state/tdd-cycle-log.jsonl`;
|
|
6
|
-
}
|
|
7
|
-
function flowStatePath() {
|
|
8
|
-
return `${RUNTIME_ROOT}/state/flow-state.json`;
|
|
9
|
-
}
|
|
10
|
-
export function tddLogCommandContract() {
|
|
11
|
-
return `# /cc-ops tdd-log
|
|
12
|
-
|
|
13
|
-
## Purpose
|
|
14
|
-
|
|
15
|
-
Record explicit RED/GREEN/REFACTOR evidence used by workflow guard and doctor checks.
|
|
16
|
-
|
|
17
|
-
## HARD-GATE
|
|
18
|
-
|
|
19
|
-
- Every implementation write in tdd must be preceded by a logged RED event.
|
|
20
|
-
- Use append-only JSONL at \`${logPath()}\`; never rewrite prior lines.
|
|
21
|
-
|
|
22
|
-
## Subcommands
|
|
23
|
-
|
|
24
|
-
- \`/cc-ops tdd-log red <slice> <command> [note]\`
|
|
25
|
-
- \`/cc-ops tdd-log green <slice> <command> [note]\`
|
|
26
|
-
- \`/cc-ops tdd-log refactor <slice> <command> [note]\`
|
|
27
|
-
- \`/cc-ops tdd-log show\`
|
|
28
|
-
|
|
29
|
-
## Log Schema
|
|
30
|
-
|
|
31
|
-
Each JSON line must include:
|
|
32
|
-
- \`ts\` (ISO timestamp)
|
|
33
|
-
- \`runId\` (from flow-state)
|
|
34
|
-
- \`stage\` (usually \`tdd\`)
|
|
35
|
-
- \`slice\` (e.g. \`S-1\`)
|
|
36
|
-
- \`phase\` (\`red\` | \`green\` | \`refactor\`)
|
|
37
|
-
- \`command\`
|
|
38
|
-
- optional: \`files\`, \`exitCode\`, \`note\`, \`acIds\` (array of acceptance
|
|
39
|
-
criterion IDs like \`["AC-1"]\` — GREEN rows use this to drive the Ralph
|
|
40
|
-
Loop status summary at \`.cclaw/state/ralph-loop.json\`).
|
|
41
|
-
|
|
42
|
-
## Primary skill
|
|
43
|
-
|
|
44
|
-
**${RUNTIME_ROOT}/skills/${TDD_LOG_SKILL_FOLDER}/SKILL.md**
|
|
45
|
-
`;
|
|
46
|
-
}
|
|
47
|
-
export function tddLogCommandSkillMarkdown() {
|
|
48
|
-
return `---
|
|
49
|
-
name: ${TDD_LOG_SKILL_NAME}
|
|
50
|
-
description: "Append RED/GREEN/REFACTOR entries into tdd-cycle-log.jsonl for guard/doctor enforcement."
|
|
51
|
-
---
|
|
52
|
-
|
|
53
|
-
# /cc-ops tdd-log
|
|
54
|
-
|
|
55
|
-
## HARD-GATE
|
|
56
|
-
|
|
57
|
-
Do not fake RED evidence. A \`red\` entry must correspond to a failing test command.
|
|
58
|
-
|
|
59
|
-
## Protocol
|
|
60
|
-
|
|
61
|
-
1. Read \`${flowStatePath()}\` and capture \`activeRunId\` + \`currentStage\`.
|
|
62
|
-
2. Build JSON entry:
|
|
63
|
-
- \`ts\`: now ISO
|
|
64
|
-
- \`runId\`: activeRunId
|
|
65
|
-
- \`stage\`: currentStage
|
|
66
|
-
- \`slice\`: user-provided slice id
|
|
67
|
-
- \`phase\`: red|green|refactor
|
|
68
|
-
- \`command\`: test command or refactor verification command
|
|
69
|
-
- \`acIds\` (optional, recommended on \`green\`): the acceptance-criterion
|
|
70
|
-
IDs this GREEN row closes (e.g. \`["AC-1","AC-3"]\`). The SessionStart
|
|
71
|
-
hook aggregates distinct \`acIds\` from green rows into \`acClosed\`
|
|
72
|
-
inside \`.cclaw/state/ralph-loop.json\` so \`/cc-next\` can answer
|
|
73
|
-
"is the Ralph Loop done?" without parsing the artifact.
|
|
74
|
-
3. Append one line to \`${logPath()}\`.
|
|
75
|
-
4. After append, refresh Ralph Loop status with
|
|
76
|
-
\`cclaw internal tdd-loop-status --quiet\` (the SessionStart hook also
|
|
77
|
-
refreshes it, but a manual refresh is safe and idempotent).
|
|
78
|
-
5. \`show\`: print the last 20 lines grouped by slice.
|
|
79
|
-
|
|
80
|
-
## Validation
|
|
81
|
-
|
|
82
|
-
- File remains valid JSONL (one JSON object per line).
|
|
83
|
-
- For each slice, first phase must be \`red\`.
|
|
84
|
-
`;
|
|
85
|
-
}
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import type { FlowStage } from "../../types.js";
|
|
2
|
-
import type { ChatUsage, EvalLlmClient } from "../llm-client.js";
|
|
3
|
-
import type { EvalCase, ResolvedEvalConfig } from "../types.js";
|
|
4
|
-
export interface SingleShotInput {
|
|
5
|
-
caseEntry: EvalCase;
|
|
6
|
-
config: Pick<ResolvedEvalConfig, "model" | "agentTemperature" | "timeoutMs" | "tokenPricing">;
|
|
7
|
-
projectRoot: string;
|
|
8
|
-
client: EvalLlmClient;
|
|
9
|
-
/**
|
|
10
|
-
* Override the SKILL.md loader. Primarily a test hook so unit tests
|
|
11
|
-
* can swap a canned system prompt without creating fixtures on disk.
|
|
12
|
-
*/
|
|
13
|
-
loadSkill?: (stage: FlowStage) => Promise<string>;
|
|
14
|
-
}
|
|
15
|
-
export interface SingleShotOutput {
|
|
16
|
-
artifact: string;
|
|
17
|
-
usage: ChatUsage;
|
|
18
|
-
usageUsd: number;
|
|
19
|
-
model: string;
|
|
20
|
-
durationMs: number;
|
|
21
|
-
attempts: number;
|
|
22
|
-
systemPrompt: string;
|
|
23
|
-
userPrompt: string;
|
|
24
|
-
}
|
|
25
|
-
export declare function loadStageSkill(projectRoot: string, stage: FlowStage): Promise<string>;
|
|
26
|
-
/** Run the single-shot AUT (fixture mode + --judge) and return the produced artifact. */
|
|
27
|
-
export declare function runSingleShot(input: SingleShotInput): Promise<SingleShotOutput>;
|
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Single-shot agent used by fixture mode when `--judge` is set.
|
|
3
|
-
*
|
|
4
|
-
* Simplest realistic AUT: one LLM call with the stage's SKILL.md as the
|
|
5
|
-
* system prompt and the case's `inputPrompt` as the user message. Output
|
|
6
|
-
* is the raw assistant content, returned as the artifact for the judge
|
|
7
|
-
* pipeline.
|
|
8
|
-
*
|
|
9
|
-
* Design notes:
|
|
10
|
-
*
|
|
11
|
-
* - No tools. No multi-turn. No reads of the project beyond the one
|
|
12
|
-
* SKILL.md. agent/workflow modes layer complexity on top.
|
|
13
|
-
* - Errors are propagated as-is (`EvalLlmError` subclasses) so the
|
|
14
|
-
* runner can surface them as verifier failures without swallowing the
|
|
15
|
-
* cause.
|
|
16
|
-
* - Usage and USD cost are surfaced so the runner can commit them to
|
|
17
|
-
* the cost guard + case-level `costUsd`.
|
|
18
|
-
*/
|
|
19
|
-
import fs from "node:fs/promises";
|
|
20
|
-
import path from "node:path";
|
|
21
|
-
import { RUNTIME_ROOT } from "../../constants.js";
|
|
22
|
-
import { stageSkillFolder } from "../../content/skills.js";
|
|
23
|
-
import { exists } from "../../fs-utils.js";
|
|
24
|
-
import { computeUsageUsd } from "../cost-guard.js";
|
|
25
|
-
export async function loadStageSkill(projectRoot, stage) {
|
|
26
|
-
const folder = stageSkillFolder(stage);
|
|
27
|
-
const file = path.join(projectRoot, RUNTIME_ROOT, "skills", folder, "SKILL.md");
|
|
28
|
-
if (!(await exists(file))) {
|
|
29
|
-
throw new Error(`Stage skill not found: ${path.relative(projectRoot, file)}. ` +
|
|
30
|
-
`Run \`cclaw init\` (or \`cclaw sync\`) before \`cclaw eval --mode=fixture --judge\`.`);
|
|
31
|
-
}
|
|
32
|
-
return fs.readFile(file, "utf8");
|
|
33
|
-
}
|
|
34
|
-
function buildMessages(systemPrompt, userPrompt) {
|
|
35
|
-
return [
|
|
36
|
-
{ role: "system", content: systemPrompt },
|
|
37
|
-
{ role: "user", content: userPrompt }
|
|
38
|
-
];
|
|
39
|
-
}
|
|
40
|
-
function buildUserPrompt(caseEntry) {
|
|
41
|
-
const lines = [];
|
|
42
|
-
lines.push(`Stage: ${caseEntry.stage}`);
|
|
43
|
-
lines.push(`Case id: ${caseEntry.id}`);
|
|
44
|
-
lines.push(``);
|
|
45
|
-
lines.push(`Task:`);
|
|
46
|
-
lines.push(caseEntry.inputPrompt.trim());
|
|
47
|
-
lines.push(``);
|
|
48
|
-
lines.push(`Produce the artifact required by this stage using the SKILL.md above. ` +
|
|
49
|
-
`Output the artifact directly (markdown with optional YAML frontmatter). ` +
|
|
50
|
-
`Do not wrap in code fences, do not add commentary before or after.`);
|
|
51
|
-
return lines.join("\n");
|
|
52
|
-
}
|
|
53
|
-
/** Run the single-shot AUT (fixture mode + --judge) and return the produced artifact. */
|
|
54
|
-
export async function runSingleShot(input) {
|
|
55
|
-
const { caseEntry, config, projectRoot, client } = input;
|
|
56
|
-
const started = Date.now();
|
|
57
|
-
const loader = input.loadSkill ?? ((stage) => loadStageSkill(projectRoot, stage));
|
|
58
|
-
const systemPrompt = await loader(caseEntry.stage);
|
|
59
|
-
const userPrompt = buildUserPrompt(caseEntry);
|
|
60
|
-
const response = await client.chat({
|
|
61
|
-
model: config.model,
|
|
62
|
-
messages: buildMessages(systemPrompt, userPrompt),
|
|
63
|
-
temperature: config.agentTemperature ?? 0.2,
|
|
64
|
-
timeoutMs: config.timeoutMs
|
|
65
|
-
});
|
|
66
|
-
const usageUsd = computeUsageUsd(response.model, response.usage, {
|
|
67
|
-
tokenPricing: config.tokenPricing
|
|
68
|
-
});
|
|
69
|
-
return {
|
|
70
|
-
artifact: response.content.trim(),
|
|
71
|
-
usage: response.usage,
|
|
72
|
-
usageUsd,
|
|
73
|
-
model: response.model,
|
|
74
|
-
attempts: response.attempts,
|
|
75
|
-
durationMs: Date.now() - started,
|
|
76
|
-
systemPrompt,
|
|
77
|
-
userPrompt
|
|
78
|
-
};
|
|
79
|
-
}
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
import type { ChatUsage, EvalLlmClient } from "../llm-client.js";
|
|
2
|
-
import { createSandbox, type Sandbox } from "../sandbox.js";
|
|
3
|
-
import type { SandboxTool } from "../tools/index.js";
|
|
4
|
-
import type { EvalCase, ResolvedEvalConfig, ToolUseSummary } from "../types.js";
|
|
5
|
-
export declare class MaxTurnsExceededError extends Error {
|
|
6
|
-
readonly turns: number;
|
|
7
|
-
constructor(turns: number);
|
|
8
|
-
}
|
|
9
|
-
export interface WithToolsInput {
|
|
10
|
-
caseEntry: EvalCase;
|
|
11
|
-
config: Pick<ResolvedEvalConfig, "model" | "agentTemperature" | "timeoutMs" | "tokenPricing" | "toolMaxTurns" | "toolMaxArgumentsBytes" | "toolMaxResultBytes">;
|
|
12
|
-
projectRoot: string;
|
|
13
|
-
client: EvalLlmClient;
|
|
14
|
-
tools?: SandboxTool[];
|
|
15
|
-
/** Override for the SKILL.md loader (test hook). */
|
|
16
|
-
loadSkill?: (stage: EvalCase["stage"]) => Promise<string>;
|
|
17
|
-
/** Override for the sandbox factory (test hook). */
|
|
18
|
-
createSandboxFn?: typeof createSandbox;
|
|
19
|
-
/**
|
|
20
|
-
* Reuse an externally-managed sandbox instead of creating + disposing a
|
|
21
|
-
* per-call one. Workflow mode uses this so every stage shares the same
|
|
22
|
-
* sandbox and earlier artifacts remain visible. When set, the caller is
|
|
23
|
-
* responsible for `dispose()`.
|
|
24
|
-
*/
|
|
25
|
-
externalSandbox?: Sandbox;
|
|
26
|
-
/**
|
|
27
|
-
* Optional override of the default user prompt prefix. Workflow mode uses
|
|
28
|
-
* this to tell the model which stage it is on and where the prior
|
|
29
|
-
* artifacts are located.
|
|
30
|
-
*/
|
|
31
|
-
promptPreamble?: string;
|
|
32
|
-
}
|
|
33
|
-
export interface WithToolsOutput {
|
|
34
|
-
artifact: string;
|
|
35
|
-
usage: ChatUsage;
|
|
36
|
-
usageUsd: number;
|
|
37
|
-
model: string;
|
|
38
|
-
attempts: number;
|
|
39
|
-
durationMs: number;
|
|
40
|
-
toolUse: ToolUseSummary;
|
|
41
|
-
systemPrompt: string;
|
|
42
|
-
userPrompt: string;
|
|
43
|
-
}
|
|
44
|
-
export declare function runWithTools(input: WithToolsInput): Promise<WithToolsOutput>;
|