cclaw-cli 0.27.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/README.md +421 -64
  2. package/dist/cli.d.ts +8 -4
  3. package/dist/cli.js +318 -47
  4. package/dist/constants.d.ts +1 -1
  5. package/dist/constants.js +34 -1
  6. package/dist/content/eval-scaffold.d.ts +2 -2
  7. package/dist/content/eval-scaffold.js +7 -6
  8. package/dist/content/start-command.d.ts +3 -2
  9. package/dist/content/start-command.js +5 -4
  10. package/dist/eval/agents/single-shot.d.ts +1 -1
  11. package/dist/eval/agents/single-shot.js +4 -4
  12. package/dist/eval/agents/with-tools.d.ts +6 -6
  13. package/dist/eval/agents/with-tools.js +5 -5
  14. package/dist/eval/agents/workflow.d.ts +7 -0
  15. package/dist/eval/agents/workflow.js +5 -3
  16. package/dist/eval/baseline.d.ts +24 -0
  17. package/dist/eval/baseline.js +75 -2
  18. package/dist/eval/config-loader.js +46 -17
  19. package/dist/eval/cost-guard.d.ts +22 -0
  20. package/dist/eval/cost-guard.js +38 -1
  21. package/dist/eval/diff.d.ts +1 -1
  22. package/dist/eval/diff.js +3 -3
  23. package/dist/eval/llm-client.d.ts +13 -2
  24. package/dist/eval/llm-client.js +8 -1
  25. package/dist/eval/mode.d.ts +28 -0
  26. package/dist/eval/mode.js +61 -0
  27. package/dist/eval/progress.d.ts +83 -0
  28. package/dist/eval/progress.js +59 -0
  29. package/dist/eval/report.js +1 -1
  30. package/dist/eval/runner.d.ts +29 -9
  31. package/dist/eval/runner.js +148 -56
  32. package/dist/eval/runs.d.ts +41 -0
  33. package/dist/eval/runs.js +114 -0
  34. package/dist/eval/sandbox.js +1 -1
  35. package/dist/eval/tools/index.js +1 -1
  36. package/dist/eval/tools/types.d.ts +1 -1
  37. package/dist/eval/types.d.ts +54 -27
  38. package/dist/eval/types.js +21 -9
  39. package/dist/eval/verifiers/workflow-consistency.d.ts +1 -1
  40. package/dist/eval/workflow-corpus.d.ts +2 -2
  41. package/dist/eval/workflow-corpus.js +4 -4
  42. package/dist/install.d.ts +10 -0
  43. package/dist/install.js +19 -5
  44. package/package.json +1 -1
package/README.md CHANGED
@@ -1,14 +1,38 @@
1
1
  # cclaw
2
2
 
3
- **A focused, installer-first workflow that turns AI coding sessions into predictable shipped outcomes.**
3
+ **Install once, ship every time.** cclaw is an installer-first workflow runtime
4
+ that gives your AI coding agent one inspectable path from idea to shipped PR:
4
5
 
5
- `cclaw` gives your agent one clear path:
6
- **brainstorm -> scope -> design -> spec -> plan -> tdd -> review -> ship**
6
+ > **brainstorm → scope → design → spec → plan → tdd → review → ship**
7
7
 
8
- No giant command jungle. No runtime daemon. No process theater.
9
- Just a disciplined flow that stays lightweight and works across major coding harnesses.
8
+ Every stage has real gates the agent cannot skip, every decision leaves a
9
+ file-backed audit trail, and the same six slash commands work across
10
+ Claude Code, Cursor, OpenCode, and OpenAI Codex.
10
11
 
11
- ## How It Works
12
+ No hidden control plane. No background daemon. No plugin marketplace to
13
+ configure. Just a repo-local `.cclaw/` runtime your whole team can read,
14
+ diff, and reason about.
15
+
16
+ ---
17
+
18
+ ## Who this is for
19
+
20
+ - Solo builders who want **shipped outcomes** instead of endless chat.
21
+ - Engineering teams that need a **single, repeatable path** for AI-assisted
22
+ changes across multiple harnesses and languages.
23
+ - Staff engineers and tech leads who want **enforceable discipline**:
24
+ locked-in decisions, no placeholders, mandatory TDD, traceable plans.
25
+ - Maintainers of AI agents/skills who want **measurable prompt engineering**
26
+ via the built-in eval harness.
27
+
28
+ If you are looking for a virtual engineering org with 20+ role-play
29
+ commands, or for a plugin marketplace ecosystem, see the
30
+ [Compared to references](#compared-to-references) section — other tools do
31
+ that well. cclaw trades breadth for a single, inspectable pipeline.
32
+
33
+ ---
34
+
35
+ ## How it works
12
36
 
13
37
  ```mermaid
14
38
  flowchart LR
@@ -26,8 +50,8 @@ flowchart LR
26
50
  sequenceDiagram
27
51
  participant U as User
28
52
  participant H as Harness
29
- participant V as cclaw Hooks + Skills
30
- participant S as State + Knowledge
53
+ participant V as cclaw hooks + skills
54
+ participant S as State + knowledge
31
55
  U->>H: /cc <idea>
32
56
  H->>V: Load stage contract + HARD-GATE
33
57
  V->>S: Read context (state/knowledge)
@@ -36,96 +60,429 @@ sequenceDiagram
36
60
  S-->>U: Next stage is explicit
37
61
  ```
38
62
 
39
- ## Why `cclaw` Wins In Practice
63
+ Every stage reads and writes real files under `.cclaw/`. `flow-state.json`
64
+ holds the single source of truth for "where are we"; `knowledge.jsonl`
65
+ accumulates reusable lessons; stage artifacts live under
66
+ `.cclaw/artifacts/` until the feature is archived.
67
+
68
+ ---
69
+
70
+ ## 30-second install
71
+
72
+ ```bash
73
+ npx cclaw-cli init
74
+ ```
75
+
76
+ You will get an interactive prompt for profile, default track, and which
77
+ harnesses to install into. For CI or scripted installs:
78
+
79
+ ```bash
80
+ npx cclaw-cli init --profile=standard --harnesses=claude,cursor --no-interactive
81
+ ```
82
+
83
+ ### Init profiles
84
+
85
+ | Profile | promptGuardMode | tddEnforcement | gitHookGuards | languageRulePacks | Use when |
86
+ |---|---|---|---|---|---|
87
+ | `minimal` | advisory | advisory | off | none | Personal projects, quick exploration |
88
+ | `standard` _(default)_ | advisory | advisory | off | none | Most teams; enables the full flow without hard blocks |
89
+ | `full` | **strict** | **strict** | **on** | all | Enterprise / regulated / multi-contributor repos |
90
+
91
+ Profile values are persisted to `.cclaw/config.yaml` and preserved by
92
+ `cclaw upgrade`. Switch profile later with `cclaw init --profile=<id>` or
93
+ edit individual keys directly.
94
+
95
+ ### What you get
96
+
97
+ ```text
98
+ .cclaw/
99
+ ├── commands/ # stage + utility command contracts (markdown)
100
+ ├── skills/ # stage + utility skills loaded by the harness
101
+ ├── contexts/ # cross-cutting context modes (research, debugging, …)
102
+ ├── templates/ # artifact skeletons for each stage
103
+ ├── rules/ # lint-style rules surfaced to the agent
104
+ ├── adapters/ # per-harness translation notes
105
+ ├── agents/ # subagent definitions (planner, reviewer, …)
106
+ ├── hooks/ # harness-agnostic hook scripts
107
+ ├── worktrees/ # git worktrees created by /cc-ops feature
108
+ ├── artifacts/ # active feature artifacts (00-idea.md → 09-retro.md)
109
+ ├── runs/ # archived feature snapshots: YYYY-MM-DD-slug/
110
+ ├── references/ # (optional) pinned copies of reference frameworks
111
+ ├── evals/ # eval corpus, rubrics, baselines, reports
112
+ ├── custom-skills/ # user-authored skills (never overwritten)
113
+ ├── state/ # flow-state.json + delegation-log.json + activity
114
+ └── knowledge.jsonl # append-only, strict-schema lessons + patterns
115
+ ```
116
+
117
+ Plus harness-specific shims:
118
+
119
+ - `.claude/commands/cc*.md` + `.claude/hooks/hooks.json`
120
+ - `.cursor/commands/cc*.md` + `.cursor/hooks.json` + `.cursor/rules/cclaw-workflow.mdc`
121
+ - `.opencode/commands/cc*.md` + `.opencode/plugins/cclaw-plugin.mjs`
122
+ - `.codex/commands/cc*.md` + `.codex/hooks.json`
123
+ - `AGENTS.md` with a managed routing block
124
+
125
+ ---
126
+
127
+ ## The six commands
128
+
129
+ All six appear as slash commands in every supported harness. This is the
130
+ entire top-level surface — everything else happens through subcommands or
131
+ stage routing.
40
132
 
41
- - **Low cognitive load:** one canonical stage flow instead of dozens of competing paths.
42
- - **Installer-first architecture:** generates files and hooks; does not run a hidden control plane.
43
- - **Hard-gated quality:** each stage has non-skippable constraints that reduce AI drift.
44
- - **Tiered harness coverage:** transparent capability tiers across Claude Code, Cursor, Codex, OpenCode.
45
- - **Compounding context:** flow state + project knowledge get rehydrated on new sessions automatically.
46
- - **Incremental delivery:** active artifacts stay in one place; `cclaw archive` snapshots completed features into dated run folders.
133
+ | Command | What it does |
134
+ |---|---|
135
+ | **`/cc <idea>`** | Classify the task (software / trivial / bugfix / pure-question / non-software), discover origin docs (`docs/prd/**`, ADRs, root `PRD.md`, …), sniff the stack, recommend a track, then start the first stage of that track. `/cc` without arguments resumes the current flow. |
136
+ | **`/cc-next`** | The one progression primitive. Reads `flow-state.json`, checks gates + mandatory subagent delegations, and either resumes the current stage or advances to the next. `/cc-next` in a new session is how you **resume**. |
137
+ | **`/cc-ideate`** | Repository improvement discovery. Scans for TODOs, flaky tests, oversized modules, docs drift, and recurring knowledge-store lessons; returns a ranked backlog before you commit to a specific feature. |
138
+ | **`/cc-view`** | Read-only flow visibility. `/cc-view status` (default), `/cc-view tree`, `/cc-view diff` (baseline delta map). Never mutates state. |
139
+ | **`/cc-learn`** | Append strict-schema entries to `.cclaw/knowledge.jsonl`: `rule`, `pattern`, `lesson`, or `compound`. Also curates (dedupe, promote, soft-archive) when the store grows. |
140
+ | **`/cc-ops`** | Operational router: `feature` (git-worktree manager), `tdd-log`, `retro`, `compound` (lift repeated lessons into first-class skills/rules), `archive`, `rewind` (clear a stale stage marker). |
47
141
 
48
- ## Compared To Top References
142
+ ### Example first-run
49
143
 
50
- | System | Strongest trait | Where `cclaw` is better |
144
+ ```text
145
+ > /cc Add rate limiting to the public /api/v1/search endpoint
146
+
147
+ cclaw: Classifying task…
148
+ Class: software-medium
149
+ Discovered context: docs/rfcs/rate-limit-strategy.md (rate-limit policy draft)
150
+ Stack: node 20.10.0 (pnpm), fastify 4.26, redis 7
151
+ Recommended track: medium (matched triggers: "add endpoint")
152
+ Override? (A) keep medium (B) switch track (C) cancel
153
+ > A
154
+ cclaw: Persisting flow-state.json, seeding 00-idea.md, entering brainstorm…
155
+ ```
156
+
157
+ After this `flow-state.json` contains:
158
+
159
+ ```json
160
+ {
161
+ "currentStage": "brainstorm",
162
+ "track": "medium",
163
+ "skippedStages": ["scope", "design"],
164
+ "stageGateCatalog": { "brainstorm": { "passed": [], "blocked": [] } },
165
+ "completedStages": []
166
+ }
167
+ ```
168
+
169
+ And `00-idea.md` starts with:
170
+
171
+ ```text
172
+ Class: software-medium
173
+ Track: medium (matched: "add endpoint")
174
+ Stack: node 20.10.0, fastify 4.26, redis 7
175
+
176
+ ## Discovered context
177
+
178
+ - docs/rfcs/rate-limit-strategy.md — rate-limit policy draft (Q2 2026)
179
+
180
+ ## User prompt
181
+
182
+ Add rate limiting to the public /api/v1/search endpoint
183
+ ```
184
+
185
+ No magic. No ambiguity about where you are.
186
+
187
+ ---
188
+
189
+ ## The eight stages, and the three tracks
190
+
191
+ cclaw has eight stages, but a single prompt rarely needs all of them.
192
+ `/cc` picks a **track** up front so the flow matches the task.
193
+
194
+ | Track | Path | Typical trigger |
51
195
  |---|---|---|
52
- | **Superpowers** | Rich skill ecosystem and mature workflows | Smaller operational surface, tighter stage discipline, faster onboarding for teams that want one default path |
53
- | **G-Stack** | Deep multi-role orchestration (CEO/design/eng/release style) | Less overhead, fewer moving parts, easier to keep deterministic in day-to-day product delivery |
54
- | **Everything Claude Code** | Broad command catalog and flexible patterns | Lower command entropy, clearer defaults, less decision fatigue for regular execution |
196
+ | **quick** | `spec → tdd → review → ship` | `bug`, `hotfix`, `typo`, `rename`, `bump`, `docs only`, one-liners |
197
+ | **medium** | `brainstorm → spec → plan → tdd → review → ship` | `add endpoint`, `add field`, `extend existing`, `wire integration` |
198
+ | **standard** _(default)_ | all 8 stages | `new feature`, `refactor`, `migration`, `platform`, `schema`, `architecture` |
199
+
200
+ Each stage produces a dated artifact under `.cclaw/artifacts/`:
201
+ `00-idea.md` (seed) and `01-brainstorm.md` through `08-ship.md`
202
+ (plus `09-retro.md` at closeout).
203
+
204
+ ### Track heuristics are configurable
205
+
206
+ Every team has its own vocabulary. Override the built-in trigger lists in
207
+ `.cclaw/config.yaml`:
208
+
209
+ ```yaml
210
+ trackHeuristics:
211
+ priority: [standard, medium, quick]
212
+ fallback: standard
213
+ tracks:
214
+ quick:
215
+ triggers: [hotfix, rollback, prod-incident]
216
+ veto: [schema, migration] # never route quick even if one trigger hits
217
+ standard:
218
+ patterns:
219
+ - "^epic:"
220
+ - "platform-team|core-infra"
221
+ ```
222
+
223
+ `priority` + `veto` + regex `patterns` give you deterministic routing
224
+ without touching any code.
225
+
226
+ ### Mid-flow reclassification
227
+
228
+ If you seed a task as `quick` and evidence in spec shows it actually needs a
229
+ schema migration, cclaw **stops and asks** instead of quietly advancing.
230
+ Reclassification is append-only: the old decision stays in history.
231
+
232
+ ---
55
233
 
56
- `cclaw` is intentionally opinionated: it optimizes for **signal over volume**.
234
+ ## Quality loop: `cclaw doctor`
57
235
 
58
- ## 60-Second Start
236
+ Run anytime. A non-zero exit code means something is observably wrong with the
237
+ `.cclaw/` runtime.
59
238
 
60
239
  ```bash
61
- npx cclaw-cli init
240
+ cclaw doctor # full sweep, PASS/FAIL summary
241
+ cclaw doctor --reconcile-gates # also recompute current stage gate evidence
242
+ cclaw doctor --explain # include fix + doc reference per check
243
+ cclaw doctor --only=error # or --only=trace:,hook: for narrow sweeps
244
+ cclaw doctor --quiet # only failing checks (CI-friendly)
245
+ cclaw doctor --json # machine-readable, exit 2 on error failures
62
246
  ```
63
247
 
64
- Then run in your harness:
248
+ Each failing check points at:
249
+
250
+ - a **severity** (error / warning / info)
251
+ - a one-line **summary**
252
+ - concrete **details** from your repo
253
+ - a **fix** string and a **doc reference** when `--explain` is on
254
+
255
+ Example:
65
256
 
66
257
  ```text
67
- /cc <idea>
68
- /cc-next
258
+ [ERROR]
259
+ FAIL trace:matrix_populated :: spec artifact exists but trace matrix is empty
260
+ details: .cclaw/artifacts/04-spec.md has 3 acceptance criteria; 0 mapped
261
+ fix: rebuild trace matrix via /cc-next (spec completion protocol) or edit 04-spec.md to add testable criteria
262
+ docs: .cclaw/skills/specification-authoring/SKILL.md#trace-matrix
263
+
264
+ Doctor status: BLOCKED (1 failing error check)
69
265
  ```
70
266
 
71
- Core installer lifecycle:
267
+ Add `cclaw doctor` to a pre-commit hook or CI job (`exit 2` on error
268
+ severity) and you inherit a shared definition of "the runtime is healthy".
269
+
270
+ ---
271
+
272
+ ## Closeout and compounding
273
+
274
+ Shipping a feature is a **separate stage** (`08-ship.md`), followed by two
275
+ more disciplined steps:
276
+
277
+ ```text
278
+ /cc-ops retro # writes 09-retro.md; gates knowledge capture (≥1 compound line)
279
+ /cc-ops compound # (optional) lifts repeated learnings into first-class rules/skills
280
+ /cc-ops archive # moves artifacts/ to runs/YYYY-MM-DD-slug/, resets flow-state
281
+ ```
282
+
283
+ Archive is gated on retro completion unless you explicitly pass
284
+ `--skip-retro --retro-reason="..."`. You cannot accidentally lose the
285
+ learning pass.
286
+
287
+ Knowledge entries are strict JSONL with frequency, maturity, and provenance
288
+ fields — not freeform markdown — so they stay machine-queryable across
289
+ sessions and contributors.
290
+
291
+ ---
292
+
293
+ ## Parallel features with git worktrees
294
+
295
+ Use `/cc-ops feature` to run more than one cclaw flow side by side without
296
+ copying `.cclaw/` state:
297
+
298
+ ```text
299
+ /cc-ops feature new payments-revamp # creates a git worktree + isolated registry
300
+ /cc-ops feature list # shows all active features + their branches
301
+ /cc-ops feature switch checkout-refactor
302
+ /cc-ops feature status # which feature this workspace is attached to
303
+ ```
304
+
305
+ Each feature is a real `git worktree` with its own branch, its own
306
+ `flow-state.json`, and its own artifacts. Archive flushes the **current**
307
+ feature back into `.cclaw/runs/`.
308
+
309
+ ---
310
+
311
+ ## TDD that actually runs
312
+
313
+ The `tdd` stage is not prose guidance. It requires:
314
+
315
+ - an explicit **RED** test run (logged to `.cclaw/state/stage-activity.jsonl`)
316
+ - a mandatory **`test-author`** subagent dispatch (logged to
317
+ `.cclaw/state/delegation-log.json`)
318
+ - a **GREEN** full-suite run before exit
319
+ - optional **REFACTOR** pass with coverage preservation
320
+
321
+ `/cc-next` will not advance past `tdd` until the delegation log shows the
322
+ subagent as `completed` or explicitly `waived` (for harnesses without
323
+ native subagent dispatch, such as Codex — see
324
+ [Harness support](#harness-support)).
325
+
326
+ In **full** profile, `tddEnforcement: strict` blocks progression until a
327
+ real test file is present and matches one of your configured
328
+ `tddTestGlobs`.
329
+
330
+ ---
331
+
332
+ ## Harness support
333
+
334
+ cclaw is honest about which harnesses give you full automation and which
335
+ need small manual bridges. See
336
+ [`docs/harnesses.md`](./docs/harnesses.md) for the full matrix.
337
+
338
+ | Harness | Tier | Native subagent dispatch | Hook surface | Structured ask |
339
+ |---|---|---|---|---|
340
+ | Claude Code | tier1 | full | full | `AskUserQuestion` |
341
+ | Cursor | tier2 | partial | full | `AskQuestion` |
342
+ | OpenCode | tier2 | partial | plugin | plain-text |
343
+ | OpenAI Codex | tier2 | none | full | plain-text |
344
+
345
+ Capability gaps are captured in `.cclaw/state/harness-gaps.json` and
346
+ surfaced by `cclaw doctor`. Where native dispatch is missing, cclaw emits
347
+ a structured **waiver** rather than pretending the delegation happened.
348
+
349
+ ---
350
+
351
+ ## Guardrails that ship in the box
352
+
353
+ These are the things that make cclaw "enterprise-strong" without turning
354
+ it into ceremony:
355
+
356
+ - **Locked decisions (D-XX IDs).** Scope decisions are numbered and must
357
+ reappear in plan + TDD artifacts. The artifact linter catches any
358
+ silent drift.
359
+ - **No placeholders.** `TBD`, `TODO`, `similar to task`, and "static for
360
+ now"-style scope reduction are flagged before a stage completes.
361
+ - **Stale-stage detection.** If an upstream artifact changes after a
362
+ downstream stage is already complete, cclaw marks the downstream stage
363
+ stale and refuses to advance until you re-run it (or explicitly
364
+ acknowledge via `/cc-ops rewind --ack <stage>`).
365
+ - **Mandatory subagent delegation** at TDD, with per-harness waivers.
366
+ - **Turn Announce Discipline.** Every stage entry/exit emits a visible
367
+ line so users can see what the agent is doing, not just what it says.
368
+ - **Extracted protocols.** Decision, Completion, and Ethos protocols live
369
+ in a single place (`.cclaw/contexts/`), so every skill speaks the same
370
+ dialect.
371
+ - **Strict JSONL knowledge schema.** Queryable from scripts, not just
372
+ grep-able.
373
+
374
+ ---
375
+
376
+ ## Eval-driven prompt engineering
377
+
378
+ cclaw ships with `cclaw eval` — a three-tier regression harness for the
379
+ skills and contracts the runtime generates. Use it when you change a
380
+ stage skill, tweak a prompt, or swap a model.
381
+
382
+ ```bash
383
+ cclaw eval --dry-run # validate corpus + config
384
+ cclaw eval --schema-only # L1 structural (PR-blocking, no LLM)
385
+ cclaw eval --rules # L1 + L2 rule-based
386
+ cclaw eval --judge --mode=fixture --stage=spec # L3 LLM judge against a fixture
387
+ cclaw eval --judge --mode=agent --stage=plan # draft in a sandbox, then judge
388
+ cclaw eval --mode=workflow --judge # full multi-stage run (Tier C)
389
+ cclaw eval --compare-model=gpt-4o-mini # diff two models against same corpus
390
+ cclaw eval diff 0.26.0 latest # compare two saved reports
391
+ cclaw eval --background # long runs go to .cclaw/evals/runs/
392
+ ```
393
+
394
+ Works with any OpenAI-compatible endpoint — Zhipu AI GLM, OpenAI, Together,
395
+ self-hosted vLLM — via three environment variables (plus an optional cost cap):
396
+
397
+ ```bash
398
+ CCLAW_EVAL_API_KEY=...
399
+ CCLAW_EVAL_BASE_URL=https://api.z.ai/api/coding/paas/v4 # default
400
+ CCLAW_EVAL_MODEL=glm-5.1 # default
401
+ CCLAW_EVAL_DAILY_USD_CAP=5 # optional cost guard
402
+ ```
403
+
404
+ Full details and the eval contract live in
405
+ [`docs/evals.md`](./docs/evals.md).
406
+
407
+ ---
408
+
409
+ ## CLI reference
72
410
 
73
411
  ```bash
74
- npx cclaw-cli sync
75
- npx cclaw-cli doctor
76
- npx cclaw-cli archive --name <feature-name>
77
- npx cclaw-cli upgrade
78
- npx cclaw-cli uninstall
412
+ cclaw init [--profile=<id>] [--harnesses=<list>] [--track=<id>] \
413
+ [--interactive | --no-interactive] [--dry-run]
414
+ cclaw sync # regenerate shims
415
+ cclaw doctor [--reconcile-gates] [--explain] [--quiet] \
416
+ [--only=<filter>] [--json]
417
+ cclaw upgrade # refresh generated files; preserve config
418
+ cclaw archive [--name=<slug>] [--skip-retro --retro-reason=<t>]
419
+ cclaw eval <see evals section above>
420
+ cclaw uninstall # remove .cclaw + generated shims
421
+ cclaw --version # shows the installed package version
79
422
  ```
80
423
 
81
- ## PR-First Ship Flow
424
+ `sync` regenerates shims and runtime files without touching user artifacts,
425
+ state, or config keys. `upgrade` does the same **and** bumps the version
426
+ stamp in `.cclaw/config.yaml`, preserving every custom profile/heuristic
427
+ key. To reset to a named profile, re-run `cclaw init --profile=<id>`.
428
+
429
+ ---
430
+
431
+ ## Compared to references
432
+
433
+ cclaw stands on the shoulders of several open frameworks. Each one is
434
+ genuinely good at something. Here is the honest tradeoff.
82
435
 
83
- `cclaw` does not run hidden git automation. It enforces release discipline inside the harness and keeps repository actions explicit.
436
+ **Superpowers** (obra) ships a mature methodology where skills compose and
437
+ activate ambiently. cclaw trades that breadth for a **single auditable
438
+ pipeline**: `flow-state.json`, stage gates, and `cclaw doctor` make it easy
439
+ to see *why* the agent is allowed to advance. Choose Superpowers for
440
+ ecosystem richness; choose cclaw when deterministic stage discipline
441
+ matters more than plugin variety.
84
442
 
85
- Recommended shipping path:
443
+ **G-Stack** is a full virtual engineering org — dozens of slash commands
444
+ for planning, design, QA, and release. cclaw deliberately keeps **one
445
+ stage machine** and the same six harness entrypoints, prioritizing
446
+ repeatability across harnesses over role-surface area. Use G-Stack when
447
+ you want explicit multi-role theater; use cclaw when you want one pipeline
448
+ across Claude, Cursor, OpenCode, and Codex.
449
+
450
+ **Everything Claude Code** is an optimization and inventory system —
451
+ memory, instincts, security, and multi-ecosystem configs. cclaw is a
452
+ **minimal flow runtime**: eight stages, JSONL knowledge, and evals for
453
+ contract drift. Pair ECC-style breadth with cclaw if you need both
454
+ coverage and a single ship path.
455
+
456
+ ---
457
+
458
+ ## PR-first ship flow
459
+
460
+ cclaw does not run hidden git automation. Release discipline lives inside
461
+ the harness; repository operations stay explicit:
86
462
 
87
463
  ```bash
88
464
  git checkout main
89
465
  git pull origin main
90
466
  git checkout -b feat/<topic>
91
- # implement with cclaw stages in the harness
92
- git add .
93
- git commit -m "..."
467
+ # run the flow in the harness
468
+ git add . && git commit -m "..."
94
469
  git push -u origin HEAD
95
470
  gh pr create
96
471
  ```
97
472
 
98
473
  After merge to `main`, CI handles release lifecycle:
99
474
 
100
- - `Release Drafter` updates draft notes from merged PRs.
101
- - `Release Publish` validates the build, publishes to npm (if version is new), publishes an existing release draft or creates a new GitHub Release, and uploads `.tgz` + plugin manifest artifacts.
102
- - `Release Package` remains available for manual release/event-driven packaging flows.
103
- - To trigger a new publish, bump `package.json` version in the PR before merge.
475
+ - **Release Drafter** updates draft notes from merged PRs.
476
+ - **Release Publish** validates the build, publishes to npm when the
477
+ version is new, publishes an existing release draft or creates a new
478
+ GitHub Release, and uploads `.tgz` + plugin manifest artifacts.
479
+ - **Release Package** remains available for manual / event-driven flows.
104
480
 
105
- Required repository secret:
481
+ Bump `package.json` in the PR to trigger a new publish.
106
482
 
107
- - `NPM_TOKEN` with publish access to the npm package.
483
+ Required repository secret: `NPM_TOKEN` with publish access.
108
484
 
109
- ## What Gets Generated
110
-
111
- ```text
112
- .cclaw/
113
- ├── skills/
114
- ├── commands/
115
- ├── hooks/
116
- ├── templates/
117
- ├── references/
118
- ├── artifacts/ # active feature artifacts
119
- ├── state/
120
- ├── knowledge.jsonl # append-only strict-schema rule/pattern/lesson log
121
- └── runs/ # archived feature snapshots (YYYY-MM-DD-feature-name)
122
- ```
123
-
124
- ## Harness Integration
125
-
126
- Supported harnesses: `claude`, `cursor`, `opencode`, `codex`. The full
127
- per-harness tier/capability matrix, install surface, and lifecycle details live in
128
- [docs/harnesses.md](./docs/harnesses.md).
485
+ ---
129
486
 
130
487
  ## License
131
488
 
package/dist/cli.d.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
  import type { FlowTrack, HarnessId, InitProfile } from "./types.js";
3
- import type { EvalTier } from "./eval/types.js";
3
+ import type { EvalMode } from "./eval/types.js";
4
4
  type CommandName = "init" | "sync" | "doctor" | "upgrade" | "uninstall" | "archive" | "eval";
5
5
  interface ParsedArgs {
6
6
  command?: CommandName;
@@ -18,7 +18,7 @@ interface ParsedArgs {
18
18
  archiveSkipRetro?: boolean;
19
19
  archiveSkipRetroReason?: string;
20
20
  evalStage?: string;
21
- evalTier?: EvalTier;
21
+ evalMode?: EvalMode;
22
22
  evalSchemaOnly?: boolean;
23
23
  evalRules?: boolean;
24
24
  evalJudge?: boolean;
@@ -26,10 +26,14 @@ interface ParsedArgs {
26
26
  evalNoWrite?: boolean;
27
27
  evalUpdateBaseline?: boolean;
28
28
  evalConfirm?: boolean;
29
- /** Optional subcommand after `eval`. Currently only `diff` is supported. */
30
- evalSubcommand?: "diff";
29
+ evalQuiet?: boolean;
30
+ evalMaxCostUsd?: number;
31
+ /** Optional subcommand after `eval`. */
32
+ evalSubcommand?: "diff" | "runs";
31
33
  /** Positional arguments for eval subcommands (e.g. `diff <old> <new>`). */
32
34
  evalArgs?: string[];
35
+ evalBackground?: boolean;
36
+ evalCompareModel?: string;
33
37
  showHelp?: boolean;
34
38
  showVersion?: boolean;
35
39
  }