@curdx/flow 2.2.0 → 2.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +19 -2
- package/README.md +15 -8
- package/README.zh.md +5 -3
- package/agent-preamble/preamble.md +33 -0
- package/agents/flow-adversary.md +1 -1
- package/agents/flow-architect.md +2 -1
- package/agents/flow-brownfield-analyst.md +153 -0
- package/agents/flow-debugger.md +6 -11
- package/agents/flow-edge-hunter.md +1 -1
- package/agents/flow-executor.md +30 -8
- package/agents/flow-planner.md +38 -5
- package/agents/flow-product-designer.md +2 -1
- package/agents/flow-qa-engineer.md +9 -5
- package/agents/flow-researcher.md +2 -1
- package/agents/flow-reviewer.md +23 -5
- package/agents/flow-security-auditor.md +5 -3
- package/agents/flow-triage-analyst.md +5 -24
- package/agents/flow-ui-researcher.md +4 -3
- package/agents/flow-ux-designer.md +12 -39
- package/agents/flow-verifier.md +35 -3
- package/cli/README.md +3 -1
- package/cli/doctor-workflow.js +165 -2
- package/cli/doctor.js +8 -0
- package/cli/help.js +2 -0
- package/cli/lib/doctor-claude-settings.js +736 -0
- package/cli/lib/doctor-report.js +256 -1
- package/cli/lib/doctor-runtime-environment.js +196 -0
- package/cli/lib/frontmatter.js +44 -0
- package/cli/lib/json-schema.js +57 -0
- package/cli/lib/runtime.js +20 -2
- package/cli/lib/semver.js +14 -0
- package/cli/uninstall-actions.js +323 -0
- package/cli/uninstall.js +9 -253
- package/cli/utils.js +6 -1
- package/gates/adversarial-review-gate.md +1 -1
- package/gates/security-gate.md +2 -2
- package/gates/test-quality-gate.md +59 -0
- package/hooks/hooks.json +16 -2
- package/hooks/scripts/common.sh +4 -0
- package/hooks/scripts/session-start.sh +17 -2
- package/hooks/scripts/stop-watcher.sh +69 -18
- package/hooks/scripts/subagent-artifact-guard.sh +159 -0
- package/hooks/scripts/subagent-statusline.sh +105 -0
- package/knowledge/atomic-commits.md +1 -1
- package/knowledge/claude-code-runtime-contracts.md +203 -0
- package/knowledge/epic-decomposition.md +1 -1
- package/knowledge/execution-strategies.md +23 -1
- package/knowledge/planning-reviews.md +2 -2
- package/knowledge/poc-first-workflow.md +8 -8
- package/knowledge/review-feedback-intake.md +57 -0
- package/knowledge/two-stage-review.md +19 -6
- package/knowledge/wave-execution.md +16 -1
- package/output-styles/curdx-evidence-first.md +34 -0
- package/package.json +7 -1
- package/schemas/agent-frontmatter.schema.json +0 -7
- package/schemas/config.schema.json +14 -0
- package/schemas/hooks.schema.json +34 -2
- package/schemas/output-style-frontmatter.schema.json +22 -0
- package/schemas/plugin-manifest.schema.json +387 -17
- package/schemas/plugin-settings.schema.json +29 -0
- package/schemas/skill-frontmatter.schema.json +109 -4
- package/schemas/spec-state.schema.json +29 -4
- package/settings.json +6 -0
- package/skills/brownfield-index/SKILL.md +31 -35
- package/skills/browser-qa/SKILL.md +11 -3
- package/skills/cancel/SKILL.md +82 -0
- package/skills/debug/SKILL.md +6 -2
- package/skills/epic/SKILL.md +5 -3
- package/skills/fast/SKILL.md +1 -0
- package/skills/help/SKILL.md +17 -7
- package/skills/implement/SKILL.md +38 -7
- package/skills/init/SKILL.md +2 -1
- package/skills/review/SKILL.md +4 -1
- package/skills/security-audit/SKILL.md +17 -3
- package/skills/spec/SKILL.md +2 -1
- package/skills/start/SKILL.md +18 -18
- package/skills/status/SKILL.md +85 -0
- package/skills/ui-sketch/SKILL.md +11 -3
- package/skills/verify/SKILL.md +13 -1
- package/templates/config.json.tmpl +4 -1
- package/templates/progress.md.tmpl +19 -0
- package/templates/tasks.md.tmpl +26 -3
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
# Claude Code Runtime Contracts — CurDX-Flow Notes
|
|
2
|
+
|
|
3
|
+
CurDX-Flow depends on Claude Code's plugin, hook, skill, and subagent runtime surfaces. This page records the operational contracts we rely on so agents and maintainers do not drift from the current official behavior.
|
|
4
|
+
|
|
5
|
+
## Source of Truth
|
|
6
|
+
|
|
7
|
+
- Official docs entry: `https://code.claude.com/docs/en/overview`
|
|
8
|
+
- Runtime-specific pages to re-check when changing behavior:
|
|
9
|
+
- Hooks: `/docs/en/hooks`
|
|
10
|
+
- Subagents: `/docs/en/sub-agents`
|
|
11
|
+
- Skills: `/docs/en/skills`
|
|
12
|
+
- Commands: `/docs/en/commands`
|
|
13
|
+
- Plugins: `/docs/en/plugins`
|
|
14
|
+
- Settings: `/docs/en/settings`
|
|
15
|
+
- Plugin manifest reference: `/docs/en/plugins-reference`
|
|
16
|
+
- Output styles: `/docs/en/output-styles`
|
|
17
|
+
- Status line: `/docs/en/statusline`
|
|
18
|
+
- Plugin dependency constraints: `/docs/en/plugin-dependencies`
|
|
19
|
+
- Routines / scheduled tasks: `/docs/en/routines`, `/docs/en/scheduled-tasks`
|
|
20
|
+
|
|
21
|
+
When a behavior is unclear, prefer the official docs and `claude plugin validate .` over inferred behavior from older examples.
|
|
22
|
+
|
|
23
|
+
## Hook Output Rules
|
|
24
|
+
|
|
25
|
+
- `SessionStart` context injection must use:
|
|
26
|
+
- `hookSpecificOutput.hookEventName = "SessionStart"`
|
|
27
|
+
- `hookSpecificOutput.additionalContext = "..."`
|
|
28
|
+
- Persistent environment for later hook/script invocations must be written to `CLAUDE_ENV_FILE` as shell exports. Do not invent a JSON top-level `environmentVariables` field.
|
|
29
|
+
- `Stop` / `SubagentStop` continuation blocking uses top-level `decision: "block"` plus `reason`.
|
|
30
|
+
- `PreToolUse` denial uses `hookSpecificOutput.permissionDecision = "deny"` and `permissionDecisionReason`.
|
|
31
|
+
- `PreToolUse` also supports `hookSpecificOutput.permissionDecision = "defer"` for deferred tool handling in `-p` / SDK-style flows; do not assume deny/allow are the only valid permission outcomes.
|
|
32
|
+
- `PermissionDenied` can return `{ "retry": true }` to let Claude try a different approach after an auto-mode classifier denial.
|
|
33
|
+
- Hooks must fail open when runtime prerequisites are missing or unusable (no `python3`, malformed stdin JSON, absent `.flow/` state). The exception is an explicit, success-looking subagent completion with a missing required artifact.
|
|
34
|
+
|
|
35
|
+
## Subagent Artifact Discipline
|
|
36
|
+
|
|
37
|
+
Subagents that produce long reports must write the artifact before producing the final assistant summary. The final summary should be short and point to the file path.
|
|
38
|
+
|
|
39
|
+
Guarded artifact targets:
|
|
40
|
+
|
|
41
|
+
| Agent | Expected artifact |
|
|
42
|
+
| --- | --- |
|
|
43
|
+
| `flow-researcher` | `.flow/specs/<active>/research.md` |
|
|
44
|
+
| `flow-product-designer` | `.flow/specs/<active>/requirements.md` |
|
|
45
|
+
| `flow-architect` | `.flow/specs/<active>/design.md` |
|
|
46
|
+
| `flow-planner` | `.flow/specs/<active>/tasks.md` |
|
|
47
|
+
| `flow-reviewer` | `.flow/specs/<active>/review-report.md` |
|
|
48
|
+
| `flow-verifier` | `.flow/specs/<active>/verification-report.md` |
|
|
49
|
+
| `flow-security-auditor` | `.flow/specs/<active>/security-audit.md` |
|
|
50
|
+
| `flow-qa-engineer` | `.flow/specs/<active>/qa-report.md` |
|
|
51
|
+
| `flow-edge-hunter` | `.flow/specs/<active>/edge-cases.md` |
|
|
52
|
+
| `flow-adversary` | `.flow/specs/<active>/adversarial-review.md` |
|
|
53
|
+
| `flow-ui-researcher` | `.flow/specs/<active>/ui-research.md` |
|
|
54
|
+
| `flow-brownfield-analyst` | `.flow/codebase-index.md` |
|
|
55
|
+
|
|
56
|
+
`flow-executor` is marker-driven rather than report-driven: it must update task state and end with `TASK_COMPLETE: <task_id>` or `TASK_FAILED: <task_id>`.
|
|
57
|
+
|
|
58
|
+
## Agent Teams Compatibility
|
|
59
|
+
|
|
60
|
+
- Official `agent-teams` behavior differs from regular subagent invocation in one critical way: when a subagent definition runs as a teammate, its `skills` and `mcpServers` frontmatter fields are not applied.
|
|
61
|
+
- Team coordination tools remain available to teammates, but any agent that relies on a preloaded skill must also have access to the `Skill` tool so it can invoke that skill explicitly when used as a teammate.
|
|
62
|
+
- A project file like `.claude/teams/teams.json` is not team configuration: official docs say team config lives under user scope, not project scope.
|
|
63
|
+
|
|
64
|
+
## Skills and Frontmatter
|
|
65
|
+
|
|
66
|
+
- Keep `SKILL.md` frontmatter minimal and schema-backed.
|
|
67
|
+
- Use `description` for the concise trigger phrase; put longer trigger examples in `when_to_use`.
|
|
68
|
+
- Use forked context and a named agent only when the skill's work benefits from isolation or a specialized role.
|
|
69
|
+
- Avoid preloading broad tool access. Prefer the smallest useful tool set per skill/agent.
|
|
70
|
+
- Do not make bundled skills or agents implicitly depend on runtime-gated tools such as `SendMessage`, `TeamCreate`, `TeamDelete`, or `ToolSearch` unless CurDX-Flow also ships the matching feature-flag/setup contract.
|
|
71
|
+
|
|
72
|
+
## Plugin Settings
|
|
73
|
+
|
|
74
|
+
- Claude Code plugin-root `settings.json` currently supports only `agent` and `subagentStatusLine`.
|
|
75
|
+
- CurDX-Flow ships only `subagentStatusLine`, pointing at `${CLAUDE_PLUGIN_ROOT}/hooks/scripts/subagent-statusline.sh`.
|
|
76
|
+
- The status-line script must fail open on malformed input or missing `python3`; UI decoration must never break agent execution.
|
|
77
|
+
- Plugin-root references must never traverse outside the plugin directory. Installed marketplace plugins run from Claude Code's plugin cache, so parent-directory references are invalid even if they work in a development checkout.
|
|
78
|
+
- If adding plugin settings, update `schemas/plugin-settings.schema.json`, `test/plugin-structure-contract.test.js`, `test/pack-tarball-smoke.test.js`, and `scripts/validate-plugin-contracts.mjs` in the same change.
|
|
79
|
+
|
|
80
|
+
## Plugin Dependency Constraints
|
|
81
|
+
|
|
82
|
+
- Official dependency version constraints require upstream plugin release tags in the `{plugin-name}--v{version}` format.
|
|
83
|
+
- Do not add a version constraint to the `context7-plugin` dependency unless the Upstash marketplace has matching `context7-plugin--v*` tags. A semver range without those tags can disable dependency resolution.
|
|
84
|
+
- Keep the CLI registry and `.claude-plugin/plugin.json` dependency entry aligned: Context7 remains a required companion plugin, while optional tools stay in `RECOMMENDED_PLUGINS`.
|
|
85
|
+
|
|
86
|
+
## Shared Settings Guardrails
|
|
87
|
+
|
|
88
|
+
- `.claude/settings.json` is a shared project surface. Keep machine-local scripts, secrets, and credential helpers out of it.
|
|
89
|
+
- Official docs say these keys are ignored or not accepted at project scope and must live in user/local/managed settings instead:
|
|
90
|
+
- `autoMemoryDirectory`
|
|
91
|
+
- `autoMode`
|
|
92
|
+
- `useAutoModeDuringPlan`
|
|
93
|
+
- `permissions.skipDangerousModePermissionPrompt`
|
|
94
|
+
- `sshConfigs`
|
|
95
|
+
- `teammateMode` belongs in the global `~/.claude.json` config, not project `settings.json`.
|
|
96
|
+
- Treat shared auto-approval settings as high risk:
|
|
97
|
+
- `enableAllProjectMcpServers`
|
|
98
|
+
- `enabledMcpjsonServers`
|
|
99
|
+
- Treat shared hook and skill policy as behavior-changing:
|
|
100
|
+
- `disableSkillShellExecution: true` replaces inline shell output in project/plugin skills and commands with a disabled placeholder.
|
|
101
|
+
- Empty `allowedHttpHookUrls` blocks all HTTP hook targets.
|
|
102
|
+
- Empty `httpHookAllowedEnvVars` prevents HTTP hook header environment interpolation.
|
|
103
|
+
- Treat shared `env` injection as behavior-changing when it flips Claude runtime modes:
|
|
104
|
+
- `CLAUDE_CODE_SIMPLE=1` puts Claude Code into bare/simple mode and disables hooks, skills, plugins, MCP discovery, auto memory, and `CLAUDE.md`.
|
|
105
|
+
- `CLAUDE_CODE_SIMPLE_SYSTEM_PROMPT=1` keeps discovery enabled but swaps in the minimal Claude system prompt.
|
|
106
|
+
- `CLAUDE_CODE_EFFORT_LEVEL=low|medium` lowers reasoning effort for every collaborator session.
|
|
107
|
+
- `CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1` enables experimental teammate surfaces for every collaborator session.
|
|
108
|
+
- Provider-specific pinned model IDs (`ANTHROPIC_DEFAULT_*_MODEL`, `ANTHROPIC_CUSTOM_MODEL_OPTION`) should usually be paired with `_SUPPORTED_CAPABILITIES` so Claude keeps effort / thinking feature detection.
|
|
109
|
+
- In CI / headless runs, `CLAUDE_CODE_SYNC_PLUGIN_INSTALL=1` makes marketplace plugins available before the first turn; otherwise they can install in the background and miss turn one.
|
|
110
|
+
- `CLAUDE_CODE_PLUGIN_SEED_DIR` is the official way to pre-populate marketplace plugins in containers and CI images.
|
|
111
|
+
- Treat shared sandbox policy as runtime-sensitive:
|
|
112
|
+
- `sandbox.failIfUnavailable: true` can fail Claude Code startup on unsupported hosts.
|
|
113
|
+
- `sandbox.filesystem.denyRead` / `denyWrite` must not block `.flow`, `.git`, or the project root.
|
|
114
|
+
- Empty `sandbox.network.allowedDomains` blocks outbound network access for sandboxed commands.
|
|
115
|
+
- Prefer `attribution` over deprecated `includeCoAuthoredBy`.
|
|
116
|
+
- Treat shared runtime blockers as high risk for CurDX-Flow:
|
|
117
|
+
- `disableAllHooks: true` disables stop-hook recovery, artifact guards, and custom status lines.
|
|
118
|
+
- `agent: "<name>"` routes the main thread through a named subagent, replacing the normal CurDX-Flow prompt, tool surface, and model for the whole session.
|
|
119
|
+
- `permissions.defaultMode: "dontAsk"` can auto-deny clarification and Agent dispatch prompts.
|
|
120
|
+
- `permissions.deny` rules for `Agent`, `AskUserQuestion`, CurDX-Flow `flow-*` agents, or broad `Bash` / `Monitor` / `Read` / `Write` / `Edit` / `Grep` / `Glob` tools can make workflows fail.
|
|
121
|
+
- `availableModels` must include the portable `sonnet` and `opus` aliases used by bundled agents.
|
|
122
|
+
- Shared `effortLevel: "low"` or `"medium"` may underpower main-thread planning/review turns; prefer `high` / `xhigh` for CurDX-Flow-heavy projects.
|
|
123
|
+
- `CLAUDE_CODE_SIMPLE=1` in the launch environment is a hard runtime blocker for CurDX-Flow because Claude stops discovering plugin assets and `CLAUDE.md`.
|
|
124
|
+
- `CLAUDE_CODE_SIMPLE_SYSTEM_PROMPT=1` in the launch environment is not a hard blocker, but it weakens the normal Claude Code system prompt CurDX-Flow expects.
|
|
125
|
+
- Provider-specific model IDs in `ANTHROPIC_DEFAULT_*_MODEL` or `ANTHROPIC_CUSTOM_MODEL_OPTION` can disable feature detection for effort and thinking unless the matching `_SUPPORTED_CAPABILITIES` env var is declared.
|
|
126
|
+
- In CI / `claude -p` runs that depend on marketplace plugins, missing `CLAUDE_CODE_SYNC_PLUGIN_INSTALL=1` (or a seeded plugin cache via `CLAUDE_CODE_PLUGIN_SEED_DIR`) can leave plugins unavailable on the first turn.
|
|
127
|
+
- Prefer `claude --bare -p` for CI / scripted runs so hooks, skills, plugins, MCP discovery, auto memory, and `CLAUDE.md` do not vary by machine; add `--plugin-dir`, `--settings`, and `--mcp-config` explicitly when needed.
|
|
128
|
+
- Do not depend on interactive `/curdx-flow:*` slash commands in `claude -p`; scripted runs should ask for the desired outcome directly.
|
|
129
|
+
- `settings.json` does not accept `effortLevel: "max"`; official docs reserve `max` for session-only `/effort` (or `CLAUDE_CODE_EFFORT_LEVEL`), so do not commit it to shared project settings.
|
|
130
|
+
- `enabledPlugins` entries set to `false` for `curdx-flow@curdx-flow-marketplace` or required companion plugins override user-level installs in that project.
|
|
131
|
+
|
|
132
|
+
## Browser and UI Verification
|
|
133
|
+
|
|
134
|
+
For UI-facing acceptance criteria, code inspection and DOM unit tests are not sufficient evidence. Use `chrome-devtools` MCP when available to drive the real browser, capture screenshots, list console messages, and inspect network requests. If the MCP is unavailable, mark UI-facing acceptance criteria as unverified instead of silently passing them.
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
## Reality Verification Contract
|
|
138
|
+
|
|
139
|
+
For fix/debug/regression specs, green tests alone do not prove the user-visible problem was fixed. The workflow must preserve a BEFORE/AFTER evidence trail:
|
|
140
|
+
|
|
141
|
+
1. BEFORE: record the original reproduction command, observed failure output, and timestamp in `.progress.md` before changing code.
|
|
142
|
+
2. FIX: change the smallest root cause and run the task's Verify command.
|
|
143
|
+
3. AFTER: rerun the original reproduction command and compare output against BEFORE.
|
|
144
|
+
4. COMPLETE: write `Verified: Issue resolved` only when the original failure is gone.
|
|
145
|
+
|
|
146
|
+
Planner duties:
|
|
147
|
+
- Add a `VF` task for fix/debug specs unless `STATE.md` has an explicit D-NN waiver.
|
|
148
|
+
- Treat missing `VF` coverage as a coverage-audit gap.
|
|
149
|
+
|
|
150
|
+
Executor duties:
|
|
151
|
+
- Do not mark `VF` complete unless `.progress.md` has the BEFORE/AFTER comparison.
|
|
152
|
+
- Use the same reproduction command for AFTER unless a documented D-NN explains why the command changed.
|
|
153
|
+
|
|
154
|
+
Verifier duties:
|
|
155
|
+
- Mark fix/debug specs `PARTIAL` when BEFORE/AFTER evidence is missing, even if the normal test suite is green.
|
|
156
|
+
|
|
157
|
+
## Task Split Contract
|
|
158
|
+
|
|
159
|
+
When a task is too broad, under-specified, or unsafe to complete surgically, the executor must stop rather than expand scope. It returns `TASK_FAILED` with a split proposal containing at most 3 replacement tasks, each with `Do`, `Files`, `Done when`, `Verify`, and `Commit` fields.
|
|
160
|
+
|
|
161
|
+
The coordinator or planner owns updates to `tasks.md`. An executor must not create new tasks and execute them in the same turn.
|
|
162
|
+
|
|
163
|
+
## Failure Recovery Contract
|
|
164
|
+
|
|
165
|
+
Execution failure recovery is ledger-first:
|
|
166
|
+
|
|
167
|
+
- Default `manual` recovery blocks progress past `TASK_FAILED`; retry the first unchecked task after root-cause analysis.
|
|
168
|
+
- `fix-task` recovery may create one targeted `[FIX <task_id>]` task immediately after the failed task, but only before execution resumes.
|
|
169
|
+
- `.state.json` `execute_state.fix_task_map` records attempts, generated fix task ids, and the last error per original task.
|
|
170
|
+
- `max_fix_tasks_per_original` is a hard ceiling, not a suggestion.
|
|
171
|
+
|
|
172
|
+
Generated fix tasks must include `Do`, `Files`, `Done when`, `Verify`, and `Commit`. A recovery task that cannot name a verification command is not actionable and should stop for user input rather than guessing.
|
|
173
|
+
|
|
174
|
+
## Stop-Hook Recovery Contract
|
|
175
|
+
|
|
176
|
+
The stop-hook strategy must never trust one source of completion by itself:
|
|
177
|
+
|
|
178
|
+
- `.state.json` tracks execution cursor and phase.
|
|
179
|
+
- `tasks.md` is the task ledger; unchecked tasks mean work remains.
|
|
180
|
+
- `ALL_TASKS_COMPLETE` is a signal, not proof.
|
|
181
|
+
|
|
182
|
+
Completion requires both completed state and zero unchecked tasks. If they disagree, continue with the unchecked tasks in `tasks.md` and do not add new tasks. When Claude Code sends `stop_hook_active=true`, allow the stop to prevent recursive stop-hook loops; resume from persisted state on the next turn.
|
|
183
|
+
|
|
184
|
+
## Status / Cancel Contract
|
|
185
|
+
|
|
186
|
+
`/curdx-flow:status` is read-only. It must compare the machine state (`.state.json`) against the human task ledger (`tasks.md`) before reporting health. If they disagree, report `NEEDS_ATTENTION` and give one concrete recovery command.
|
|
187
|
+
|
|
188
|
+
`/curdx-flow:cancel` is non-destructive by default. It cancels execution state while preserving spec artifacts, progress, reports, and project-level `.flow` files. Deleting a spec requires both `--delete-spec` and `--yes`.
|
|
189
|
+
|
|
190
|
+
If the state JSON is corrupt, preserve it by renaming it to `.state.json.corrupt.<timestamp>` rather than deleting it. Recovery commands should prefer `/curdx-flow:status` followed by `/curdx-flow:implement --strategy=subagent`.
|
|
191
|
+
|
|
192
|
+
## Test Quality Contract
|
|
193
|
+
|
|
194
|
+
Tests used as FR/AC evidence must exercise real behavior. Mock-only tests are not proof of implementation.
|
|
195
|
+
|
|
196
|
+
Blocking evidence problems:
|
|
197
|
+
- The test only asserts that mocks/spies were called.
|
|
198
|
+
- The real module/function under test is not invoked.
|
|
199
|
+
- The test is skipped, assertion-free, or would pass with an empty implementation.
|
|
200
|
+
- Mock setup overwhelms behavioral assertions and no integration/e2e backup exists.
|
|
201
|
+
- Stateful mocks are not cleaned up between tests.
|
|
202
|
+
|
|
203
|
+
Mocks are acceptable for boundaries (network, payment provider, clock/randomness) when the assertion still verifies production logic. If a requirement is backed only by weak tests, `/curdx-flow:verify` and `/curdx-flow:review` must not return full PASS.
|
|
@@ -250,7 +250,7 @@ Week 5-6: Spec 4 (refund) + Spec 5 (query)
|
|
|
250
250
|
/curdx-flow:review
|
|
251
251
|
↓
|
|
252
252
|
5. All sub-specs done → Epic complete
|
|
253
|
-
6.
|
|
253
|
+
6. Record an epic-level retrospective in `.flow/_epics/<name>/epic.md`
|
|
254
254
|
```
|
|
255
255
|
|
|
256
256
|
---
|
|
@@ -106,6 +106,8 @@ Or (default):
|
|
|
106
106
|
- Main agent executes 1 task → ends naturally (Stop event)
|
|
107
107
|
- `stop-watcher.sh` hook fires
|
|
108
108
|
- Checks whether `.state.json` still has unfinished tasks
|
|
109
|
+
- Cross-checks `tasks.md`; completion requires zero unchecked tasks as well as completed state
|
|
110
|
+
- Allows stop when Claude Code reports `stop_hook_active=true` to avoid recursive stop-hook loops
|
|
109
111
|
- If unfinished tasks remain, it outputs `{"decision": "block", "reason": "continue task N"}`
|
|
110
112
|
- Claude Code treats `decision=block` as "issue another response round", and the main agent automatically continues with the next task
|
|
111
113
|
- Repeats until `TASK_FAILED` or `ALL_TASKS_COMPLETE`
|
|
@@ -119,6 +121,8 @@ main agent → task N → Stop → stop-watcher.sh
|
|
|
119
121
|
no tasks? → allow stop
|
|
120
122
|
```
|
|
121
123
|
|
|
124
|
+
Safety invariant: `ALL_TASKS_COMPLETE` is advisory, not authoritative. If `tasks.md` still has unchecked tasks, the hook blocks and tells the agent to continue those tasks instead of advancing to verify.
|
|
125
|
+
|
|
122
126
|
### When to use
|
|
123
127
|
- ✓ Long task chains (20+)
|
|
124
128
|
- ✓ Unattended execution (overnight automation)
|
|
@@ -219,6 +223,11 @@ return "linear"
|
|
|
219
223
|
- `"auto"` — use the decision tree above (default)
|
|
220
224
|
- Other concrete strategies
|
|
221
225
|
|
|
226
|
+
Execution failure recovery is explicit:
|
|
227
|
+
- `recovery_mode: "manual"` — default; never skip a failed task, and retry from the first unchecked task after root-cause analysis.
|
|
228
|
+
- `recovery_mode: "fix-task"` — insert a targeted `[FIX <task_id>]` task before retrying the original failure.
|
|
229
|
+
- `max_fix_tasks_per_original` — hard ceiling for generated fix tasks per original task.
|
|
230
|
+
|
|
222
231
|
---
|
|
223
232
|
|
|
224
233
|
## Failure Handling (common to all strategies)
|
|
@@ -233,6 +242,19 @@ Step D: if ≥3 retries fail with no new hypothesis, stop and challenge the arch
|
|
|
233
242
|
Step E: report TASK_FAILED
|
|
234
243
|
```
|
|
235
244
|
|
|
245
|
+
If fix-task recovery is enabled, the coordinator turns a `TASK_FAILED` into a ledgered repair task before doing more work:
|
|
246
|
+
|
|
247
|
+
```markdown
|
|
248
|
+
- [ ] **<task_id>.<n>** [FIX <task_id>] Fix: <root cause>
|
|
249
|
+
- **Do**: <repair steps>
|
|
250
|
+
- **Files**: <same files as the original task or narrower>
|
|
251
|
+
- **Done when**: Original failure no longer reproduces
|
|
252
|
+
- **Verify**: <original verify command or tighter reproduction>
|
|
253
|
+
- **Commit**: `fix(<scope>): address <failure>`
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
The fix task must be written to `tasks.md` and tracked in `.state.json` `execute_state.fix_task_map` before it is executed. Executors do not invent and execute recovery work in the same turn.
|
|
257
|
+
|
|
236
258
|
### Extra protections for Stop-Hook strategy
|
|
237
259
|
- 3 consecutive TASK_FAILED → stop-watcher.sh halts the loop
|
|
238
260
|
- 10 consecutive Stop triggers with no progress → halt (anti-deadlock)
|
|
@@ -272,7 +294,7 @@ If you really must switch, do it manually:
|
|
|
272
294
|
- Next `/curdx-flow:start <name>` (or `/curdx-flow:start --resume`) resumes from `task_index`
|
|
273
295
|
|
|
274
296
|
### Snapshots
|
|
275
|
-
`/curdx-flow:
|
|
297
|
+
Claude Code checkpoints plus `.flow/specs/<name>/.progress.md` are the current recovery surface. Use `/curdx-flow:status` to inspect recoverability.
|
|
276
298
|
|
|
277
299
|
---
|
|
278
300
|
|
|
@@ -201,8 +201,8 @@ But for production-grade features, running through is strongly recommended.
|
|
|
201
201
|
|
|
202
202
|
## Difference from /curdx-flow:review
|
|
203
203
|
|
|
204
|
-
- **/curdx-flow:review
|
|
205
|
-
- **/curdx-flow:
|
|
204
|
+
- **/curdx-flow:review**: review **after code is finished** — Stage 1 compliance + Stage 2 quality
|
|
205
|
+
- **/curdx-flow:spec --review**: review **before code starts** — targets design.md
|
|
206
206
|
|
|
207
207
|
The two don't overlap. Plan Review prevents "doing the wrong thing", Code Review ensures "the thing was done right".
|
|
208
208
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# POC-First Workflow — 5 Phases
|
|
2
2
|
|
|
3
|
-
> Step-by-step methodology for the execution phase: get it running → clean up → add tests → pass quality gates →
|
|
3
|
+
> Step-by-step methodology for the execution phase: get it running → clean up → add tests → pass quality gates → hand off review-ready evidence.
|
|
4
4
|
>
|
|
5
5
|
> Agents reference this file via `@${CLAUDE_PLUGIN_ROOT}/knowledge/poc-first-workflow.md`.
|
|
6
6
|
|
|
@@ -15,7 +15,7 @@ Phase 1: Make It Work → end-to-end running; hard-coding allowed
|
|
|
15
15
|
Phase 2: Refactoring → clean up structure, behavior unchanged
|
|
16
16
|
Phase 3: Testing (TDD) → red-green-yellow loop to backfill tests
|
|
17
17
|
Phase 4: Quality Gates → tsc + lint + test all green
|
|
18
|
-
Phase 5:
|
|
18
|
+
Phase 5: Evidence Handoff → verify, review, prepare PR/release evidence
|
|
19
19
|
```
|
|
20
20
|
|
|
21
21
|
---
|
|
@@ -153,10 +153,10 @@ Each item produces **0 errors, 0 warnings** (warnings are not tolerated since th
|
|
|
153
153
|
|
|
154
154
|
---
|
|
155
155
|
|
|
156
|
-
## Phase 5:
|
|
156
|
+
## Phase 5: Evidence Handoff
|
|
157
157
|
|
|
158
158
|
### Goal
|
|
159
|
-
|
|
159
|
+
Feature is verified, reviewed, and ready for a human PR/release decision.
|
|
160
160
|
|
|
161
161
|
### Steps
|
|
162
162
|
|
|
@@ -165,7 +165,7 @@ Code merged to main, feature shipped.
|
|
|
165
165
|
- Commit message follows conventional format
|
|
166
166
|
- Squash if there are too many WIP commits
|
|
167
167
|
|
|
168
|
-
2. **
|
|
168
|
+
2. **Prepare PR/release evidence**
|
|
169
169
|
- Clear title (< 70 chars)
|
|
170
170
|
- Summary 3-5 lines covering why & what
|
|
171
171
|
- Include a test plan (checklist)
|
|
@@ -180,9 +180,9 @@ Code merged to main, feature shipped.
|
|
|
180
180
|
- Wait for CI green after every push
|
|
181
181
|
- Fix red immediately — don't pile up
|
|
182
182
|
|
|
183
|
-
5. **
|
|
183
|
+
5. **Hand off**
|
|
184
184
|
- Squash vs merge vs rebase: per project convention
|
|
185
|
-
-
|
|
185
|
+
- Use the host project's normal PR, merge, and release process
|
|
186
186
|
|
|
187
187
|
### Pitfalls
|
|
188
188
|
- **PR too large** → reviewer gives up. Split anything > 500 changed lines.
|
|
@@ -198,7 +198,7 @@ Code merged to main, feature shipped.
|
|
|
198
198
|
- Skip Refactoring / Testing / Quality Gates
|
|
199
199
|
|
|
200
200
|
### Fast mode (one-off task)
|
|
201
|
-
- Only Phase 1 + Phase 5 (
|
|
201
|
+
- Only Phase 1 + Phase 5 (handoff evidence stays lightweight)
|
|
202
202
|
- Suitable for: fixing a typo, adding a log, changing a constant
|
|
203
203
|
|
|
204
204
|
### Standard mode (default)
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Review Feedback Intake — Verify Before Changing
|
|
2
|
+
|
|
3
|
+
CurDX-Flow treats review feedback as technical input, not orders to blindly implement. The goal is to fix real issues while avoiding scope creep, regressions, and performative agreement.
|
|
4
|
+
|
|
5
|
+
## Intake Pattern
|
|
6
|
+
|
|
7
|
+
For each review item:
|
|
8
|
+
|
|
9
|
+
1. **Read** the full finding, including severity, evidence, and suggested fix.
|
|
10
|
+
2. **Restate** the technical requirement in one sentence.
|
|
11
|
+
3. **Verify** against the codebase/spec:
|
|
12
|
+
- Is the finding true at the referenced path/line?
|
|
13
|
+
- Does it violate an FR, AC, AD, gate, test, or user decision?
|
|
14
|
+
- Does the suggested fix break existing behavior or platform constraints?
|
|
15
|
+
4. **Classify**:
|
|
16
|
+
- `BLOCKER`: correctness, security, missing requirement, failing verify, broken CI.
|
|
17
|
+
- `IMPORTANT`: maintainability or test gap that should be fixed before ship.
|
|
18
|
+
- `SUGGESTION`: non-blocking improvement or preference.
|
|
19
|
+
- `PUSHBACK`: technically wrong, violates YAGNI, conflicts with D-NN, or lacks evidence.
|
|
20
|
+
5. **Act one item at a time**:
|
|
21
|
+
- Fix blockers first.
|
|
22
|
+
- Run the smallest relevant verification after each fix.
|
|
23
|
+
- Record pushback with evidence instead of silently ignoring it.
|
|
24
|
+
|
|
25
|
+
## Required Artifact
|
|
26
|
+
|
|
27
|
+
When review produces any nontrivial feedback, append a section to `.flow/specs/<active>/.progress.md`:
|
|
28
|
+
|
|
29
|
+
```markdown
|
|
30
|
+
## Review Feedback Intake YYYY-MM-DD
|
|
31
|
+
|
|
32
|
+
| Item | Source | Classification | Decision | Evidence | Follow-up |
|
|
33
|
+
|---|---|---|---|---|---|
|
|
34
|
+
| R-01 | review-report.md#... | BLOCKER | fix | `npm test` fails AC-2.1 | Task 4.4 |
|
|
35
|
+
| R-02 | review-report.md#... | PUSHBACK | defer | D-07 says no CSV export | none |
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Pushback Rules
|
|
39
|
+
|
|
40
|
+
Push back when the feedback:
|
|
41
|
+
|
|
42
|
+
- Adds unused features or speculative architecture.
|
|
43
|
+
- Conflicts with explicit user decisions (`D-NN`).
|
|
44
|
+
- Breaks compatibility that the current code intentionally preserves.
|
|
45
|
+
- Is unsupported by evidence and cannot be reproduced.
|
|
46
|
+
- Optimizes style while leaving spec compliance unresolved.
|
|
47
|
+
|
|
48
|
+
Pushback must be technical: cite code, tests, specs, or decisions. Do not use emotional language.
|
|
49
|
+
|
|
50
|
+
## Fix Loop
|
|
51
|
+
|
|
52
|
+
1. Intake review items.
|
|
53
|
+
2. Convert accepted blockers/important issues into tasks or direct fixes.
|
|
54
|
+
3. Run targeted verification per item.
|
|
55
|
+
4. Re-run `/curdx-flow:verify` when behavior changed.
|
|
56
|
+
5. Re-run `/curdx-flow:review` until blockers are gone.
|
|
57
|
+
|
|
@@ -111,13 +111,20 @@ Stage 2 applies all enabled Gates (from `.flow/config.json`):
|
|
|
111
111
|
|
|
112
112
|
- All 4 sources (FR / AD / Research / Decisions) covered?
|
|
113
113
|
|
|
114
|
-
#### 2.5 (enterprise) Adversarial review (adversarial-review-gate)
|
|
114
|
+
#### 2.5 Test quality (test-quality-gate)
|
|
115
|
+
|
|
116
|
+
- Do tests used as FR/AC evidence exercise real behavior, not only mocks/spies?
|
|
117
|
+
- Are skipped/assertion-free tests excluded from evidence?
|
|
118
|
+
- Are mock-heavy tests backed by integration/e2e coverage or a documented boundary rationale?
|
|
119
|
+
- Are stateful mocks cleaned up between tests?
|
|
120
|
+
|
|
121
|
+
#### 2.6 (enterprise) Adversarial review (adversarial-review-gate)
|
|
115
122
|
|
|
116
123
|
- Every applicable category examined (N/A documented for the rest)?
|
|
117
124
|
- Findings proportional to real issues (zero is OK with a proof-of-checking report)?
|
|
118
125
|
- Each finding has evidence + recommendation?
|
|
119
126
|
|
|
120
|
-
#### 2.6 (enterprise) Edge cases (edge-case-gate)
|
|
127
|
+
#### 2.7 (enterprise) Edge cases (edge-case-gate)
|
|
121
128
|
|
|
122
129
|
- Each applicable edge-case category addressed (N/A noted for the rest)?
|
|
123
130
|
- Gap list has priorities?
|
|
@@ -168,9 +175,15 @@ When the review turns up issues, the typical flow:
|
|
|
168
175
|
↓
|
|
169
176
|
4. /curdx-flow:review re-review
|
|
170
177
|
↓
|
|
171
|
-
5. Until APPROVED →
|
|
178
|
+
5. Until APPROVED → hand off with review-report.md + atomic commits
|
|
172
179
|
```
|
|
173
180
|
|
|
181
|
+
Before implementing review feedback, apply `@${CLAUDE_PLUGIN_ROOT}/knowledge/review-feedback-intake.md`:
|
|
182
|
+
- Verify each finding against code/spec reality.
|
|
183
|
+
- Classify as `BLOCKER`, `IMPORTANT`, `SUGGESTION`, or `PUSHBACK`.
|
|
184
|
+
- Fix accepted items one at a time with targeted verification.
|
|
185
|
+
- Record technical pushback in `.progress.md` instead of silently ignoring feedback.
|
|
186
|
+
|
|
174
187
|
---
|
|
175
188
|
|
|
176
189
|
## Failure Modes of Two Stages
|
|
@@ -219,15 +232,15 @@ Some reviewers list 50 minor improvements — the user can't process.
|
|
|
219
232
|
↓ ↓
|
|
220
233
|
↓ review-report.md
|
|
221
234
|
↓
|
|
222
|
-
(optional) /curdx-flow:
|
|
235
|
+
(optional) /curdx-flow:review --adversarial --edge-case
|
|
223
236
|
↓
|
|
224
237
|
adversarial-review.md
|
|
225
238
|
edge-cases.md
|
|
226
239
|
↓
|
|
227
|
-
/
|
|
240
|
+
Ready for human PR/release handoff with verification + review evidence
|
|
228
241
|
```
|
|
229
242
|
|
|
230
|
-
Verify is "did we implement the right thing", Review is "is the implementation good", Audit is "what else could be better".
|
|
243
|
+
Verify is "did we implement the right thing", Review is "is the implementation good", Audit is "what else could be better". CurdX-Flow currently stops at evidence-backed handoff; do not reference non-existent ship/land commands.
|
|
231
244
|
|
|
232
245
|
---
|
|
233
246
|
|
|
@@ -12,6 +12,12 @@
|
|
|
12
12
|
|
|
13
13
|
A wave is **a consecutive run of `[P]`-marked tasks**. Within a wave, run in parallel; across waves, run serially.
|
|
14
14
|
|
|
15
|
+
Hard limits:
|
|
16
|
+
- Max 5 tasks per wave (`max_parallel` ceiling). More than 5 tasks must be split by a `[VERIFY]` checkpoint or a serial boundary.
|
|
17
|
+
- Every task in a wave owns a disjoint `Files` set.
|
|
18
|
+
- Shared config/barrel/registry files are serial by default: `package.json`, lockfiles, `tsconfig.*`, `index.ts`, router registries, migration manifests, generated schema registries.
|
|
19
|
+
- Read-after-write is a conflict even when file paths differ: if task B imports, tests, or configures output from task A, B must run in a later wave.
|
|
20
|
+
|
|
15
21
|
```
|
|
16
22
|
tasks.md:
|
|
17
23
|
1.1 [P] create auth directory
|
|
@@ -82,9 +88,13 @@ def analyze_waves(tasks):
|
|
|
82
88
|
def has_file_conflict(task, wave):
|
|
83
89
|
"""Do task's Files intersect any wave task's Files?"""
|
|
84
90
|
task_files = set(task.files)
|
|
91
|
+
if touches_shared_serial_surface(task_files):
|
|
92
|
+
return True
|
|
85
93
|
for other in wave:
|
|
86
94
|
if task_files & set(other.files):
|
|
87
95
|
return True
|
|
96
|
+
if has_read_after_write_dependency(task, other):
|
|
97
|
+
return True
|
|
88
98
|
return False
|
|
89
99
|
```
|
|
90
100
|
|
|
@@ -92,6 +102,7 @@ Rules:
|
|
|
92
102
|
- Two `[P]` tasks editing the same file → conflict, must split into different waves
|
|
93
103
|
- Two `[P]` tasks creating different files → OK
|
|
94
104
|
- One reads what another writes → **conflict** (a parallel read isn't guaranteed to see the latest write)
|
|
105
|
+
- More than 5 `[P]` tasks in one consecutive run → split the wave before dispatch
|
|
95
106
|
|
|
96
107
|
---
|
|
97
108
|
|
|
@@ -335,13 +346,17 @@ Progress: Wave 2/5 (60%)
|
|
|
335
346
|
"execution": {
|
|
336
347
|
"strategy": "wave",
|
|
337
348
|
"max_parallel": 5,
|
|
338
|
-
"wave_fail_policy": "continue-on-single | stop-on-any"
|
|
349
|
+
"wave_fail_policy": "continue-on-single | stop-on-any",
|
|
350
|
+
"recovery_mode": "manual | fix-task",
|
|
351
|
+
"max_fix_tasks_per_original": 2
|
|
339
352
|
}
|
|
340
353
|
}
|
|
341
354
|
```
|
|
342
355
|
|
|
343
356
|
- `max_parallel`: maximum parallel tasks per wave (default 5, to avoid API rate limits)
|
|
344
357
|
- `wave_fail_policy`: default behavior on single task failure
|
|
358
|
+
- `recovery_mode`: whether a failed wave task blocks for manual retry or creates a targeted `[FIX <task_id>]` task before retry
|
|
359
|
+
- `max_fix_tasks_per_original`: maximum fix tasks generated for one original task
|
|
345
360
|
|
|
346
361
|
---
|
|
347
362
|
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: CurdX Evidence-First
|
|
3
|
+
description: Concise, engineering-focused replies with explicit validation status, assumptions, and next actions.
|
|
4
|
+
keep-coding-instructions: true
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# CurdX Evidence-First
|
|
8
|
+
|
|
9
|
+
You are still Claude Code. Keep the default coding workflow, safety rules,
|
|
10
|
+
tool usage behavior, and verification discipline.
|
|
11
|
+
|
|
12
|
+
## Response priorities
|
|
13
|
+
|
|
14
|
+
1. Lead with the concrete outcome or current state.
|
|
15
|
+
2. State validation status explicitly:
|
|
16
|
+
- `Validated` when you actually ran checks and they passed
|
|
17
|
+
- `Unvalidated` when you did not run checks yet
|
|
18
|
+
- `Blocked` when validation could not be completed
|
|
19
|
+
3. Separate observed facts from assumptions or proposals.
|
|
20
|
+
4. Keep answers concise and operational; avoid filler and cheerleading.
|
|
21
|
+
5. When work is incomplete, state the next highest-value action plainly.
|
|
22
|
+
|
|
23
|
+
## Completion discipline
|
|
24
|
+
|
|
25
|
+
- Never imply something is done without evidence.
|
|
26
|
+
- If tests, builds, browser checks, or docs validation were not run, say so directly.
|
|
27
|
+
- If you are making a best-effort inference, label it as an inference.
|
|
28
|
+
- When relevant, point to the exact file paths or commands that support the claim.
|
|
29
|
+
|
|
30
|
+
## Formatting
|
|
31
|
+
|
|
32
|
+
- Use short section headers only when they improve scanability.
|
|
33
|
+
- Prefer short bullet lists over long prose.
|
|
34
|
+
- Match the user's language.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@curdx/flow",
|
|
3
|
-
"version": "2.2.
|
|
3
|
+
"version": "2.2.4",
|
|
4
4
|
"description": "CLI installer for CurdX-Flow — AI engineering workflow meta-framework for Claude Code",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -20,9 +20,11 @@
|
|
|
20
20
|
"hooks/",
|
|
21
21
|
"knowledge/",
|
|
22
22
|
"agent-preamble/",
|
|
23
|
+
"output-styles/",
|
|
23
24
|
"templates/",
|
|
24
25
|
"schemas/",
|
|
25
26
|
"skills/",
|
|
27
|
+
"settings.json",
|
|
26
28
|
"README.md",
|
|
27
29
|
"CHANGELOG.md",
|
|
28
30
|
"LICENSE"
|
|
@@ -48,5 +50,9 @@
|
|
|
48
50
|
"dependencies": {
|
|
49
51
|
"@clack/prompts": "^0.8.2",
|
|
50
52
|
"picocolors": "^1.1.1"
|
|
53
|
+
},
|
|
54
|
+
"devDependencies": {
|
|
55
|
+
"ajv": "^8.18.0",
|
|
56
|
+
"yaml": "^2.8.3"
|
|
51
57
|
}
|
|
52
58
|
}
|
|
@@ -40,6 +40,19 @@
|
|
|
40
40
|
"type": "string",
|
|
41
41
|
"enum": ["continue-on-single", "stop-on-any"],
|
|
42
42
|
"default": "continue-on-single"
|
|
43
|
+
},
|
|
44
|
+
"recovery_mode": {
|
|
45
|
+
"type": "string",
|
|
46
|
+
"enum": ["manual", "fix-task"],
|
|
47
|
+
"default": "manual",
|
|
48
|
+
"description": "How /curdx-flow:implement handles TASK_FAILED during execution. manual blocks for retry; fix-task inserts targeted [FIX <task>] tasks before retrying."
|
|
49
|
+
},
|
|
50
|
+
"max_fix_tasks_per_original": {
|
|
51
|
+
"type": "integer",
|
|
52
|
+
"minimum": 1,
|
|
53
|
+
"maximum": 5,
|
|
54
|
+
"default": 2,
|
|
55
|
+
"description": "Maximum generated [FIX <task>] tasks allowed for a single original task when recovery_mode is fix-task."
|
|
43
56
|
}
|
|
44
57
|
}
|
|
45
58
|
},
|
|
@@ -109,6 +122,7 @@
|
|
|
109
122
|
"karpathy-gate",
|
|
110
123
|
"verification-gate",
|
|
111
124
|
"tdd-gate",
|
|
125
|
+
"test-quality-gate",
|
|
112
126
|
"coverage-audit-gate",
|
|
113
127
|
"adversarial-review-gate",
|
|
114
128
|
"edge-case-gate",
|