cc-devflow 4.5.9 → 4.5.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122)
  1. package/.claude/skills/cc-act/CHANGELOG.md +11 -0
  2. package/.claude/skills/cc-act/SKILL.md +19 -10
  3. package/.claude/skills/cc-act/assets/PR_BRIEF_TEMPLATE.md +1 -1
  4. package/.claude/skills/cc-act/references/closure-contract.md +1 -1
  5. package/.claude/skills/cc-act/references/git-commit-guidelines.md +1 -1
  6. package/.claude/skills/cc-check/CHANGELOG.md +23 -0
  7. package/.claude/skills/cc-check/PLAYBOOK.md +1 -0
  8. package/.claude/skills/cc-check/SKILL.md +15 -9
  9. package/.claude/skills/cc-check/references/review-contract.md +7 -0
  10. package/.claude/skills/cc-check/scripts/render-report-card.js +6 -1
  11. package/.claude/skills/cc-dev/CHANGELOG.md +10 -0
  12. package/.claude/skills/cc-dev/SKILL.md +34 -2
  13. package/.claude/skills/cc-do/CHANGELOG.md +18 -0
  14. package/.claude/skills/cc-do/PLAYBOOK.md +7 -7
  15. package/.claude/skills/cc-do/SKILL.md +47 -40
  16. package/.claude/skills/cc-do/references/execution-recovery.md +18 -13
  17. package/.claude/skills/cc-do/scripts/build-task-context.sh +4 -17
  18. package/.claude/skills/cc-do/scripts/record-review-decision.sh +4 -5
  19. package/.claude/skills/cc-do/scripts/recover-workflow.sh +9 -11
  20. package/.claude/skills/cc-do/scripts/verify-task-gates.sh +12 -10
  21. package/.claude/skills/cc-do/scripts/write-task-checkpoint.sh +7 -29
  22. package/.claude/skills/cc-investigate/CHANGELOG.md +24 -0
  23. package/.claude/skills/cc-investigate/PLAYBOOK.md +10 -9
  24. package/.claude/skills/cc-investigate/SKILL.md +163 -417
  25. package/.claude/skills/cc-investigate/assets/TASKS_TEMPLATE.md +56 -10
  26. package/.claude/skills/cc-investigate/assets/TASK_MANIFEST_TEMPLATE.json +6 -6
  27. package/.claude/skills/cc-investigate/assets/{ANALYSIS_TEMPLATE.md → legacy/ANALYSIS_TEMPLATE.md} +1 -0
  28. package/.claude/skills/cc-investigate/references/investigation-contract.md +5 -4
  29. package/.claude/skills/cc-investigate/scripts/bootstrap-analysis.sh +1 -1
  30. package/.claude/skills/cc-plan/CHANGELOG.md +32 -0
  31. package/.claude/skills/cc-plan/PLAYBOOK.md +55 -53
  32. package/.claude/skills/cc-plan/SKILL.md +209 -536
  33. package/.claude/skills/cc-plan/assets/TASKS_TEMPLATE.md +50 -14
  34. package/.claude/skills/cc-plan/assets/TASK_MANIFEST_TEMPLATE.json +5 -4
  35. package/.claude/skills/cc-plan/assets/{DESIGN_TEMPLATE.md → legacy/DESIGN_TEMPLATE.md} +1 -0
  36. package/.claude/skills/cc-plan/assets/{TINY_DESIGN_TEMPLATE.md → legacy/TINY_DESIGN_TEMPLATE.md} +1 -1
  37. package/.claude/skills/cc-plan/references/planning-contract.md +12 -10
  38. package/.claude/skills/cc-review/CHANGELOG.md +6 -0
  39. package/.claude/skills/cc-review/PLAYBOOK.md +9 -11
  40. package/.claude/skills/cc-review/SKILL.md +37 -61
  41. package/.claude/skills/cc-review/references/e2e-and-plugin-verification.md +1 -1
  42. package/.claude/skills/cc-review/references/implementation-review-branch.md +5 -5
  43. package/.claude/skills/cc-review/references/plan-review-branch.md +1 -1
  44. package/.claude/skills/cc-review/references/review-methods.md +4 -4
  45. package/.claude/skills/cc-review/scripts/collect-review-context.sh +14 -7
  46. package/CHANGELOG.md +30 -0
  47. package/CONTRIBUTING.md +40 -4
  48. package/CONTRIBUTING.zh-CN.md +40 -4
  49. package/README.md +22 -8
  50. package/README.zh-CN.md +22 -8
  51. package/bin/cc-devflow-cli.js +293 -36
  52. package/docs/examples/START-HERE.md +6 -4
  53. package/docs/examples/example-bindings.json +8 -8
  54. package/docs/examples/full-design-blocked/README.md +2 -2
  55. package/docs/examples/full-design-blocked/changes/REQ-002-bulk-invite-import/planning/design.md +2 -1
  56. package/docs/examples/full-design-blocked/changes/REQ-002-bulk-invite-import/planning/task-manifest.json +3 -2
  57. package/docs/examples/full-design-blocked/changes/REQ-002-bulk-invite-import/planning/tasks.md +11 -8
  58. package/docs/examples/full-design-blocked/changes/REQ-002-bulk-invite-import/review/report-card.json +4 -4
  59. package/docs/examples/local-handoff/README.md +2 -2
  60. package/docs/examples/local-handoff/changes/REQ-003-audit-log-export/planning/design.md +2 -1
  61. package/docs/examples/local-handoff/changes/REQ-003-audit-log-export/planning/task-manifest.json +3 -2
  62. package/docs/examples/local-handoff/changes/REQ-003-audit-log-export/planning/tasks.md +9 -6
  63. package/docs/examples/local-handoff/changes/REQ-003-audit-log-export/review/report-card.json +1 -1
  64. package/docs/examples/pdca-loop/README.md +2 -2
  65. package/docs/examples/pdca-loop/changes/REQ-001-copy-invite-link/handoff/pr-brief.md +2 -2
  66. package/docs/examples/pdca-loop/changes/REQ-001-copy-invite-link/planning/design.md +2 -1
  67. package/docs/examples/pdca-loop/changes/REQ-001-copy-invite-link/planning/task-manifest.json +2 -1
  68. package/docs/examples/pdca-loop/changes/REQ-001-copy-invite-link/planning/tasks.md +9 -6
  69. package/docs/examples/pdca-loop/changes/REQ-001-copy-invite-link/review/report-card.json +1 -1
  70. package/docs/examples/scripts/check-example-bindings.sh +2 -0
  71. package/docs/get-shit-done-strategy-audit.md +22 -22
  72. package/docs/guides/artifact-contract.md +5 -1
  73. package/docs/guides/getting-started.md +11 -8
  74. package/docs/guides/getting-started.zh-CN.md +11 -8
  75. package/docs/guides/minimize-artifacts.md +137 -0
  76. package/lib/compiler/__tests__/skills-registry.test.js +2 -2
  77. package/lib/skill-runtime/CLAUDE.md +1 -1
  78. package/lib/skill-runtime/__tests__/autopilot.test.js +42 -6
  79. package/lib/skill-runtime/__tests__/benchmark-artifacts.test.js +165 -0
  80. package/lib/skill-runtime/__tests__/benchmark-skills.test.js +109 -0
  81. package/lib/skill-runtime/__tests__/cli-bootstrap.integration.test.js +2 -2
  82. package/lib/skill-runtime/__tests__/dispatch.test.js +8 -38
  83. package/lib/skill-runtime/__tests__/intent.test.js +4 -20
  84. package/lib/skill-runtime/__tests__/lifecycle.test.js +1 -1
  85. package/lib/skill-runtime/__tests__/paths.test.js +7 -1
  86. package/lib/skill-runtime/__tests__/planner.tdd.test.js +61 -0
  87. package/lib/skill-runtime/__tests__/prepare-pr.test.js +3 -16
  88. package/lib/skill-runtime/__tests__/query.test.js +388 -7
  89. package/lib/skill-runtime/__tests__/review-check-integration.test.js +148 -0
  90. package/lib/skill-runtime/__tests__/review-records.test.js +619 -0
  91. package/lib/skill-runtime/__tests__/runtime.integration.test.js +64 -23
  92. package/lib/skill-runtime/__tests__/schemas.test.js +43 -0
  93. package/lib/skill-runtime/__tests__/task-contract-migrate.test.js +137 -0
  94. package/lib/skill-runtime/__tests__/task-contract.test.js +874 -0
  95. package/lib/skill-runtime/__tests__/verify-artifacts.test.js +203 -0
  96. package/lib/skill-runtime/__tests__/worker-run.test.js +4 -11
  97. package/lib/skill-runtime/__tests__/workflow-context-legacy-fallback.test.js +31 -0
  98. package/lib/skill-runtime/__tests__/workflow-context.test.js +98 -0
  99. package/lib/skill-runtime/artifacts.js +0 -5
  100. package/lib/skill-runtime/context-index.js +545 -0
  101. package/lib/skill-runtime/intent.js +9 -33
  102. package/lib/skill-runtime/lifecycle.js +1 -1
  103. package/lib/skill-runtime/operations/CLAUDE.md +2 -2
  104. package/lib/skill-runtime/operations/dispatch.js +4 -42
  105. package/lib/skill-runtime/operations/init.js +2 -6
  106. package/lib/skill-runtime/operations/janitor.js +2 -18
  107. package/lib/skill-runtime/operations/resume.js +21 -38
  108. package/lib/skill-runtime/operations/review-records.js +265 -0
  109. package/lib/skill-runtime/operations/snapshot.js +1 -1
  110. package/lib/skill-runtime/operations/task-contract.js +593 -0
  111. package/lib/skill-runtime/operations/worker-run.js +2 -30
  112. package/lib/skill-runtime/paths.js +4 -4
  113. package/lib/skill-runtime/planner.js +24 -11
  114. package/lib/skill-runtime/query-registry.js +2 -2
  115. package/lib/skill-runtime/query.js +15 -2
  116. package/lib/skill-runtime/review-records.js +123 -0
  117. package/lib/skill-runtime/review.js +246 -11
  118. package/lib/skill-runtime/schemas.js +174 -12
  119. package/lib/skill-runtime/store.js +0 -10
  120. package/lib/skill-runtime/task-contract.js +188 -0
  121. package/lib/skill-runtime/workflow-context.js +748 -0
  122. package/package.json +6 -2
@@ -0,0 +1,137 @@
1
+ # Minimized Workflow Artifacts
2
+
3
+ This guide describes the default artifact contract for new cc-devflow changes.
4
+ The goal is simple: keep durable workflow truth readable, small, and measurable.
5
+
6
+ ## Default Shape
7
+
8
+ Each new change keeps durable truth under `devflow/changes/<change-key>/`.
9
+
10
+ Default human-authored Markdown:
11
+
12
+ - `planning/tasks.md`
13
+
14
+ Default CLI-owned machine records:
15
+
16
+ - `change-meta.json`
17
+ - `planning/task-manifest.json`
18
+ - `review/review-ledger.jsonl`
19
+ - `review/review-findings.json` when findings exist
20
+ - `execution/tasks/<task-id>/checkpoint.json`
21
+ - `review/report-card.json`
22
+ - one final handoff file under `handoff/`
23
+
24
+ Runtime scratch, worker prompts, journals, assignments, and session logs belong
25
+ under `devflow/workspaces/<change-key>/`, not beside durable change truth.
26
+
27
+ ## Feature Plans
28
+
29
+ Feature and scope changes use:
30
+
31
+ - `planning/tasks.md#Contract Summary`
32
+ - `planning/task-manifest.json`
33
+ - `change-meta.json`
34
+
35
+ `Contract Summary` owns the frozen human-readable plan: user story, non-negotiable
36
+ constraints, decisions that must not be reopened, task slices, and verification
37
+ expectations. `task-manifest.json` and `change-meta.json` must be generated or
38
+ updated by `cc-devflow task-contract compile`; agents must not handwrite them.
39
+
40
+ ## Bug Investigations
41
+
42
+ Bug, regression, and unexpected-behavior work uses:
43
+
44
+ - `planning/tasks.md#Root Cause Contract`
45
+ - `planning/task-manifest.json`
46
+ - `change-meta.json`
47
+
48
+ `Root Cause Contract` owns the symptom, reproduction evidence, confirmed cause,
49
+ rejected near-causes, repair boundary, and task handoff. `cc-do` should implement
50
+ from that frozen contract instead of reopening investigation during execution.
51
+ `task-manifest.json` and `change-meta.json` must be generated or updated by
52
+ `cc-devflow task-contract compile`; agents must not handwrite them.
53
+
54
+ ## Review Records
55
+
56
+ `cc-review` writes structured lifecycle events first:
57
+
58
+ - `review/review-ledger.jsonl`
59
+ - optional `review/review-findings.json`
60
+ - optional rendered Markdown from `cc-devflow review render`
61
+
62
+ Markdown review reports are for human reading when needed. They are not the
63
+ default durable review source.
64
+
65
+ Useful commands:
66
+
67
+ ```bash
68
+ npx cc-devflow review start --change REQ-001 --change-key REQ-001-copy-invite-link --base-sha abc123 --head-sha def456
69
+ npx cc-devflow review record-node --change REQ-001 --change-key REQ-001-copy-invite-link --review-id <review-id> --node-id R001 --target planning/tasks.md --status checked --coverage contract --evidence-ref "cmd:npm run verify"
70
+ npx cc-devflow review add-finding --change REQ-001 --change-key REQ-001-copy-invite-link --review-id <review-id> --finding-id F001 --severity important --confidence 8 --display-tier blocking --fingerprint sha256:<hash> --scope "current change" --path planning/tasks.md --evidence "finding evidence" --recommendation "repair action" --route cc-do
71
+ npx cc-devflow review close --change REQ-001 --change-key REQ-001-copy-invite-link --review-id <review-id> --status clean --blocking-count 0 --warning-count 0 --next cc-check
72
+ npx cc-devflow review render --change REQ-001 --change-key REQ-001-copy-invite-link --review-id <review-id> --output review/review-report.md
73
+ ```
74
+
75
+ ## Legacy Fallback
76
+
77
+ Older changes may still contain:
78
+
79
+ - `planning/design.md`
80
+ - `planning/analysis.md`
81
+ - `review/cc-review-plan.md`
82
+ - `review/cc-review-report.md`
83
+ - `review/cc-review-agent-results.jsonl`
84
+
85
+ Those files remain readable compatibility inputs. New changes should not write
86
+ them by default. When migrating old work, fold feature-plan truth into
87
+ `planning/tasks.md#Contract Summary` and bug-investigation truth into
88
+ `planning/tasks.md#Root Cause Contract`.
89
+
90
+ ## Validation Gates
91
+
92
+ Validate one change:
93
+
94
+ ```bash
95
+ npx cc-devflow task-contract compile --change REQ-001 --change-key REQ-001-copy-invite-link
96
+ npx cc-devflow task-contract validate --change REQ-001 --change-key REQ-001-copy-invite-link
97
+ ```
98
+
99
+ Validate the repository artifact contract:
100
+
101
+ ```bash
102
+ npm run verify:artifacts
103
+ ```
104
+
105
+ Measure the contract:
106
+
107
+ ```bash
108
+ npm run benchmark:artifacts
109
+ ```
110
+
111
+ The package-level verification command also includes artifact validation:
112
+
113
+ ```bash
114
+ npm run verify
115
+ ```
116
+
117
+ Skill entrypoints have a separate context budget:
118
+
119
+ ```bash
120
+ npm run benchmark:skills
121
+ ```
122
+
123
+ Keep `SKILL.md` files as thin entry contracts. Move low-frequency planning,
124
+ review, and recovery details behind `PLAYBOOK.md` or `references/*` so agents
125
+ open them only when the matching escalation condition appears.
126
+
127
+ ## Authoring Rule
128
+
129
+ Before adding a durable file under `devflow/changes/<change-key>/`, answer:
130
+
131
+ 1. Which downstream skill reads it by default?
132
+ 2. Which state does it own that no existing artifact owns?
133
+ 3. Which command fails if it drifts?
134
+
135
+ If those answers are unclear, keep the information in `planning/tasks.md` or
136
+ ephemeral workspace scratch. Machine JSON belongs to the CLI/compiler path, not
137
+ manual agent authoring.
@@ -159,9 +159,9 @@ describe('Skills Registry Generator', () => {
159
159
  expect(execute.writes).toEqual(
160
160
  expect.arrayContaining([
161
161
  expect.objectContaining({
162
- path: 'devflow/changes/<change-key>/execution/tasks/<task-id>/checkpoint.json',
162
+ path: 'devflow/changes/<change-key>/execution/tasks/<task-id>/events.jsonl',
163
163
  durability: 'durable',
164
- required: true
164
+ required: false
165
165
  })
166
166
  ])
167
167
  );
@@ -4,7 +4,7 @@
4
4
  职责分组
5
5
  入口层: `cli.js` 负责命令分发,`index.js` 提供给测试和内部脚本的稳定聚合入口。
6
6
  基础层: `schemas.js`、`store.js`、`paths.js` 管住契约、持久化与路径规则,避免执行层重复造轮子。
7
- 状态层: `artifacts.js`、`lifecycle.js`、`query.js`、`review.js`、`team-state.js` 维护运行时真相源与只读查询。
7
+ 状态层: `artifacts.js`、`lifecycle.js`、`query.js`、`workflow-context.js`、`review.js`、`team-state.js` 维护运行时真相源与只读查询。
8
8
  规划与交接: `planner.js`、`intent.js`、`delegation.js` 把任务解析、handoff 生成和 team/workspace 委派收口成统一语义。
9
9
  阶段操作: `operations/` 是唯一 stage 入口目录;具体阶段边界见 `operations/CLAUDE.md`。
10
10
  测试布局: `__tests__/` 紧贴模块放置单元、回归与集成测试;顶层 `test/` 不再承载 `skill-runtime` 私有测试。
@@ -9,13 +9,13 @@ const {
9
9
  getTaskManifestPath,
10
10
  getReportCardPath,
11
11
  getReleaseNotePath,
12
- getRuntimeStatePath,
13
- getCheckpointPath
12
+ getRuntimeStatePath
14
13
  } = require('../store');
15
14
  const {
16
15
  getIntentResumeIndexPath,
17
16
  getIntentPrBriefPath
18
17
  } = require('../artifacts');
18
+ const { getChangePaths } = require('../paths');
19
19
 
20
20
  jest.setTimeout(20000);
21
21
 
@@ -41,6 +41,41 @@ function markManifestReviewsPassed(repoRoot, changeId) {
41
41
  fs.writeFileSync(manifestPath, `${JSON.stringify(manifest, null, 2)}\n`);
42
42
  }
43
43
 
44
+ function writeCleanReviewLedger(repoRoot, changeId) {
45
+ const change = getChangePaths(repoRoot, changeId);
46
+ const ledgerPath = path.join(change.reviewDir, 'review-ledger.jsonl');
47
+ fs.mkdirSync(path.dirname(ledgerPath), { recursive: true });
48
+ fs.writeFileSync(ledgerPath, [
49
+ JSON.stringify({
50
+ schema: 'review-ledger.v2',
51
+ change: change.changeKey,
52
+ reviewId: 'RVW-20260512-001',
53
+ createdAt: '2026-05-12T00:00:00.000Z',
54
+ createdBy: 'cc-devflow-cli',
55
+ event: 'review-started',
56
+ mode: 'implementation',
57
+ scope: 'current-diff',
58
+ baseSha: 'abc123',
59
+ headSha: 'def456',
60
+ selectedNodes: [],
61
+ skippedNodes: [],
62
+ riskLanes: []
63
+ }),
64
+ JSON.stringify({
65
+ schema: 'review-ledger.v2',
66
+ change: change.changeKey,
67
+ reviewId: 'RVW-20260512-001',
68
+ createdAt: '2026-05-12T00:01:00.000Z',
69
+ createdBy: 'cc-devflow-cli',
70
+ event: 'review-closed',
71
+ status: 'clean',
72
+ blockingCount: 0,
73
+ warningCount: 0,
74
+ next: 'cc-check'
75
+ })
76
+ ].join('\n'));
77
+ }
78
+
44
79
  describe('runAutopilot', () => {
45
80
  test('stops at the approval gate after planning without writing approval-phase handoff markdown', async () => {
46
81
  const repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'cc-devflow-autopilot-'));
@@ -81,7 +116,7 @@ describe('runAutopilot', () => {
81
116
  expect(fs.existsSync(getIntentResumeIndexPath(repoRoot, 'REQ-123'))).toBe(false);
82
117
  });
83
118
 
84
- test('resumes after approval, executes delegated work, and prepares a PR from checkpoints', async () => {
119
+ test('resumes after approval, executes delegated work, and prepares a PR from task state', async () => {
85
120
  const repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'cc-devflow-autopilot-workers-'));
86
121
 
87
122
  writeJson(path.join(repoRoot, 'package.json'), {
@@ -123,17 +158,16 @@ describe('runAutopilot', () => {
123
158
  });
124
159
 
125
160
  const manifest = JSON.parse(fs.readFileSync(getTaskManifestPath(repoRoot, 'REQ-123'), 'utf8'));
126
- const delegatedCheckpoint = JSON.parse(fs.readFileSync(getCheckpointPath(repoRoot, 'REQ-123', 'T002'), 'utf8'));
127
161
  const report = JSON.parse(fs.readFileSync(getReportCardPath(repoRoot, 'REQ-123'), 'utf8'));
128
162
 
129
163
  expect(firstRun.executed).toEqual(expect.arrayContaining(['delegate', 'worker-run', 'dispatch', 'verify']));
130
164
  expect(firstRun.currentStage).toBe('verify');
131
165
  expect(manifest.tasks.find((task) => task.id === 'T002').status).toBe('passed');
132
- expect(delegatedCheckpoint.outputExcerpt).toContain('delegate-ok');
133
166
  expect(report.review.status).toBe('blocked');
134
167
  expect(fs.existsSync(getIntentPrBriefPath(repoRoot, 'REQ-123'))).toBe(false);
135
168
 
136
169
  markManifestReviewsPassed(repoRoot, 'REQ-123');
170
+ writeCleanReviewLedger(repoRoot, 'REQ-123');
137
171
 
138
172
  const secondRun = await runAutopilot({
139
173
  repoRoot,
@@ -146,7 +180,8 @@ describe('runAutopilot', () => {
146
180
 
147
181
  expect(secondRun.executed).toEqual(expect.arrayContaining(['verify', 'prepare-pr']));
148
182
  expect(secondRun.currentStage).toBe('prepare-pr');
149
- expect(prBrief).toContain('execution/tasks/T002/checkpoint.json');
183
+ expect(prBrief).toContain('planning/task-manifest.json');
184
+ expect(prBrief).not.toContain('checkpoint.json');
150
185
  });
151
186
 
152
187
  test('runs release after prepare-pr when requested for an approved plan', async () => {
@@ -195,6 +230,7 @@ describe('runAutopilot', () => {
195
230
  expect(fs.existsSync(getReleaseNotePath(repoRoot, 'REQ-123'))).toBe(false);
196
231
 
197
232
  markManifestReviewsPassed(repoRoot, 'REQ-123');
233
+ writeCleanReviewLedger(repoRoot, 'REQ-123');
198
234
 
199
235
  const result = await runAutopilot({
200
236
  repoRoot,
@@ -0,0 +1,165 @@
1
+ /**
2
+ * [INPUT]: 依赖 scripts/benchmark-artifacts.js 导出的 runBenchmarkArtifacts 和临时 artifact fixture。
3
+ * [OUTPUT]: 验证 benchmark:artifacts 使用 ceil(len/4) 估算并报告 profile 阈值 savings。
4
+ * [POS]: REQ-003-minimize-workflow-artifacts T017 的 Red/Green 证据。
5
+ * [PROTOCOL]: 变更时更新此头部,然后检查 CLAUDE.md
6
+ */
7
+
8
+ const fs = require('fs');
9
+ const os = require('os');
10
+ const path = require('path');
11
+ const { spawnSync } = require('child_process');
12
+
13
+ const { runBenchmarkArtifacts } = require('../../../scripts/benchmark-artifacts');
14
+
15
+ const REPO_ROOT = path.resolve(__dirname, '../../..');
16
+ const BENCHMARK_SCRIPT = path.join(REPO_ROOT, 'scripts', 'benchmark-artifacts.js');
17
+
18
+ function writeText(filePath, text) {
19
+ fs.mkdirSync(path.dirname(filePath), { recursive: true });
20
+ fs.writeFileSync(filePath, text);
21
+ }
22
+
23
+ function writeJson(filePath, value) {
24
+ writeText(filePath, `${JSON.stringify(value, null, 2)}\n`);
25
+ }
26
+
27
+ function contractTasks({ changeKey, profile = 'standard', filler = '' }) {
28
+ return [
29
+ '# Tasks',
30
+ '',
31
+ '## Contract Summary',
32
+ '',
33
+ `Change: ${changeKey}`,
34
+ 'Mode: plan',
35
+ `Profile: ${profile}`,
36
+ 'Approval: approved',
37
+ '',
38
+ 'Goal:',
39
+ '- Minimize workflow artifacts.',
40
+ '',
41
+ 'Do Not Do:',
42
+ '- Do not change token estimator math.',
43
+ '',
44
+ 'Approved Direction:',
45
+ '- Use tasks.md plus generated JSON records.',
46
+ '',
47
+ 'Acceptance:',
48
+ '- Benchmark savings stay above threshold.',
49
+ '',
50
+ 'Verification:',
51
+ '',
52
+ '```bash',
53
+ 'npm run benchmark:artifacts',
54
+ '```',
55
+ '',
56
+ 'Risk / Escalate If:',
57
+ '- Savings fall below profile threshold.',
58
+ '',
59
+ filler,
60
+ '## Phase 1',
61
+ '',
62
+ '- [ ] T001 benchmark minimized artifact surface',
63
+ ' Vertical slice: Slice 1',
64
+ ''
65
+ ].join('\n');
66
+ }
67
+
68
+ function seedLegacyBaseline(repoRoot, changeKey, size = 6000) {
69
+ const changeDir = path.join(repoRoot, 'devflow', 'changes', changeKey);
70
+ writeText(path.join(changeDir, 'planning', 'design.md'), `# Design\n\n${'d'.repeat(size)}\n`);
71
+ writeText(path.join(changeDir, 'planning', 'analysis.md'), `# Analysis\n\n${'a'.repeat(size / 2)}\n`);
72
+ writeText(path.join(changeDir, 'planning', 'tasks.md'), `# Tasks\n\n${'t'.repeat(size / 2)}\n`);
73
+ writeJson(path.join(changeDir, 'planning', 'task-manifest.json'), { changeId: changeKey, tasks: [] });
74
+ writeJson(path.join(changeDir, 'change-meta.json'), { changeId: changeKey, goal: ['legacy'] });
75
+ writeJson(path.join(changeDir, 'review', 'report-card.json'), { overall: 'pass' });
76
+ }
77
+
78
+ function seedMinimizedChange(repoRoot, changeKey, options = {}) {
79
+ const changeDir = path.join(repoRoot, 'devflow', 'changes', changeKey);
80
+ writeText(path.join(changeDir, 'planning', 'tasks.md'), contractTasks({ changeKey, ...options }));
81
+ writeJson(path.join(changeDir, 'planning', 'task-manifest.json'), {
82
+ changeId: changeKey,
83
+ metadata: { source: 'tasks.md', generatedBy: 'cc-devflow task-contract', planVersion: 1 },
84
+ tasks: []
85
+ });
86
+ writeJson(path.join(changeDir, 'change-meta.json'), {
87
+ changeId: changeKey,
88
+ _meta: { generatedBy: 'cc-devflow task-contract' }
89
+ });
90
+ writeJson(path.join(changeDir, 'review', 'review-ledger.jsonl'), { note: 'counted as text by benchmark' });
91
+ writeJson(path.join(changeDir, 'review', 'review-findings.json'), { findings: [] });
92
+ writeJson(path.join(changeDir, 'review', 'report-card.json'), { overall: 'pass' });
93
+ }
94
+
95
+ describe('benchmark:artifacts', () => {
96
+ let repoRoot;
97
+
98
+ beforeEach(() => {
99
+ repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'cc-devflow-benchmark-artifacts-'));
100
+ seedLegacyBaseline(repoRoot, 'REQ-001-legacy-baseline');
101
+ seedLegacyBaseline(repoRoot, 'REQ-002-legacy-baseline');
102
+ });
103
+
104
+ afterEach(() => {
105
+ fs.rmSync(repoRoot, { recursive: true, force: true });
106
+ });
107
+
108
+ test('reports standard savings >= 30% for REQ-003-example', () => {
109
+ seedMinimizedChange(repoRoot, 'REQ-003-example', { profile: 'standard' });
110
+
111
+ const result = runBenchmarkArtifacts(repoRoot);
112
+ const row = result.rows.find((item) => item.changeKey === 'REQ-003-example');
113
+
114
+ expect(result.code).toBe(0);
115
+ expect(row).toMatchObject({
116
+ profile: 'standard',
117
+ threshold_pct: 30,
118
+ correctness_pass: true
119
+ });
120
+ expect(row.savings_vs_baseline_pct).toBeGreaterThanOrEqual(30);
121
+ });
122
+
123
+ test('reports tiny savings >= 60% for tiny fixture', () => {
124
+ seedMinimizedChange(repoRoot, 'REQ-004-tiny-example', { profile: 'tiny' });
125
+
126
+ const result = runBenchmarkArtifacts(repoRoot);
127
+ const row = result.rows.find((item) => item.changeKey === 'REQ-004-tiny-example');
128
+
129
+ expect(result.code).toBe(0);
130
+ expect(row).toMatchObject({
131
+ profile: 'tiny',
132
+ threshold_pct: 60,
133
+ correctness_pass: true
134
+ });
135
+ expect(row.savings_vs_baseline_pct).toBeGreaterThanOrEqual(60);
136
+ });
137
+
138
+ test('exits 1 when savings are below the profile threshold', () => {
139
+ seedMinimizedChange(repoRoot, 'REQ-005-bloated-example', {
140
+ profile: 'standard',
141
+ filler: 'x'.repeat(20000)
142
+ });
143
+
144
+ const result = runBenchmarkArtifacts(repoRoot);
145
+
146
+ expect(result.code).toBe(1);
147
+ expect(result.rows[0]).toMatchObject({ correctness_pass: false });
148
+ });
149
+
150
+ test('CLI prints stdout JSON array', () => {
151
+ seedMinimizedChange(repoRoot, 'REQ-003-example', { profile: 'standard' });
152
+
153
+ const result = spawnSync(process.execPath, [BENCHMARK_SCRIPT, repoRoot], { encoding: 'utf8' });
154
+ const rows = JSON.parse(result.stdout);
155
+
156
+ expect(result.status).toBe(0);
157
+ expect(Array.isArray(rows)).toBe(true);
158
+ expect(rows[0]).toHaveProperty('savings_vs_baseline_pct');
159
+ });
160
+
161
+ test('package.json exposes npm run benchmark:artifacts', () => {
162
+ const pkg = JSON.parse(fs.readFileSync(path.join(REPO_ROOT, 'package.json'), 'utf8'));
163
+ expect(pkg.scripts['benchmark:artifacts']).toBe('node scripts/benchmark-artifacts.js');
164
+ });
165
+ });
@@ -0,0 +1,109 @@
1
+ /**
2
+ * [INPUT]: 依赖 scripts/benchmark-skills.js 导出的 runBenchmarkSkills 和临时 skill fixture。
3
+ * [OUTPUT]: 验证 benchmark:skills 对 SKILL.md 入口体积执行 byte/line 预算。
4
+ * [POS]: skill 入口瘦身基准的 Red/Green 证据。
5
+ * [PROTOCOL]: 变更时更新此头部,然后检查 CLAUDE.md
6
+ */
7
+
8
+ const fs = require('fs');
9
+ const os = require('os');
10
+ const path = require('path');
11
+ const { spawnSync } = require('child_process');
12
+
13
+ const { runBenchmarkSkills } = require('../../../scripts/benchmark-skills');
14
+
15
+ const REPO_ROOT = path.resolve(__dirname, '../../..');
16
+ const BENCHMARK_SCRIPT = path.join(REPO_ROOT, 'scripts', 'benchmark-skills.js');
17
+
18
+ function writeSkill(repoRoot, skillName, body) {
19
+ const filePath = path.join(repoRoot, '.claude', 'skills', skillName, 'SKILL.md');
20
+ fs.mkdirSync(path.dirname(filePath), { recursive: true });
21
+ fs.writeFileSync(filePath, body);
22
+ }
23
+
24
+ function skillBody({ version = '1.0.0', filler = '' } = {}) {
25
+ return [
26
+ '---',
27
+ 'name: cc-plan',
28
+ `version: ${version}`,
29
+ 'description: fixture',
30
+ '---',
31
+ '',
32
+ '# Fixture',
33
+ '',
34
+ 'Thin entrypoint.',
35
+ filler
36
+ ].join('\n');
37
+ }
38
+
39
+ function investigateSkillBody({ filler = '' } = {}) {
40
+ return skillBody().replace('name: cc-plan', 'name: cc-investigate') + filler;
41
+ }
42
+
43
+ describe('benchmark:skills', () => {
44
+ let repoRoot;
45
+
46
+ beforeEach(() => {
47
+ repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'cc-devflow-benchmark-skills-'));
48
+ });
49
+
50
+ afterEach(() => {
51
+ fs.rmSync(repoRoot, { recursive: true, force: true });
52
+ });
53
+
54
+ test('passes when cc-plan stays under the thin entrypoint budget', () => {
55
+ writeSkill(repoRoot, 'cc-plan', skillBody());
56
+
57
+ const result = runBenchmarkSkills(repoRoot);
58
+
59
+ expect(result.code).toBe(0);
60
+ expect(result.rows[0]).toMatchObject({
61
+ skill: 'cc-plan',
62
+ max_bytes: 16000,
63
+ max_lines: 360,
64
+ correctness_pass: true
65
+ });
66
+ });
67
+
68
+ test('passes when cc-investigate stays under the thin entrypoint budget', () => {
69
+ writeSkill(repoRoot, 'cc-investigate', investigateSkillBody());
70
+
71
+ const result = runBenchmarkSkills(repoRoot);
72
+
73
+ expect(result.rows[0]).toMatchObject({
74
+ skill: 'cc-investigate',
75
+ max_bytes: 16000,
76
+ max_lines: 360,
77
+ correctness_pass: true
78
+ });
79
+ });
80
+
81
+ test('exits 1 when cc-plan grows past the byte budget', () => {
82
+ writeSkill(repoRoot, 'cc-plan', skillBody({ filler: 'x'.repeat(17000) }));
83
+
84
+ const result = runBenchmarkSkills(repoRoot);
85
+
86
+ expect(result.code).toBe(1);
87
+ expect(result.rows[0]).toMatchObject({
88
+ skill: 'cc-plan',
89
+ correctness_pass: false,
90
+ note: 'skill entrypoint exceeds context budget'
91
+ });
92
+ });
93
+
94
+ test('CLI prints stdout JSON array', () => {
95
+ writeSkill(repoRoot, 'cc-plan', skillBody());
96
+
97
+ const result = spawnSync(process.execPath, [BENCHMARK_SCRIPT, repoRoot], { encoding: 'utf8' });
98
+ const rows = JSON.parse(result.stdout);
99
+
100
+ expect(result.status).toBe(0);
101
+ expect(Array.isArray(rows)).toBe(true);
102
+ expect(rows[0]).toHaveProperty('estimated_tokens');
103
+ });
104
+
105
+ test('package.json exposes npm run benchmark:skills', () => {
106
+ const pkg = JSON.parse(fs.readFileSync(path.join(REPO_ROOT, 'package.json'), 'utf8'));
107
+ expect(pkg.scripts['benchmark:skills']).toBe('node scripts/benchmark-skills.js');
108
+ });
109
+ });
@@ -217,9 +217,9 @@ describe('cc-devflow cli distribution bootstrap', () => {
217
217
  expect(codexDoSkill.data.writes).toEqual(
218
218
  expect.arrayContaining([
219
219
  expect.objectContaining({
220
- path: 'devflow/changes/<change-key>/execution/tasks/<task-id>/checkpoint.json',
220
+ path: 'devflow/changes/<change-key>/execution/tasks/<task-id>/events.jsonl',
221
221
  durability: 'durable',
222
- required: true
222
+ required: false
223
223
  })
224
224
  ])
225
225
  );
@@ -7,7 +7,6 @@ const { runResume } = require('../operations/resume');
7
7
  const {
8
8
  getRuntimeStatePath,
9
9
  getTaskManifestPath,
10
- getCheckpointPath,
11
10
  getEventsPath
12
11
  } = require('../store');
13
12
 
@@ -76,7 +75,7 @@ describe('runDispatch', () => {
76
75
  expect(nextManifest.tasks[0].status).toBe('pending');
77
76
  });
78
77
 
79
- test('rejects stale results when planVersion changes during task execution and records it in checkpoint', async () => {
78
+ test('rejects stale results when planVersion changes during task execution and records it in manifest and events', async () => {
80
79
  const repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'cc-devflow-dispatch-'));
81
80
  const manifestPath = getTaskManifestPath(repoRoot, 'REQ-123');
82
81
 
@@ -133,28 +132,25 @@ describe('runDispatch', () => {
133
132
  });
134
133
 
135
134
  const nextManifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
136
- const checkpoint = JSON.parse(fs.readFileSync(getCheckpointPath(repoRoot, 'REQ-123', 'T001'), 'utf8'));
137
135
  const events = fs.readFileSync(getEventsPath(repoRoot, 'REQ-123', 'T001'), 'utf8');
138
136
 
139
137
  expect(result.success).toBe(false);
140
138
  expect(nextManifest.tasks[0].status).toBe('failed');
141
139
  expect(nextManifest.tasks[0].lastError).toContain('Stale result rejected');
142
- expect(checkpoint.planVersion).toBe(1);
143
- expect(checkpoint.error).toContain('Stale result rejected');
144
140
  expect(events).toContain('task_stale_rejected');
145
141
  });
146
142
 
147
- test('restores unresolved work from the latest stable checkpoint on resume without creating handoff markdown', async () => {
143
+ test('restores unresolved work from the latest stable manifest state on resume without creating handoff markdown', async () => {
148
144
  const repoRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'cc-devflow-resume-stable-'));
149
145
  const changeId = 'REQ-123';
150
146
  const manifestPath = getTaskManifestPath(repoRoot, changeId);
151
147
 
152
148
  writeJson(getRuntimeStatePath(repoRoot, changeId), {
153
149
  changeId,
154
- changeKey: 'REQ-123-recover-from-stable-checkpoint',
155
- slug: 'recover-from-stable-checkpoint',
150
+ changeKey: 'REQ-123-recover-from-stable-state',
151
+ slug: 'recover-from-stable-state',
156
152
  createdAt: '2026-04-09T01:00:00.000Z',
157
- goal: 'Recover from stable checkpoint',
153
+ goal: 'Recover from stable state',
158
154
  status: 'in_progress',
159
155
  initializedAt: '2026-04-09T01:00:00.000Z',
160
156
  plannedAt: '2026-04-09T01:01:00.000Z',
@@ -169,13 +165,13 @@ describe('runDispatch', () => {
169
165
 
170
166
  writeJson(manifestPath, {
171
167
  changeId,
172
- goal: 'Recover from stable checkpoint',
168
+ goal: 'Recover from stable state',
173
169
  createdAt: '2026-04-09T01:00:00.000Z',
174
170
  updatedAt: '2026-04-09T01:02:00.000Z',
175
171
  tasks: [
176
172
  {
177
173
  id: 'T001',
178
- title: 'Stable checkpoint task',
174
+ title: 'Stable completed task',
179
175
  type: 'TEST',
180
176
  dependsOn: [],
181
177
  touches: ['src/a.ts'],
@@ -219,32 +215,6 @@ describe('runDispatch', () => {
219
215
  }
220
216
  });
221
217
 
222
- writeJson(getCheckpointPath(repoRoot, changeId, 'T001'), {
223
- changeId,
224
- taskId: 'T001',
225
- sessionId: 'stable-session',
226
- planVersion: 1,
227
- status: 'passed',
228
- summary: 'Task passed after 1 attempt(s)',
229
- error: '',
230
- outputExcerpt: '',
231
- timestamp: '2026-04-09T01:05:00.000Z',
232
- attempt: 1
233
- });
234
-
235
- writeJson(getCheckpointPath(repoRoot, changeId, 'T002'), {
236
- changeId,
237
- taskId: 'T002',
238
- sessionId: 'failed-session',
239
- planVersion: 1,
240
- status: 'failed',
241
- summary: 'Task failed: Command failed',
242
- error: 'Command failed',
243
- outputExcerpt: 'Command failed',
244
- timestamp: '2026-04-09T01:06:00.000Z',
245
- attempt: 2
246
- });
247
-
248
218
  const result = await runResume({
249
219
  repoRoot,
250
220
  changeId,
@@ -255,7 +225,7 @@ describe('runDispatch', () => {
255
225
  const nextManifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
256
226
 
257
227
  expect(result.success).toBe(true);
258
- expect(result.restoredCheckpoint).toMatchObject({
228
+ expect(result.restoredState).toMatchObject({
259
229
  taskId: 'T001',
260
230
  status: 'passed'
261
231
  });