valent-pipeline 0.4.3 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/README.md +26 -19
  2. package/bin/cli.js +43 -0
  3. package/package.json +1 -1
  4. package/pipeline/docs/lean-spawn-human-tasks.md +2 -2
  5. package/pipeline/orchestrators/claude-code/README.md +18 -2
  6. package/pipeline/orchestrators/claude-code/plan.workflow.js +78 -16
  7. package/pipeline/orchestrators/claude-code/retro.workflow.js +85 -21
  8. package/pipeline/orchestrators/claude-code/sprint.workflow.js +127 -9
  9. package/pipeline/orchestrators/codex/README.md +3 -3
  10. package/pipeline/orchestrators/codex/lead-loop.md +3 -3
  11. package/pipeline/prompts/lead.md +1 -1
  12. package/pipeline/schemas/task-graph.schema.json +1 -1
  13. package/pipeline/steps/common/distilled-handoff-format.md +1 -1
  14. package/pipeline/steps/orchestration/adopt-lead-and-create-team.md +1 -1
  15. package/pipeline/steps/orchestration/sprint-plan.md +2 -2
  16. package/pipeline/steps/retrospective/calibration.md +1 -1
  17. package/pipeline/task-graphs/frontend-only.yaml +1 -1
  18. package/pipeline/task-graphs/fullstack-web.yaml +1 -1
  19. package/pipeline/task-graphs/mobile-app.yaml +1 -1
  20. package/pipeline/templates/bend-handoff.template.md +1 -1
  21. package/pipeline/templates/critic-review.template.md +1 -1
  22. package/pipeline/templates/data-handoff.template.md +1 -1
  23. package/pipeline/templates/docgen-handoff.template.md +1 -1
  24. package/pipeline/templates/execution-report.template.md +1 -1
  25. package/pipeline/templates/fend-handoff.template.md +1 -1
  26. package/pipeline/templates/iac-handoff.template.md +1 -1
  27. package/pipeline/templates/judge-decision.template.md +1 -1
  28. package/pipeline/templates/libdev-handoff.template.md +1 -1
  29. package/pipeline/templates/mcp-dev-handoff.template.md +1 -1
  30. package/pipeline/templates/mobile-handoff.template.md +1 -1
  31. package/pipeline/templates/qa-test-spec.template.md +1 -1
  32. package/pipeline/templates/readiness-review.template.md +1 -1
  33. package/pipeline/templates/reqs-brief.template.md +1 -1
  34. package/pipeline/templates/uxa-spec.template.md +1 -1
  35. package/skills/valent-configure/SKILL.md +26 -5
  36. package/skills/valent-help/SKILL.md +3 -0
  37. package/skills/valent-review-cost/SKILL.md +69 -0
  38. package/skills/valent-run-epic-workflow/SKILL.md +4 -4
  39. package/skills/valent-run-project-workflow/SKILL.md +4 -4
  40. package/skills/valent-run-story-workflow/SKILL.md +5 -3
  41. package/src/board/public/app.js +377 -0
  42. package/src/board/public/index.html +62 -0
  43. package/src/board/public/styles.css +542 -0
  44. package/src/board/server.js +209 -0
  45. package/src/commands/audit.js +190 -0
  46. package/src/commands/board.js +102 -0
  47. package/src/commands/init.js +56 -23
  48. package/src/commands/resolve-graph.js +3 -6
  49. package/src/commands/status.js +122 -0
  50. package/src/commands/upgrade.js +28 -5
  51. package/src/lib/audit.js +192 -0
  52. package/src/lib/board-source.js +138 -0
  53. package/src/lib/board.js +219 -0
  54. package/src/lib/config-schema.js +31 -3
  55. package/src/lib/graph.js +2 -6
  56. package/src/lib/handoff.js +2 -6
  57. package/src/lib/paths.js +26 -0
package/README.md CHANGED
@@ -7,12 +7,11 @@ You write the story. The pipeline handles requirements analysis, UX specificatio
7
7
  ## Quick Start
8
8
 
9
9
  ```bash
10
- # Install globally
11
- npm install -g valent-pipeline
12
-
13
- # Initialize in your project
10
+ # Initialize in your project — no global install needed.
11
+ # `init` scaffolds .valent-pipeline/ AND vendors the CLI into it, so every project
12
+ # pins its own version and the agents call it via `node .valent-pipeline/bin/cli.js`.
14
13
  cd your-project
15
- valent-pipeline init
14
+ npx valent-pipeline init
16
15
 
17
16
  # Run the interactive configuration wizard
18
17
  /valent-configure
@@ -21,6 +20,13 @@ valent-pipeline init
21
20
  /valent-run-story STORY-001
22
21
  ```
23
22
 
23
+ > **No global install required.** `npx valent-pipeline init` copies the CLI (`bin/` + `src/`)
24
+ > into `.valent-pipeline/` and installs its dependencies there. Agents invoke
25
+ > `node .valent-pipeline/bin/cli.js <cmd>` — so different projects can run different CLI
26
+ > versions, and you can customize the pipeline (including `src/`) per project. A global
27
+ > install (`npm install -g valent-pipeline`) still works if you prefer the bare
28
+ > `valent-pipeline` command for manual use.
29
+
24
30
  ## How It Works
25
31
 
26
32
  A persistent **Lead** agent reads your story, assembles a team of specialist agents, and orchestrates them through a dependency-driven pipeline:
@@ -108,39 +114,40 @@ Specialized agents that replace BEND for non-API project types:
108
114
  - Claude Code CLI
109
115
  - npm account (for publishing)
110
116
 
111
- ### Install
112
-
113
- ```bash
114
- npm install -g valent-pipeline
115
- ```
116
-
117
117
  ### Initialize a Project
118
118
 
119
119
  ```bash
120
120
  cd your-project
121
- valent-pipeline init
121
+ npx valent-pipeline init
122
122
  ```
123
123
 
124
124
  The init command:
125
125
  1. Runs an interactive wizard to set project type, tech stack, and model assignments
126
126
  2. Copies pipeline infrastructure to `.valent-pipeline/`
127
- 3. Generates `pipeline-config.yaml` from your answers
128
- 4. Creates knowledge directories and initializes the backlog
129
- 5. Installs Claude Code skills for story/epic/project execution
127
+ 3. **Vendors the CLI** (`bin/` + `src/`) into `.valent-pipeline/` and installs its runtime
128
+ dependencies there, so the project is self-contained and agents run
129
+ `node .valent-pipeline/bin/cli.js <cmd>` — no global install or `npx` round-trip at run time
130
+ 4. Generates `pipeline-config.yaml` from your answers
131
+ 5. Creates knowledge directories and initializes the backlog
132
+ 6. Installs Claude Code skills for story/epic/project execution
133
+
134
+ A global install (`npm install -g valent-pipeline`) is optional — only needed if you want the
135
+ bare `valent-pipeline` command available for manual use outside a project.
130
136
 
131
137
  ### Upgrade
132
138
 
133
139
  ```bash
134
- valent-pipeline upgrade
135
- valent-pipeline upgrade --dry-run # preview changes without applying
140
+ npx valent-pipeline upgrade
141
+ npx valent-pipeline upgrade --dry-run # preview changes without applying
136
142
  ```
137
143
 
138
- Upgrades pipeline infrastructure (prompts, templates, task graphs, scripts) while preserving your project-specific files (config, knowledge, backlog).
144
+ Upgrades pipeline infrastructure (prompts, templates, task graphs, scripts) **and re-vendors the
145
+ CLI** (`bin/` + `src/`) while preserving your project-specific files (config, knowledge, backlog).
139
146
 
140
147
  ### Validate Configuration
141
148
 
142
149
  ```bash
143
- valent-pipeline config validate
150
+ node .valent-pipeline/bin/cli.js config validate
144
151
  ```
145
152
 
146
153
  ## Configuration
package/bin/cli.js CHANGED
@@ -41,6 +41,49 @@ program
41
41
  await upgrade(options);
42
42
  });
43
43
 
44
+ // status command — board read-model (composes pipeline-state.json + backlog + artifacts)
45
+ program
46
+ .command('status')
47
+ .description('Show pipeline status as a board read-model (human summary, or --json for the board-state contract)')
48
+ .option('--json', 'Emit the board-state JSON contract to stdout (for a board/notifier/CI)')
49
+ .option('--out <path>', 'Write the board-state JSON to a file (e.g. board-state.json for a board to watch)')
50
+ .option('--root <dir>', 'Project root to inspect (defaults to the current directory)')
51
+ .option('--no-audit', 'Skip merging the per-agent token/wall-clock audit trail into the board')
52
+ .action(async (options) => {
53
+ const { statusCmd } = await import('../src/commands/status.js');
54
+ await statusCmd(options);
55
+ });
56
+
57
+ // board command — serves the read-only board SPA + read API (a projection, never a write path)
58
+ program
59
+ .command('board')
60
+ .description('Serve a read-only board UI (Backlog + Kanban) over a local HTTP server')
61
+ .option('--port <n>', 'Port to listen on (default 7777)', '7777')
62
+ .option('--host <addr>', 'Host/interface to bind (default 127.0.0.1; 0.0.0.0 warns loudly)', '127.0.0.1')
63
+ .option('--root <dir>', 'Project root to project the board over (defaults to the current directory)')
64
+ .option('--open', 'Open the board in the default browser once it is listening')
65
+ .option('--no-audit', 'Skip merging the per-agent token/wall-clock cost trail into the board')
66
+ .action(async (options) => {
67
+ const { boardCmd } = await import('../src/commands/board.js');
68
+ await boardCmd(options);
69
+ });
70
+
71
+ // audit command — per-agent, per-story token + wall-clock trail (reads Workflow journals)
72
+ program
73
+ .command('audit')
74
+ .description('Per-agent, per-story audit trail (tokens + wall-clock), read from Workflow run journals')
75
+ .option('--story <id>', 'Filter to a single story')
76
+ .option('--run <runId>', 'Filter to a single workflow run')
77
+ .option('--json', 'Emit the audit JSON contract to stdout')
78
+ .option('--out <path>', 'Write the audit JSON to a file')
79
+ .option('--file <path>', 'Audit a single workflow journal file (wf_*.json)')
80
+ .option('--session-dir <dir>', 'Scan a specific session dir\'s workflows/ folder')
81
+ .option('--project-dir <dir>', 'Project dir whose ~/.claude/projects session journals to read (default: cwd)')
82
+ .action(async (options) => {
83
+ const { auditCmd } = await import('../src/commands/audit.js');
84
+ await auditCmd(options);
85
+ });
86
+
44
87
  // config validate command
45
88
  const configCmd = program
46
89
  .command('config')
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "valent-pipeline",
3
- "version": "0.4.3",
3
+ "version": "0.5.1",
4
4
  "description": "v3 multi-agent AI pipeline for software development lifecycle",
5
5
  "type": "module",
6
6
  "bin": {
@@ -135,8 +135,8 @@ Then test other commands:
135
135
  ```bash
136
136
  valent-pipeline config validate # should exit 0
137
137
  valent-pipeline upgrade --dry-run # should show no changes (just installed)
138
- valent-pipeline db rebuild # indexes story artifacts (auto-creates DB if missing)
139
- valent-pipeline db rebuild # should complete (no stories to index yet)
138
+ node .valent-pipeline/bin/cli.js db rebuild # indexes story artifacts (auto-creates DB if missing)
139
+ node .valent-pipeline/bin/cli.js db rebuild # should complete (no stories to index yet)
140
140
  ```
141
141
 
142
142
  Clean up:
@@ -45,19 +45,35 @@ incl. a resume-safety lint), but:
45
45
  | 3b CRITIC | `parallel([blind, edge, acceptance])` independent agents → triage barrier | one CRITIC context, passes anchored on each other |
46
46
  | Spawn context | `buildPrompt()` mirrors `spawn.template.md` (Setup/Task/Trigger/Completion) | terse inline instructions |
47
47
  | Roll-over | a rejected story is recorded and the batch continues | — |
48
+ | Empty-graph guard | a resolved graph with zero dev agents throws a diagnostic before Build | silent empty Build → CRITIC looping on an empty diff |
49
+ | No-diff guard | if dev agents report no files, CRITIC/QA/JUDGE are skipped and the story rolls over `blocked` | 4-agent CRITIC re-reviewing an empty diff to the cap |
50
+ | Non-actionable verdict | a gate/CRITIC `needs-review` escalates immediately (no re-run) | re-reviewing a structural blocker until the cap |
48
51
  | Resume | journal (`resumeFromRunId`) | disk-state rehydration + re-decide |
49
52
 
50
53
  ## Args
51
54
 
52
55
  ```js
53
56
  // batch form (a planned sprint)
54
- { stories: [{ storyId, projectType, profiles }, ...], maxRejectionCycles? }
57
+ { stories: [{ storyId, projectType, profiles }, ...], maxRejectionCycles?, models? }
55
58
  // single-story form (back-compat)
56
- { storyId, projectType, profiles?, maxRejectionCycles? }
59
+ { storyId, projectType, profiles?, maxRejectionCycles?, models? }
57
60
  ```
58
61
 
59
62
  Returns `{ shipped, stories_shipped, stories_rolled_over, results: [{ storyId, shipped, verdict, skipped }] }`.
60
63
 
64
+ ### Per-agent model tiers (`models`)
65
+
66
+ Each workflow assigns a model tier per spawned agent — **gates** (READINESS/CRITIC/JUDGE) → `opus`,
67
+ **spec + build** → `sonnet`, **CLI-runner / IO** steps (resolve-graph, sprint-pack, validate-sprint,
68
+ calibrate, embed, persist) → `haiku`, and the retro's loop-until-dry review (`RETRO-REVIEW`) → `opus`.
69
+ This assignment is baked into each script as a default and is **overridable** via the `models` arg —
70
+ the `pipeline-config.yaml` `models` tier→roles map (`{ opus:[...], sonnet:[...], haiku:[...] }`), which
71
+ the invoking skills pass through. Edit it with `/valent-configure` → "Model Assignments". A Workflow
72
+ script can't read files, so the config arrives via `args`, never a direct read. The same `models`
73
+ config also drives the prose-Lead pipeline (`providers/claude-code/runtime.md`), so the two paths stay
74
+ in sync. Selection is static (default + args only) → journal-replay safe. Omit `models` to use the
75
+ baked-in default; an agent with no tier mapping inherits the session model.
76
+
61
77
  ## Resume & state model (step 8)
62
78
 
63
79
  **The journal is the state of record.** Each Workflow invocation returns a `runId`. To resume
@@ -14,14 +14,17 @@
14
14
  * one git branch and must be sequential — see sprint.workflow.js.)
15
15
  *
16
16
  * The deterministic packing/validation (greedy bin-packing, consistency cross-checks) is NOT
17
- * done in this script — it lives in `valent-pipeline sprint-pack` / `validate-sprint`
17
+ * done in this script — it lives in `node .valent-pipeline/bin/cli.js sprint-pack` / `validate-sprint`
18
18
  * (src/lib/sprint.js), invoked through an agent because a Workflow script has no CLI/fs
19
19
  * access. Both runtimes reuse those CLIs; this workflow just sequences the agents.
20
20
  *
21
21
  * The return value is shaped to feed straight into sprint.workflow.js:
22
22
  * { sprintId, points_planned, stories: [{ storyId, projectType, profiles }] }
23
23
  *
24
- * args: { stories: [{ storyId, projectType }], sprintId, velocity, backlogPath?, maxRejectionCycles? }
24
+ * args: { stories: [{ storyId, projectType }], sprintId, velocity, backlogPath?, maxRejectionCycles?, models? }
25
+ * `models` is the pipeline-config.yaml `models` tier->roles map, passed through by the invoking
26
+ * skill so per-agent model tiers stay config-driven (editable via `/valent-configure`). Omit it to
27
+ * use the baked-in default. See sprint.workflow.js for the full rationale.
25
28
  */
26
29
 
27
30
  export const meta = {
@@ -31,8 +34,8 @@ export const meta = {
31
34
  { title: 'Groom', detail: 'reqs -> uxa? -> qa-a -> readiness gate, pipelined across the batch' },
32
35
  { title: 'Size', detail: 'profile-matched estimators per story, summed (parallel)' },
33
36
  { title: 'Persist', detail: 'write story_points + groomed status to the backlog' },
34
- { title: 'Pack', detail: 'valent-pipeline sprint-pack (greedy bin-packing, in code)' },
35
- { title: 'Validate', detail: 'write plan/status artifacts + valent-pipeline validate-sprint' },
37
+ { title: 'Pack', detail: 'node .valent-pipeline/bin/cli.js sprint-pack (greedy bin-packing, in code)' },
38
+ { title: 'Validate', detail: 'write plan/status artifacts + node .valent-pipeline/bin/cli.js validate-sprint' },
36
39
  ],
37
40
  }
38
41
 
@@ -116,7 +119,16 @@ const PROFILE_ESTIMATORS = {
116
119
 
117
120
  // --- args ---
118
121
 
119
- const a = args || {}
122
+ // args may arrive as a parsed object or as a JSON string, depending on how the invoking
123
+ // skill/harness passes it. Normalize defensively so `a.stories` etc. resolve either way.
124
+ function parseArgs(x) {
125
+ if (typeof x === 'string') {
126
+ try { return JSON.parse(x) } catch { return {} }
127
+ }
128
+ return x || {}
129
+ }
130
+
131
+ const a = parseArgs(args)
120
132
  const stories = Array.isArray(a.stories) ? a.stories : []
121
133
  const sprintId = a.sprintId
122
134
  const velocity = a.velocity
@@ -126,10 +138,60 @@ if (!stories.length || !sprintId || typeof velocity !== 'number') {
126
138
  throw new Error('args must include { stories:[{storyId,projectType}], sprintId, velocity }')
127
139
  }
128
140
 
141
+ // --- per-agent model tiers ----------------------------------------------------
142
+ // Tiers come from pipeline-config.yaml `models` (a tier->roles map), passed in as
143
+ // args.models by the invoking skill — a Workflow script can't read files. We invert it
144
+ // to role->tier and overlay it on a baked-in default so the workflow self-hosts a sane
145
+ // assignment even when args.models is absent. Static + args only => journal-replay safe.
146
+ // readiness gate -> opus, spec/estimators -> sonnet, CLI-runners/IO -> haiku.
147
+ const DEFAULT_MODELS = {
148
+ READINESS: 'opus',
149
+ REQS: 'sonnet', UXA: 'sonnet', 'QA-A': 'sonnet',
150
+ BEND: 'sonnet', FEND: 'sonnet', DATA: 'sonnet', 'MCP-DEV': 'sonnet',
151
+ LIBDEV: 'sonnet', DOCGEN: 'sonnet', IAC: 'sonnet', MOBILE: 'sonnet',
152
+ PERSIST: 'haiku', PACK: 'haiku', VALIDATE: 'haiku',
153
+ }
154
+ function buildModelMap(cfg) {
155
+ const map = { ...DEFAULT_MODELS }
156
+ if (cfg && typeof cfg === 'object' && !Array.isArray(cfg)) {
157
+ for (const tier of ['opus', 'sonnet', 'haiku']) {
158
+ for (const role of cfg[tier] || []) {
159
+ if (typeof role === 'string') map[role.toUpperCase()] = tier
160
+ }
161
+ }
162
+ }
163
+ return map
164
+ }
165
+ const MODELS = buildModelMap(a.models)
166
+ // undefined => the agent inherits the main-loop (session) model.
167
+ const modelFor = (role) => MODELS[String(role).toUpperCase()]
168
+
169
+ // --- per-agent reasoning effort (thinking budget) ----------------------------
170
+ // Optional, config-driven, mirrors `models`. args.reasoning is a level->roles map inverted to
171
+ // role->trigger-phrase. EMPTY default => no trigger injected unless the config opts in, so
172
+ // behavior is unchanged out of the box. Static + args only => journal-replay safe.
173
+ const REASONING_PHRASES = { think: 'think', 'think-hard': 'think hard', 'think-harder': 'think harder', ultrathink: 'ultrathink' }
174
+ const DEFAULT_REASONING = {} // blank control surface — fill `reasoning` in pipeline-config.yaml to use it
175
+ function buildReasoningMap(cfg) {
176
+ const map = { ...DEFAULT_REASONING }
177
+ if (cfg && typeof cfg === 'object' && !Array.isArray(cfg)) {
178
+ for (const level of Object.keys(REASONING_PHRASES)) {
179
+ for (const role of cfg[level] || []) {
180
+ if (typeof role === 'string') map[role.toUpperCase()] = REASONING_PHRASES[level]
181
+ }
182
+ }
183
+ }
184
+ return map
185
+ }
186
+ const REASONING = buildReasoningMap(a.reasoning)
187
+ const reasoningFor = (role) => REASONING[String(role).toUpperCase()]
188
+
129
189
  function buildPrompt({ role, promptFile, storyId, taskSubject, trigger, returnContract }) {
130
190
  const outputDir = `stories/${storyId}/output`
191
+ const think = reasoningFor(role) // undefined unless config opts this role into a thinking tier
131
192
  return [
132
193
  `You are **${role}**, for story ${storyId} in the valent-pipeline (sprint ${sprintId} planning).`,
194
+ ...(think ? ['', `Before you act, ${think} about the hardest parts of this task.`] : []),
133
195
  '',
134
196
  '## Setup',
135
197
  `1. Read your core prompt: \`.valent-pipeline/prompts/${promptFile}\` — identity, protocols, step sequence.`,
@@ -161,7 +223,7 @@ const groomed = await pipeline(
161
223
  taskSubject: 'Tag testing_profiles for this story, then produce reqs-brief.md.',
162
224
  returnContract: 'Return ONLY { schema:1, agent:"reqs", story, testing_profiles:[...], files:[...] } as JSON.',
163
225
  }),
164
- { label: `reqs:${story.storyId}`, phase: 'Groom', schema: REQS_GROOM_SCHEMA },
226
+ { label: `reqs:${story.storyId}`, phase: 'Groom', schema: REQS_GROOM_SCHEMA, model: modelFor('REQS') },
165
227
  )
166
228
  return { ...story, profiles: r.testing_profiles || [] }
167
229
  },
@@ -170,7 +232,7 @@ const groomed = await pipeline(
170
232
  if (g.profiles.includes('ui')) {
171
233
  await agent(
172
234
  buildPrompt({ role: 'UXA', promptFile: 'uxa.md', storyId: g.storyId, taskSubject: 'Translate the brief into uxa-spec.md.' }),
173
- { label: `uxa:${g.storyId}`, phase: 'Groom', schema: HANDOFF_SCHEMA },
235
+ { label: `uxa:${g.storyId}`, phase: 'Groom', schema: HANDOFF_SCHEMA, model: modelFor('UXA') },
174
236
  )
175
237
  }
176
238
  return g
@@ -179,7 +241,7 @@ const groomed = await pipeline(
179
241
  async (g) => {
180
242
  await agent(
181
243
  buildPrompt({ role: 'QA-A', promptFile: 'qa-a.md', storyId: g.storyId, taskSubject: 'Produce qa-test-spec.md before any code is written.' }),
182
- { label: `qa-a:${g.storyId}`, phase: 'Groom', schema: HANDOFF_SCHEMA },
244
+ { label: `qa-a:${g.storyId}`, phase: 'Groom', schema: HANDOFF_SCHEMA, model: modelFor('QA-A') },
183
245
  )
184
246
  return g
185
247
  },
@@ -192,7 +254,7 @@ const groomed = await pipeline(
192
254
  role: 'READINESS', promptFile: 'readiness.md', storyId: g.storyId,
193
255
  taskSubject: 'Validate the spec chain (reqs/uxa/qa) is implementation-ready; run cross-story checks (sprint mode).',
194
256
  }),
195
- { label: `gate:readiness:${g.storyId}`, phase: 'Groom', schema: VERDICT_SCHEMA },
257
+ { label: `gate:readiness:${g.storyId}`, phase: 'Groom', schema: VERDICT_SCHEMA, model: modelFor('READINESS') },
196
258
  )
197
259
  if (v.verdict === 'pass') return { ...g, groomedStatus: 'groomed' }
198
260
  rejections += 1
@@ -204,7 +266,7 @@ const groomed = await pipeline(
204
266
  log(`${g.storyId}: readiness rejection ${rejections}/${maxRejectionCycles} -> ${target}`)
205
267
  await agent(
206
268
  buildPrompt({ role: target, promptFile: `${target.toLowerCase()}.md`, storyId: g.storyId, taskSubject: 'Address the READINESS rejection and rewrite the affected spec.' }),
207
- { label: `rework:${target.toLowerCase()}:${g.storyId}`, phase: 'Groom', schema: HANDOFF_SCHEMA },
269
+ { label: `rework:${target.toLowerCase()}:${g.storyId}`, phase: 'Groom', schema: HANDOFF_SCHEMA, model: modelFor(target) },
208
270
  )
209
271
  }
210
272
  },
@@ -227,7 +289,7 @@ const sized = await parallel(
227
289
  taskSubject: 'Estimate this story (read your estimate.md step; apply calibration directives if present).',
228
290
  returnContract: 'Return ONLY { schema:1, agent, story, points:<int> } as JSON.',
229
291
  }),
230
- { label: `estimate:${est.toLowerCase()}:${g.storyId}`, phase: 'Size', schema: ESTIMATE_SCHEMA },
292
+ { label: `estimate:${est.toLowerCase()}:${g.storyId}`, phase: 'Size', schema: ESTIMATE_SCHEMA, model: modelFor(est) },
231
293
  )),
232
294
  ).then((ests) => ({
233
295
  ...g,
@@ -244,15 +306,15 @@ await agent(
244
306
  `Update \`${backlogPath}\`: for each of these stories set \`story_points\` and \`status: groomed\`, ` +
245
307
  `and write \`testing_profiles\`. Stories (JSON): ${JSON.stringify(sizedStories.map((s) => ({ id: s.storyId, story_points: s.points, testing_profiles: s.profiles })))}. ` +
246
308
  `Return your \`valent:handoff\` machine block fields as JSON.`,
247
- { label: 'persist-sizing', phase: 'Persist', schema: HANDOFF_SCHEMA },
309
+ { label: 'persist-sizing', phase: 'Persist', schema: HANDOFF_SCHEMA, model: modelFor('PERSIST') },
248
310
  )
249
311
 
250
312
  phase('Pack')
251
313
  // Deterministic greedy packing happens in code (src/lib/sprint.js), invoked via the CLI.
252
314
  const pack = await agent(
253
- `Run exactly: \`valent-pipeline sprint-pack --velocity ${velocity} --backlog ${backlogPath}\` ` +
315
+ `Run exactly: \`node .valent-pipeline/bin/cli.js sprint-pack --velocity ${velocity} --backlog ${backlogPath}\` ` +
254
316
  `in the project root and return its stdout JSON verbatim (fields: sprint_stories, buffer_story_ids, points_planned, remaining_capacity).`,
255
- { label: 'sprint-pack', phase: 'Pack', schema: PACK_SCHEMA },
317
+ { label: 'sprint-pack', phase: 'Pack', schema: PACK_SCHEMA, model: modelFor('PACK') },
256
318
  )
257
319
  log(`packed ${pack.sprint_stories.length} stories (${pack.points_planned} pts); buffer: ${pack.buffer_story_ids.length}`)
258
320
 
@@ -262,9 +324,9 @@ const validation = await agent(
262
324
  `For sprint ${sprintId}: (1) write \`sprint-${sprintId}-plan.md\` from \`.valent-pipeline/templates/sprint-plan.template.md\` ` +
263
325
  `and \`sprint-${sprintId}-status.yaml\` from the status template for the packed stories ${JSON.stringify(pack.sprint_stories)}; ` +
264
326
  `(2) tag those stories \`sprint: ${sprintId}\` + \`status: sprint-planned\` in \`${backlogPath}\`; ` +
265
- `(3) run \`valent-pipeline validate-sprint --status sprint-${sprintId}-status.yaml --backlog ${backlogPath}\` and ` +
327
+ `(3) run \`node .valent-pipeline/bin/cli.js validate-sprint --status sprint-${sprintId}-status.yaml --backlog ${backlogPath}\` and ` +
266
328
  `return its result as JSON { valid:boolean, errors:[...] } (errors = the lines it printed on failure, else []).`,
267
- { label: 'validate-sprint', phase: 'Validate', schema: VALIDATE_SCHEMA },
329
+ { label: 'validate-sprint', phase: 'Validate', schema: VALIDATE_SCHEMA, model: modelFor('VALIDATE') },
268
330
  )
269
331
  if (!validation.valid) {
270
332
  throw new Error(`sprint ${sprintId} plan failed validation: ${(validation.errors || []).join('; ')}`)
@@ -16,25 +16,28 @@
16
16
  * guard) -> embed (CLI).
17
17
  *
18
18
  * The deterministic pieces are NOT in this script: calibration arithmetic is
19
- * `valent-pipeline calibrate` (src/lib/sprint.js); embedding is `valent-pipeline db embed`.
19
+ * `node .valent-pipeline/bin/cli.js calibrate` (src/lib/sprint.js); embedding is `node .valent-pipeline/bin/cli.js db embed`.
20
20
  * Both run through agents (a Workflow script has no CLI/fs access). The directive IMPACT
21
21
  * GATING and INVARIANT GUARD are deterministic policy, so they are enforced HERE in code —
22
22
  * the agent only proposes; the script decides what gets applied vs. surfaced for approval.
23
23
  *
24
- * args: { batchNumber, sprintId?, storyOutputDirs?: string[], dryRounds?: number, maxRounds?: number }
24
+ * args: { batchNumber, sprintId?, storyOutputDirs?: string[], dryRounds?: number, maxRounds?: number, models? }
25
25
  * sprintId present => sprint-mode (calibration runs). dryRounds = consecutive empty rounds
26
- * that end the loop-until-dry (default 2). maxRounds caps it (default 5).
26
+ * that end the loop-until-dry (default 2). maxRounds caps it (default 5). `models` is the
27
+ * pipeline-config.yaml `models` tier->roles map, passed through by the invoking skill so
28
+ * per-agent model tiers stay config-driven (editable via `/valent-configure`). Omit it to use
29
+ * the baked-in default. See sprint.workflow.js for the full rationale.
27
30
  */
28
31
 
29
32
  export const meta = {
30
33
  name: 'valent-retro',
31
34
  description: 'Retrospective: calibrate, loop-until-dry aggregate review, gated directives, embed (Workflow)',
32
35
  phases: [
33
- { title: 'Calibrate', detail: 'valent-pipeline calibrate (estimation accuracy, in code) — sprint mode' },
36
+ { title: 'Calibrate', detail: 'node .valent-pipeline/bin/cli.js calibrate (estimation accuracy, in code) — sprint mode' },
34
37
  { title: 'Analyze', detail: 'CRITIC/QA/JUDGE batch outputs + cost' },
35
38
  { title: 'Aggregate', detail: 'loop-until-dry 3-pass aggregate review + completeness critic (R5)' },
36
39
  { title: 'Directives', detail: 'agent proposes; code enforces impact gating + invariant guard' },
37
- { title: 'Embed', detail: 'valent-pipeline db embed (persist curated patterns)' },
40
+ { title: 'Embed', detail: 'node .valent-pipeline/bin/cli.js db embed (persist curated patterns)' },
38
41
  ],
39
42
  }
40
43
 
@@ -109,17 +112,78 @@ const HANDOFF_SCHEMA = {
109
112
 
110
113
  // --- args ---
111
114
 
112
- const a = args || {}
115
+ // args may arrive as a parsed object or as a JSON string, depending on how the invoking
116
+ // skill/harness passes it. Normalize defensively so `a.batchNumber` etc. resolve either way.
117
+ function parseArgs(x) {
118
+ if (typeof x === 'string') {
119
+ try { return JSON.parse(x) } catch { return {} }
120
+ }
121
+ return x || {}
122
+ }
123
+
124
+ const a = parseArgs(args)
113
125
  const batchNumber = a.batchNumber
114
126
  const sprintId = a.sprintId || null
115
127
  const dryRounds = a.dryRounds ?? 2
116
128
  const maxRounds = a.maxRounds ?? 5
117
129
  if (batchNumber == null) throw new Error('args must include { batchNumber }')
118
130
 
119
- const retroPrompt = (instruction, returnContract) =>
120
- `You are **RETROSPECTIVE**, analyzing story batch ${batchNumber} in the valent-pipeline. ` +
121
- `Read \`.valent-pipeline/prompts/retrospective.md\` and the step file named in the task. ${instruction} ` +
122
- (returnContract || 'Return your findings as the JSON object specified.')
131
+ // --- per-agent model tiers ----------------------------------------------------
132
+ // Tiers come from pipeline-config.yaml `models` (a tier->roles map), passed in as
133
+ // args.models by the invoking skill — a Workflow script can't read files. We invert it
134
+ // to role->tier and overlay it on a baked-in default so the workflow self-hosts a sane
135
+ // assignment even when args.models is absent. Static + args only => journal-replay safe.
136
+ // Retro stages map to synthetic role keys (not the single RETROSPECTIVE persona) so each
137
+ // stage can be tuned independently: the loop-until-dry aggregate review + completeness
138
+ // critic are the genuine quality work (RETRO-REVIEW -> opus); analyze/directives are
139
+ // lighter (RETRO -> sonnet); calibrate/embed/IO are mechanical (haiku).
140
+ const DEFAULT_MODELS = {
141
+ 'RETRO-REVIEW': 'opus',
142
+ RETRO: 'sonnet',
143
+ CALIBRATE: 'haiku', EMBED: 'haiku', PERSIST: 'haiku',
144
+ }
145
+ function buildModelMap(cfg) {
146
+ const map = { ...DEFAULT_MODELS }
147
+ if (cfg && typeof cfg === 'object' && !Array.isArray(cfg)) {
148
+ for (const tier of ['opus', 'sonnet', 'haiku']) {
149
+ for (const role of cfg[tier] || []) {
150
+ if (typeof role === 'string') map[role.toUpperCase()] = tier
151
+ }
152
+ }
153
+ }
154
+ return map
155
+ }
156
+ const MODELS = buildModelMap(a.models)
157
+ // undefined => the agent inherits the main-loop (session) model.
158
+ const modelFor = (role) => MODELS[String(role).toUpperCase()]
159
+
160
+ // --- reasoning effort (thinking budget) --------------------------------------
161
+ // Optional, config-driven, mirrors `models`. args.reasoning is a level->roles map inverted to
162
+ // role->trigger-phrase. EMPTY default => nothing injected unless the config opts in. Every agent
163
+ // here shares the RETROSPECTIVE identity, so the knob keys on that role. Static + args only.
164
+ const REASONING_PHRASES = { think: 'think', 'think-hard': 'think hard', 'think-harder': 'think harder', ultrathink: 'ultrathink' }
165
+ const DEFAULT_REASONING = {} // blank control surface — fill `reasoning` in pipeline-config.yaml to use it
166
+ function buildReasoningMap(cfg) {
167
+ const map = { ...DEFAULT_REASONING }
168
+ if (cfg && typeof cfg === 'object' && !Array.isArray(cfg)) {
169
+ for (const level of Object.keys(REASONING_PHRASES)) {
170
+ for (const role of cfg[level] || []) {
171
+ if (typeof role === 'string') map[role.toUpperCase()] = REASONING_PHRASES[level]
172
+ }
173
+ }
174
+ }
175
+ return map
176
+ }
177
+ const REASONING = buildReasoningMap(a.reasoning)
178
+ const reasoningFor = (role) => REASONING[String(role).toUpperCase()]
179
+
180
+ const retroPrompt = (instruction, returnContract) => {
181
+ const think = reasoningFor('RETROSPECTIVE') // undefined unless config opts RETROSPECTIVE into a tier
182
+ return `You are **RETROSPECTIVE**, analyzing story batch ${batchNumber} in the valent-pipeline. ` +
183
+ (think ? `Before you act, ${think} about the hardest parts of this task. ` : '') +
184
+ `Read \`.valent-pipeline/prompts/retrospective.md\` and the step file named in the task. ${instruction} ` +
185
+ (returnContract || 'Return your findings as the JSON object specified.')
186
+ }
123
187
 
124
188
  // A stable de-dup key so loop-until-dry converges (don't re-count the same finding).
125
189
  const findingKey = (f) => `${(f.summary || '').toLowerCase().trim().slice(0, 80)}`
@@ -131,9 +195,9 @@ if (sprintId) {
131
195
  phase('Calibrate')
132
196
  // Estimation-accuracy arithmetic lives in code (src/lib/sprint.js); run it via the CLI.
133
197
  calibration = await agent(
134
- `Run exactly: \`valent-pipeline calibrate --sprint ${sprintId}\` in the project root and return its stdout JSON verbatim ` +
198
+ `Run exactly: \`node .valent-pipeline/bin/cli.js calibrate --sprint ${sprintId}\` in the project root and return its stdout JSON verbatim ` +
135
199
  `(fields: ratios, flagged_pairs, surface_averages, velocity). This feeds calibration directives.`,
136
- { label: 'calibrate', phase: 'Calibrate', schema: { type: 'object', additionalProperties: true } },
200
+ { label: 'calibrate', phase: 'Calibrate', schema: { type: 'object', additionalProperties: true }, model: modelFor('CALIBRATE') },
137
201
  )
138
202
  log(`calibration: ${(calibration.flagged_pairs || []).length} flagged pair(s); velocity unstable=${calibration.velocity?.unstable}`)
139
203
  }
@@ -144,7 +208,7 @@ await agent(
144
208
  'Run analyze.md: read all CRITIC reviews, QA-B bug reports, JUDGE rejections, and cost data; categorize rejection/bug patterns.',
145
209
  'Return ONLY { schema:1, findings:[{id,summary,severity,stories}] } as JSON.',
146
210
  ),
147
- { label: 'analyze', phase: 'Analyze', schema: FINDINGS_SCHEMA },
211
+ { label: 'analyze', phase: 'Analyze', schema: FINDINGS_SCHEMA, model: modelFor('RETRO') },
148
212
  )
149
213
 
150
214
  phase('Aggregate')
@@ -164,7 +228,7 @@ while (dry < dryRounds && round < maxRounds) {
164
228
  `Report ONLY findings not already reported in earlier rounds.`,
165
229
  'Return ONLY { schema:1, findings:[{id,summary,severity,stories}] } as JSON.',
166
230
  ),
167
- { label: `aggregate:round-${round}`, phase: 'Aggregate', schema: FINDINGS_SCHEMA },
231
+ { label: `aggregate:round-${round}`, phase: 'Aggregate', schema: FINDINGS_SCHEMA, model: modelFor('RETRO-REVIEW') },
168
232
  )
169
233
  const fresh = (r.findings || []).filter((f) => !seen.has(findingKey(f)))
170
234
  if (!fresh.length) {
@@ -187,7 +251,7 @@ const critic = await agent(
187
251
  `List only genuine gaps — empty if coverage is complete.`,
188
252
  'Return ONLY { schema:1, gaps:["..."] } as JSON.',
189
253
  ),
190
- { label: 'completeness-critic', phase: 'Aggregate', schema: COMPLETENESS_SCHEMA },
254
+ { label: 'completeness-critic', phase: 'Aggregate', schema: COMPLETENESS_SCHEMA, model: modelFor('RETRO-REVIEW') },
191
255
  )
192
256
  if ((critic.gaps || []).length) {
193
257
  log(`completeness-critic surfaced ${critic.gaps.length} gap(s) — running targeted reviews`)
@@ -196,7 +260,7 @@ if ((critic.gaps || []).length) {
196
260
  agent(
197
261
  retroPrompt(`Targeted aggregate review for the previously-uncovered angle: "${gap}". Report only findings not already reported.`,
198
262
  'Return ONLY { schema:1, findings:[{id,summary,severity,stories}] } as JSON.'),
199
- { label: `aggregate:gap-${i + 1}`, phase: 'Aggregate', schema: FINDINGS_SCHEMA },
263
+ { label: `aggregate:gap-${i + 1}`, phase: 'Aggregate', schema: FINDINGS_SCHEMA, model: modelFor('RETRO-REVIEW') },
200
264
  )),
201
265
  )
202
266
  for (const r of extra.filter(Boolean)) {
@@ -222,7 +286,7 @@ const drafted = await agent(
222
286
  `propose it and flag it; the orchestrator decides what gets applied.`,
223
287
  'Return ONLY { schema:1, directives:[{target_agent,directive,reason,impact_level,touchesInvariant,category}] } as JSON.',
224
288
  ),
225
- { label: 'draft-directives', phase: 'Directives', schema: DIRECTIVES_SCHEMA },
289
+ { label: 'draft-directives', phase: 'Directives', schema: DIRECTIVES_SCHEMA, model: modelFor('RETRO') },
226
290
  )
227
291
 
228
292
  const all = drafted.directives || []
@@ -239,7 +303,7 @@ if (applied.length) {
239
303
  `Append these APPROVED correction directives to \`correction-directives.yaml\` (status: active, created_batch: ${batchNumber}). ` +
240
304
  `They have passed the impact gate (low/medium only). Directives (JSON): ${JSON.stringify(applied)}. ` +
241
305
  `Return { schema:1 } when done.`,
242
- { label: 'apply-directives', phase: 'Directives', schema: HANDOFF_SCHEMA },
306
+ { label: 'apply-directives', phase: 'Directives', schema: HANDOFF_SCHEMA, model: modelFor('PERSIST') },
243
307
  )
244
308
  }
245
309
  if (proposals.length) {
@@ -248,7 +312,7 @@ if (proposals.length) {
248
312
  `Write these directive PROPOSALS to \`retrospective-batch-${batchNumber}.md\` under "## Pending Approval" — do NOT add them to ` +
249
313
  `correction-directives.yaml. For each, document the proposed directive, why it needs approval (architecture-conflict or high-impact), ` +
250
314
  `evidence, risk, and an alternative. Proposals (JSON): ${JSON.stringify(proposals)}. Return { schema:1 } when done.`,
251
- { label: 'surface-proposals', phase: 'Directives', schema: HANDOFF_SCHEMA },
315
+ { label: 'surface-proposals', phase: 'Directives', schema: HANDOFF_SCHEMA, model: modelFor('PERSIST') },
252
316
  )
253
317
  }
254
318
 
@@ -257,8 +321,8 @@ phase('Embed')
257
321
  const embed = await agent(
258
322
  `Run embed-instructions.md: write \`embed-instructions.md\` (curated recurring patterns / novel decisions / bug patterns / ` +
259
323
  `broadly-applicable directives only — NOT one-offs) in the most recent story output dir, then run ` +
260
- `\`valent-pipeline db embed --file <that path>\`. Return { schema:1, embedded:<int count> }.`,
261
- { label: 'embed', phase: 'Embed', schema: { type: 'object', additionalProperties: true } },
324
+ `\`node .valent-pipeline/bin/cli.js db embed --file <that path>\`. Return { schema:1, embedded:<int count> }.`,
325
+ { label: 'embed', phase: 'Embed', schema: { type: 'object', additionalProperties: true }, model: modelFor('EMBED') },
262
326
  )
263
327
 
264
328
  return {