valent-pipeline 0.3.4 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/bin/cli.js +80 -0
  2. package/package.json +7 -5
  3. package/pipeline/docs/design/provider-adapter-guide.md +6 -7
  4. package/pipeline/orchestrators/claude-code/README.md +99 -0
  5. package/pipeline/orchestrators/claude-code/plan.workflow.js +284 -0
  6. package/pipeline/orchestrators/claude-code/retro.workflow.js +274 -0
  7. package/pipeline/orchestrators/claude-code/sprint.workflow.js +354 -0
  8. package/pipeline/orchestrators/codex/README.md +52 -0
  9. package/pipeline/orchestrators/codex/lead-loop.md +115 -0
  10. package/pipeline/prompts/critic.md +2 -0
  11. package/pipeline/prompts/lead.md +1 -1
  12. package/pipeline/schemas/handoff.schema.json +19 -0
  13. package/pipeline/schemas/task-graph.schema.json +53 -0
  14. package/pipeline/schemas/verdict.schema.json +20 -0
  15. package/pipeline/steps/common/distilled-handoff-format.md +15 -0
  16. package/pipeline/steps/critic/acceptance-audit.md +1 -1
  17. package/pipeline/steps/critic/edge-case-hunt.md +2 -2
  18. package/pipeline/steps/critic/triage.md +2 -2
  19. package/pipeline/steps/orchestration/adopt-lead-and-create-team.md +13 -12
  20. package/pipeline/steps/orchestration/sprint-plan.md +28 -31
  21. package/pipeline/steps/retrospective/calibration.md +18 -31
  22. package/pipeline/task-graphs/backend-api.yaml +1 -1
  23. package/pipeline/task-graphs/data-pipeline.yaml +1 -1
  24. package/pipeline/task-graphs/document-generation.yaml +1 -1
  25. package/pipeline/task-graphs/frontend-only.yaml +9 -8
  26. package/pipeline/task-graphs/fullstack-web.yaml +11 -10
  27. package/pipeline/task-graphs/library.yaml +1 -1
  28. package/pipeline/task-graphs/mcp-server.yaml +1 -1
  29. package/pipeline/task-graphs/mobile-app.yaml +8 -7
  30. package/pipeline/templates/bend-handoff.template.md +11 -0
  31. package/pipeline/templates/critic-review.template.md +15 -1
  32. package/pipeline/templates/data-handoff.template.md +11 -0
  33. package/pipeline/templates/docgen-handoff.template.md +11 -0
  34. package/pipeline/templates/execution-report.template.md +11 -0
  35. package/pipeline/templates/fend-handoff.template.md +11 -0
  36. package/pipeline/templates/iac-handoff.template.md +11 -0
  37. package/pipeline/templates/judge-decision.template.md +13 -0
  38. package/pipeline/templates/libdev-handoff.template.md +11 -0
  39. package/pipeline/templates/mcp-dev-handoff.template.md +11 -0
  40. package/pipeline/templates/mobile-handoff.template.md +11 -0
  41. package/pipeline/templates/qa-test-spec.template.md +11 -0
  42. package/pipeline/templates/readiness-review.template.md +13 -0
  43. package/pipeline/templates/reqs-brief.template.md +11 -0
  44. package/pipeline/templates/uxa-spec.template.md +11 -0
  45. package/skills/valent-run-story/SKILL.md +12 -0
  46. package/src/commands/calibrate.js +86 -0
  47. package/src/commands/init.js +1 -1
  48. package/src/commands/rejection-cap.js +70 -0
  49. package/src/commands/resolve-graph.js +79 -0
  50. package/src/commands/sprint-pack.js +62 -0
  51. package/src/commands/validate-handoff.js +32 -0
  52. package/src/commands/validate-sprint.js +55 -0
  53. package/src/lib/graph.js +98 -0
  54. package/src/lib/handoff.js +99 -0
  55. package/src/lib/rejection.js +38 -0
  56. package/src/lib/sprint.js +312 -0
package/bin/cli.js CHANGED
@@ -54,6 +54,86 @@ configCmd
54
54
  await validate();
55
55
  });
56
56
 
57
+ // validate-handoff command
58
+ program
59
+ .command('validate-handoff')
60
+ .description('Validate a handoff artifact\'s valent:handoff machine block against the schema')
61
+ .requiredOption('--file <path>', 'Path to the handoff markdown file')
62
+ .option('--gate', 'Force gate validation (verdict required + pass-requires-zero-Highs invariant)')
63
+ .action(async (options) => {
64
+ const { validateHandoffCmd } = await import('../src/commands/validate-handoff.js');
65
+ await validateHandoffCmd(options);
66
+ });
67
+
68
+ // resolve-graph command
69
+ program
70
+ .command('resolve-graph')
71
+ .description('Deterministically resolve a task graph against testing profiles (evaluate predicates, prune blockedBy)')
72
+ .option('--type <project-type>', 'Project type (resolves .valent-pipeline/task-graphs/<type>.yaml, falling back to packaged)')
73
+ .option('--file <path>', 'Explicit path to a task-graph YAML (overrides --type)')
74
+ .option('--profiles <list>', 'Comma-separated testing profiles, e.g. api,ui,iac', '')
75
+ .option('--validate-only', 'Validate the graph shape and references without resolving')
76
+ .action(async (options) => {
77
+ const { resolveGraphCmd } = await import('../src/commands/resolve-graph.js');
78
+ await resolveGraphCmd(options);
79
+ });
80
+
81
+ // sprint-pack command (meta-loop: greedy story packing)
82
+ program
83
+ .command('sprint-pack')
84
+ .description('Deterministically pack groomed stories into a sprint by priority within a velocity budget')
85
+ .requiredOption('--velocity <n>', 'Sprint capacity in story points')
86
+ .option('--backlog <path>', 'Backlog file (YAML/JSON); packs its `items`')
87
+ .option('--stories <path>', 'Explicit story array (YAML/JSON); overrides --backlog')
88
+ .action(async (options) => {
89
+ const { sprintPackCmd } = await import('../src/commands/sprint-pack.js');
90
+ await sprintPackCmd(options);
91
+ });
92
+
93
+ // calibrate command (meta-loop: estimation-accuracy arithmetic)
94
+ program
95
+ .command('calibrate')
96
+ .description('Compute calibration metrics (point/time ratios, deviation flags, velocity stability)')
97
+ .option('--sprint <id>', 'Sprint to pull calibration rows for (queries the SQLite store)')
98
+ .option('--db', 'Use all calibration rows from the store (no sprint filter)')
99
+ .option('--db-path <path>', 'Database path (defaults to config)')
100
+ .option('--data <path>', 'Explicit calibration rows (YAML/JSON); overrides the db source')
101
+ .option('--velocity-history <path>', 'Explicit velocity history (YAML/JSON) to pair with --data')
102
+ .option('--deviation-threshold <n>', 'Pairwise deviation flag threshold (default 0.5)')
103
+ .option('--cv-threshold <n>', 'Velocity coefficient-of-variation instability threshold (default 0.3)')
104
+ .action(async (options) => {
105
+ const { calibrateCmd } = await import('../src/commands/calibrate.js');
106
+ await calibrateCmd(options);
107
+ });
108
+
109
+ // validate-sprint command (meta-loop: consistency cross-checks)
110
+ program
111
+ .command('validate-sprint')
112
+ .description('Cross-check sprint status YAML and backlog for consistency (sprint-plan.md Step 6)')
113
+ .requiredOption('--status <path>', 'Sprint status YAML/JSON (machine-readable companion to the plan)')
114
+ .requiredOption('--backlog <path>', 'Backlog file (YAML/JSON)')
115
+ .option('--plan <path>', 'Optional structured plan (JSON/YAML), e.g. sprint-pack output; defaults to deriving from --status')
116
+ .action(async (options) => {
117
+ const { validateSprintCmd } = await import('../src/commands/validate-sprint.js');
118
+ await validateSprintCmd(options);
119
+ });
120
+
121
+ // rejection-cap command (code-owned rejection cap for the prose/Codex shell)
122
+ program
123
+ .command('rejection-cap')
124
+ .description('Track and enforce the per-story rejection cap in code (exits non-zero when tripped)')
125
+ .requiredOption('--story <id>', 'Story identifier')
126
+ .requiredOption('--gate <gate>', 'Gate name (readiness | critic | judge)')
127
+ .option('--agent <name>', 'Responsible agent the rejection is routed to (defaults to the gate)')
128
+ .option('--max <n>', 'Cap (max rejection cycles); defaults to 5')
129
+ .option('--increment', 'Record a new rejection (bump the counter), then report')
130
+ .option('--reset', 'Clear all counters for the story (call at a story boundary), then report')
131
+ .option('--state <path>', 'State file path (defaults to .valent-pipeline/rejection-state.json)')
132
+ .action(async (options) => {
133
+ const { rejectionCapCmd } = await import('../src/commands/rejection-cap.js');
134
+ await rejectionCapCmd(options);
135
+ });
136
+
57
137
  // db commands
58
138
  const dbCmd = program
59
139
  .command('db')
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "valent-pipeline",
3
- "version": "0.3.4",
3
+ "version": "0.4.2",
4
4
  "description": "v3 multi-agent AI pipeline for software development lifecycle",
5
5
  "type": "module",
6
6
  "bin": {
@@ -16,14 +16,16 @@
16
16
  "skills/"
17
17
  ],
18
18
  "scripts": {
19
- "test": "node scripts/test-local.js",
20
- "prepublishOnly": "node scripts/test-local.js"
19
+ "test": "node scripts/test-local.js && node scripts/test-workflow.js",
20
+ "prepublishOnly": "node scripts/test-local.js && node scripts/test-workflow.js"
21
21
  },
22
22
  "dependencies": {
23
+ "ajv": "^8.20.0",
24
+ "better-sqlite3": "^12.0.0",
23
25
  "commander": "^12.0.0",
24
26
  "inquirer": "^9.0.0",
25
- "better-sqlite3": "^11.0.0",
26
- "sqlite-vec": "^0.1.0"
27
+ "sqlite-vec": "^0.1.0",
28
+ "yaml": "^2.9.0"
27
29
  },
28
30
  "keywords": [
29
31
  "ai",
package/pipeline/docs/design/provider-adapter-guide.md CHANGED
@@ -24,11 +24,9 @@ pipeline/
24
24
  claude-code/
25
25
  runtime.md ← PROVIDER — Claude Code runtime operations
26
26
  spawn.template.md ← PROVIDER — Claude Code spawn template
27
- knowledge-spawn.template.md ← PROVIDER — Claude Code knowledge spawn
28
27
  codex/
29
28
  runtime.md ← PROVIDER — Codex runtime operations
30
29
  spawn.template.md ← PROVIDER — Codex spawn template
31
- knowledge-spawn.template.md ← PROVIDER — Codex knowledge spawn
32
30
  AGENTS.md ← PROVIDER — Codex repo-level instructions
33
31
  cloud-task-protocol.md ← PROVIDER — Codex cloud execution protocol
34
32
  cloud-task-prompts/ ← PROVIDER — Codex cloud task templates
@@ -56,7 +54,8 @@ Lead's prompt (`lead.md`) defines WHEN and WHY. The runtime adapter defines HOW.
56
54
  |------|---------|
57
55
  | `runtime.md` | All runtime operations: initialization, task registry, agent spawning, signal delivery, monitoring, teardown |
58
56
  | `spawn.template.md` | Agent spawn prompt template — what each agent instance receives at startup |
59
- | `knowledge-spawn.template.md` | Knowledge agent spawn template — knowledge-specific initialization |
57
+
58
+ > **Note:** Knowledge is a self-service skill (`valent-knowledge`), not an agent — there is no `knowledge-spawn.template.md`. `scripts/validate-provider-sync.js` enforces this inventory (spawn-template parity + manifest prompt resolution).
60
59
 
61
60
  ### Codex-Only Files
62
61
 
@@ -133,7 +132,6 @@ Entirely shared. Quality gates are orchestration logic (when to reject, where to
133
132
  1. Create `providers/{new-provider}/` with:
134
133
  - `runtime.md` — all runtime operations for the new provider
135
134
  - `spawn.template.md` — spawn template adapted for the provider's agent model
136
- - `knowledge-spawn.template.md` — knowledge spawn adapted
137
135
 
138
136
  2. Update `src/lib/config-schema.js`:
139
137
  - Add new provider to `validProviders` array
@@ -158,9 +156,10 @@ Entirely shared. Quality gates are orchestration logic (when to reject, where to
158
156
 
159
157
  The `scripts/validate-provider-sync.js` script runs in CI before every publish. It checks:
160
158
 
161
- 1. **Template parity** — `spawn.template.md` and `knowledge-spawn.template.md` exist in both provider directories
162
- 2. **Agent coverage** — Both runtime.md files reference the same set of agents from `agents-manifest.yaml`
163
- 3. **Structural consistency** — Both runtime.md files have the same major sections (Initialization, Task Registry, Agent Spawning, Signal Delivery, Monitoring, Teardown)
159
+ 1. **Template parity** — `spawn.template.md` exists in both providers, and any `*spawn.template.md` in one provider has a counterpart in the other
160
+ 2. **Manifest integrity** — every `prompt_template` declared in `agents-manifest.yaml` resolves to a real file
161
+ 3. **Agent coverage** — both runtime.md files reference the critical agents (REQS, BEND, FEND, CRITIC, QA-B, JUDGE)
162
+ 4. **Structural consistency** — both runtime.md files have the major sections (Initialization, Task Registry, Agent Spawning, Signal Delivery, Monitoring, Teardown)
164
163
 
165
164
  If any check fails, the publish is blocked. Fix the discrepancy, then re-push.
166
165
 
package/pipeline/orchestrators/claude-code/README.md ADDED
@@ -0,0 +1,99 @@
1
+ # Claude Code orchestrator (native Workflow)
2
+
3
+ This is the Claude Code deployment of the valent-pipeline orchestrator, per the hybrid
4
+ target in [`../../../docs-feedback/reimplementation-plan.md`](../../../docs-feedback/reimplementation-plan.md)
5
+ (R3): the Claude Code provider runs a deterministic **Workflow script**, while the Codex
6
+ provider keeps the markdown-skill Lead. Both consume the same shared substrate
7
+ (`prompts/`, `steps/`, `task-graphs/`, `schemas/`, templates).
8
+
9
+ ## The three workflows
10
+
11
+ | File | Step | Role |
12
+ |---|---|---|
13
+ | `plan.workflow.js` | 7 | Groom → size → pack → validate a set of pending stories into a planned sprint batch. Emits a batch shaped to feed straight into `sprint.workflow.js`. |
14
+ | `sprint.workflow.js` | 4 + 6 | Execute a planned batch sequentially through the per-story pipeline with schema-validated gates. |
15
+ | `retro.workflow.js` | 7 | Learn from a shipped batch: calibrate, loop-until-dry aggregate review, gated directives, embed. |
16
+
17
+ They compose as `plan → sprint → retro`. The per-story pipeline is kept **inline** in
18
+ `sprint.workflow.js` (not a nested `workflow()`), so the single `workflow()` nesting level
19
+ stays free for a future sprint-cycle wrapper to call all three (reimplementation-plan §5b).
20
+
21
+ ## Status
22
+
23
+ `sprint.workflow.js` implements **Steps 4 + 6** (R1 control flow, R4 gates-as-stages, the
24
+ sprint batch loop, 3b parallel CRITIC, and full spawn-context prompts). `plan.workflow.js`
25
+ and `retro.workflow.js` implement **Step 7**. **Step 8** (resume + state model, below) is
26
+ wired. All three are control-flow-validated by `scripts/test-workflow.js` (21 scenarios,
27
+ incl. a resume-safety lint), but:
28
+
29
+ - The Workflow path is **opt-in, not the default.** `skills/valent-run-story` still drives the prose Lead;
30
+ the Workflow runs only when the user opts in (see that skill's "Step 5 (alternative)").
31
+ - The Workflow path has **not been exercised end-to-end against a live story.** A Workflow runs via the
32
+ Workflow tool against a real project and spawns real agents; it cannot be unit-tested like
33
+ `src/lib/*`. Validate it against a `testResources/*` fixture before making it the default.
34
+
35
+ ## What it demonstrates
36
+
37
+ | Concern | How | Replaces |
38
+ |---|---|---|
39
+ | DAG resolution | spawns an agent that runs `resolve-graph` (step 2) per story | Lead transcribing + pruning by judgment |
40
+ | Sprint batch | sequential `for`-loop over `args.stories[]` (shared branch ⇒ no overlap) | prose six-phase sprint loop |
41
+ | Quality gates | `runGate()` returns a `verdict.schema`-validated object | prose verdict, unchecked |
42
+ | Pass-invariant | `assertGate()` rejects `pass` + open Highs | KANBAN-002 class |
43
+ | Rejection cap | JS `while` loop, code-owned counter | model-counted circuit breaker |
44
+ | Dev fan-out | `parallel()` barrier before CRITIC | wave/spawn_trigger overlay |
45
+ | 3b CRITIC | `parallel([blind, edge, acceptance])` independent agents → triage barrier | one CRITIC context, passes anchored on each other |
46
+ | Spawn context | `buildPrompt()` mirrors `spawn.template.md` (Setup/Task/Trigger/Completion) | terse inline instructions |
47
+ | Roll-over | a rejected story is recorded and the batch continues | — |
48
+ | Resume | journal (`resumeFromRunId`) | disk-state rehydration + re-decide |
49
+
50
+ ## Args
51
+
52
+ ```js
53
+ // batch form (a planned sprint)
54
+ { stories: [{ storyId, projectType, profiles }, ...], maxRejectionCycles? }
55
+ // single-story form (back-compat)
56
+ { storyId, projectType, profiles?, maxRejectionCycles? }
57
+ ```
58
+
59
+ Returns `{ shipped, stories_shipped, stories_rolled_over, results: [{ storyId, shipped, verdict, skipped }] }`.
60
+
61
+ ## Resume & state model (step 8)
62
+
63
+ **The journal is the state of record.** Each Workflow invocation returns a `runId`. To resume
64
+ after an interruption (context limit, crash, manual stop, or a mid-run script edit), relaunch
65
+ with `Workflow({ scriptPath, resumeFromRunId })` — **not** a fresh run. The journal replays the
66
+ unchanged prefix of `agent()` calls instantly (same script + args → 100% cache hit) and re-runs
67
+ only from the first changed/new call onward. Already-shipped stories and passed gates are not
68
+ redone. This is the exact form of the durability the prose Lead approximated by re-reading
69
+ `pipeline-state.json` and re-deciding.
70
+
71
+ `pipeline-state.json`, `sprint-{n}-status.yaml`, and the markdown handoffs are **derived,
72
+ human-readable views** in this path — agents write them for visibility; the orchestrator never
73
+ reads them back to make a control-flow decision (its state lives in JS variables the journal
74
+ captures). Because there is no multi-file state of record, the non-atomic multi-file desync the
75
+ prose Lead can hit (feedback gap #2) is structurally impossible here. *Do not hand-edit a state
76
+ file to resume — pass `resumeFromRunId`.* (The prose Lead path still uses `pipeline-state.json`
77
+ as its mechanism; that's correct for that runtime.)
78
+
79
+ **Resume-safety is linted.** Journal replay requires a deterministic, side-effect-free script
80
+ body, so `scripts/test-workflow.js` statically rejects `Date.now`/`new Date(`/`Math.random`
81
+ (nondeterminism) and `import`/`require`/`*FileSync`/`process.*` (in-script IO) in all three
82
+ workflow files. All IO goes through agents; that's why resolve-graph/sprint-pack/calibrate/embed
83
+ are invoked *through* an agent rather than imported.
84
+
85
+ ## Known simplifications (next slices)
86
+
87
+ - A `sprint-cycle.workflow.js` that calls `plan → sprint → retro` via `workflow()` isn't built
88
+ yet; for now run the three workflows in sequence (the plan output feeds the sprint input).
89
+ - Per-story dev fan-out re-runs ALL dev agents on a CRITIC rejection; routing rework to only
90
+ the agent(s) CRITIC targeted (via `rejectionTarget`) is a refinement to make once the workflow has been run live.
91
+ - No PMCP / visual-validation stage yet; no PM/program-loop workflow (left agent-driven per §5b).
92
+
93
+ ## Runtime constraint that shaped the design
94
+
95
+ A Workflow script body has **no filesystem or import access** — it cannot read
96
+ `task-graphs/*.yaml`, parse handoffs, or run the CLI directly. All IO is performed by the
97
+ agents it spawns (which have Bash/Read/Write); the script only sequences them and validates
98
+ their structured returns. That is why `resolve-graph` is invoked *through* an agent rather
99
+ than imported.
package/pipeline/orchestrators/claude-code/plan.workflow.js ADDED
@@ -0,0 +1,284 @@
1
+ /**
2
+ * valent-pipeline sprint PLANNING orchestrator — Claude Code (native Workflow) provider.
3
+ *
4
+ * STATUS: Step 7 (reimplementation-plan §5b). Reviewable; control flow validated by
5
+ * scripts/test-workflow.js. Opt-in, not the default — not yet run end-to-end against a
6
+ * live backlog. The Codex provider keeps the markdown-skill Lead (hybrid, R3).
7
+ *
8
+ * Produces a planned sprint batch from a set of pending stories:
9
+ * groom (pipelined) -> size (parallel) -> persist points -> pack (CLI) -> validate (CLI)
10
+ *
11
+ * Why grooming is pipeline() but execution is for(): grooming runs Phase-1 SPEC agents
12
+ * (reqs/uxa/qa-a/readiness) that DON'T touch code, so stories can flow assembly-line —
13
+ * story B can be in QA-A while story C is still in REQS. (Execution, by contrast, shares
14
+ * one git branch and must be sequential — see sprint.workflow.js.)
15
+ *
16
+ * The deterministic packing/validation (greedy bin-packing, consistency cross-checks) is NOT
17
+ * done in this script — it lives in `valent-pipeline sprint-pack` / `validate-sprint`
18
+ * (src/lib/sprint.js), invoked through an agent because a Workflow script has no CLI/fs
19
+ * access. Both runtimes reuse those CLIs; this workflow just sequences the agents.
20
+ *
21
+ * The return value is shaped to feed straight into sprint.workflow.js:
22
+ * { sprintId, points_planned, stories: [{ storyId, projectType, profiles }], buffer_story_ids }
23
+ *
24
+ * args: { stories: [{ storyId, projectType }], sprintId, velocity, backlogPath?, maxRejectionCycles? }
25
+ */
26
+
27
+ export const meta = {
28
+ name: 'valent-plan',
29
+ description: 'Groom, size, and pack a set of stories into a validated sprint plan (Workflow)',
30
+ phases: [
31
+ { title: 'Groom', detail: 'reqs -> uxa? -> qa-a -> readiness gate, pipelined across the batch' },
32
+ { title: 'Size', detail: 'profile-matched estimators per story, summed (parallel)' },
33
+ { title: 'Persist', detail: 'write story_points + groomed status to the backlog' },
34
+ { title: 'Pack', detail: 'valent-pipeline sprint-pack (greedy bin-packing, in code)' },
35
+ { title: 'Validate', detail: 'write plan/status artifacts + valent-pipeline validate-sprint' },
36
+ ],
37
+ }
38
+
39
+ // --- schemas (inlined; a Workflow script cannot read pipeline/schemas/*.json) ---
40
+
41
+ const HANDOFF_SCHEMA = {
42
+ type: 'object',
43
+ required: ['schema', 'agent', 'story'],
44
+ additionalProperties: true,
45
+ properties: {
46
+ schema: { const: 1 },
47
+ agent: { type: 'string' },
48
+ story: { type: 'string' },
49
+ files: { type: 'array', items: { type: 'string' } },
50
+ flags: { type: 'array', items: { type: 'string' } },
51
+ },
52
+ }
53
+
54
+ // REQS also tags testing_profiles during grooming (sprint-groom.md Step 0).
55
+ const REQS_GROOM_SCHEMA = {
56
+ type: 'object',
57
+ required: ['schema', 'agent', 'story', 'testing_profiles'],
58
+ additionalProperties: true,
59
+ properties: {
60
+ schema: { const: 1 },
61
+ agent: { type: 'string' },
62
+ story: { type: 'string' },
63
+ testing_profiles: { type: 'array', items: { type: 'string' } },
64
+ },
65
+ }
66
+
67
+ const VERDICT_SCHEMA = {
68
+ type: 'object',
69
+ required: ['schema', 'agent', 'story', 'verdict', 'highFindingsOpen'],
70
+ additionalProperties: true,
71
+ properties: {
72
+ schema: { const: 1 },
73
+ agent: { type: 'string' },
74
+ story: { type: 'string' },
75
+ verdict: { enum: ['pass', 'fail', 'needs-review'] },
76
+ highFindingsOpen: { type: 'integer', minimum: 0 },
77
+ rejectionTarget: { type: ['string', 'null'] },
78
+ },
79
+ }
80
+
81
+ const ESTIMATE_SCHEMA = {
82
+ type: 'object',
83
+ required: ['schema', 'agent', 'story', 'points'],
84
+ additionalProperties: true,
85
+ properties: {
86
+ schema: { const: 1 },
87
+ agent: { type: 'string' },
88
+ story: { type: 'string' },
89
+ points: { type: 'integer', minimum: 0 },
90
+ },
91
+ }
92
+
93
+ const PACK_SCHEMA = {
94
+ type: 'object',
95
+ required: ['sprint_stories', 'buffer_story_ids', 'points_planned'],
96
+ properties: {
97
+ sprint_stories: { type: 'array', items: { type: 'string' } },
98
+ buffer_story_ids: { type: 'array', items: { type: 'string' } },
99
+ points_planned: { type: 'integer' },
100
+ remaining_capacity: { type: 'integer' },
101
+ },
102
+ }
103
+
104
+ const VALIDATE_SCHEMA = {
105
+ type: 'object',
106
+ required: ['valid'],
107
+ additionalProperties: true,
108
+ properties: { valid: { type: 'boolean' }, errors: { type: 'array', items: { type: 'string' } } },
109
+ }
110
+
111
+ // Which estimator agent owns each testing profile (sprint-size.md Step 2).
112
+ const PROFILE_ESTIMATORS = {
113
+ api: 'BEND', ui: 'FEND', 'data-pipeline': 'DATA', 'mcp-server': 'MCP-DEV',
114
+ library: 'LIBDEV', 'document-generation': 'DOCGEN', iac: 'IAC',
115
+ }
116
+
117
+ // --- args ---
118
+
119
+ const a = args || {}
120
+ const stories = Array.isArray(a.stories) ? a.stories : []
121
+ const sprintId = a.sprintId
122
+ const velocity = a.velocity
123
+ const backlogPath = a.backlogPath || 'pipeline-backlog.yaml'
124
+ const maxRejectionCycles = a.maxRejectionCycles ?? 5
125
+ if (!stories.length || !sprintId || typeof velocity !== 'number') {
126
+ throw new Error('args must include { stories:[{storyId,projectType}], sprintId, velocity }')
127
+ }
128
+
129
+ function buildPrompt({ role, promptFile, storyId, taskSubject, trigger, returnContract }) {
130
+ const outputDir = `stories/${storyId}/output`
131
+ return [
132
+ `You are **${role}**, for story ${storyId} in the valent-pipeline (sprint ${sprintId} planning).`,
133
+ '',
134
+ '## Setup',
135
+ `1. Read your core prompt: \`.valent-pipeline/prompts/${promptFile}\` — identity, protocols, step sequence.`,
136
+ `2. Read shared context: \`${outputDir}/pipeline-context.md\` (and correction directives if present).`,
137
+ '3. Read each step file at the point of execution, not before. Check decision gates first.',
138
+ '',
139
+ '## Task Assignment',
140
+ taskSubject,
141
+ '',
142
+ '## Trigger',
143
+ trigger || 'Begin now.',
144
+ '',
145
+ '## On Completion',
146
+ returnContract || 'Write your handoff artifact, then return ONLY your `valent:handoff` machine block fields as JSON.',
147
+ ].join('\n')
148
+ }
149
+
150
+ // ---------------------------------------------------------------------------
151
+
152
+ phase('Groom')
153
+ // Pipelined: spec agents don't touch code, so stories flow assembly-line through the stages.
154
+ const groomed = await pipeline(
155
+ stories,
156
+ // Stage 1: REQS analyzes the story AND tags testing_profiles (sprint-groom.md Step 0 + REQS).
157
+ async (story) => {
158
+ const r = await agent(
159
+ buildPrompt({
160
+ role: 'REQS', promptFile: 'reqs.md', storyId: story.storyId,
161
+ taskSubject: 'Tag testing_profiles for this story, then produce reqs-brief.md.',
162
+ returnContract: 'Return ONLY { schema:1, agent:"reqs", story, testing_profiles:[...], files:[...] } as JSON.',
163
+ }),
164
+ { label: `reqs:${story.storyId}`, phase: 'Groom', schema: REQS_GROOM_SCHEMA },
165
+ )
166
+ return { ...story, profiles: r.testing_profiles || [] }
167
+ },
168
+ // Stage 2: UXA only for UI stories (skipped otherwise — no idle agent).
169
+ async (g) => {
170
+ if (g.profiles.includes('ui')) {
171
+ await agent(
172
+ buildPrompt({ role: 'UXA', promptFile: 'uxa.md', storyId: g.storyId, taskSubject: 'Translate the brief into uxa-spec.md.' }),
173
+ { label: `uxa:${g.storyId}`, phase: 'Groom', schema: HANDOFF_SCHEMA },
174
+ )
175
+ }
176
+ return g
177
+ },
178
+ // Stage 3: QA-A writes the test spec before any code.
179
+ async (g) => {
180
+ await agent(
181
+ buildPrompt({ role: 'QA-A', promptFile: 'qa-a.md', storyId: g.storyId, taskSubject: 'Produce qa-test-spec.md before any code is written.' }),
182
+ { label: `qa-a:${g.storyId}`, phase: 'Groom', schema: HANDOFF_SCHEMA },
183
+ )
184
+ return g
185
+ },
186
+ // Stage 4: READINESS gate with a code-owned rejection loop. Rework routes upstream.
187
+ async (g) => {
188
+ let rejections = 0
189
+ while (true) {
190
+ const v = await agent(
191
+ buildPrompt({
192
+ role: 'READINESS', promptFile: 'readiness.md', storyId: g.storyId,
193
+ taskSubject: 'Validate the spec chain (reqs/uxa/qa) is implementation-ready; run cross-story checks (sprint mode).',
194
+ }),
195
+ { label: `gate:readiness:${g.storyId}`, phase: 'Groom', schema: VERDICT_SCHEMA },
196
+ )
197
+ if (v.verdict === 'pass') return { ...g, groomedStatus: 'groomed' }
198
+ rejections += 1
199
+ if (rejections >= maxRejectionCycles) {
200
+ log(`${g.storyId}: readiness cap tripped after ${rejections} — blocked-on-user, removed from pipeline`)
201
+ return { ...g, groomedStatus: 'blocked-on-user' }
202
+ }
203
+ const target = v.rejectionTarget || 'REQS'
204
+ log(`${g.storyId}: readiness rejection ${rejections}/${maxRejectionCycles} -> ${target}`)
205
+ await agent(
206
+ buildPrompt({ role: target, promptFile: `${target.toLowerCase()}.md`, storyId: g.storyId, taskSubject: 'Address the READINESS rejection and rewrite the affected spec.' }),
207
+ { label: `rework:${target.toLowerCase()}:${g.storyId}`, phase: 'Groom', schema: HANDOFF_SCHEMA },
208
+ )
209
+ }
210
+ },
211
+ )
212
+
213
+ const readyStories = groomed.filter(Boolean).filter((g) => g.groomedStatus === 'groomed')
214
+ log(`groomed ${readyStories.length}/${stories.length} stories`)
215
+
216
+ phase('Size')
217
+ // Each story is sized by every estimator whose profile is present; story_points = the sum.
218
+ // Sizing is estimation only (no code), so stories size in parallel.
219
+ const sized = await parallel(
220
+ readyStories.map((g) => () => {
221
+ const estimators = [...new Set(g.profiles.map((p) => PROFILE_ESTIMATORS[p]).filter(Boolean))]
222
+ return parallel(
223
+ estimators.map((est) => () =>
224
+ agent(
225
+ buildPrompt({
226
+ role: est, promptFile: `${est.toLowerCase()}.md`, storyId: g.storyId,
227
+ taskSubject: 'Estimate this story (read your estimate.md step; apply calibration directives if present).',
228
+ returnContract: 'Return ONLY { schema:1, agent, story, points:<int> } as JSON.',
229
+ }),
230
+ { label: `estimate:${est.toLowerCase()}:${g.storyId}`, phase: 'Size', schema: ESTIMATE_SCHEMA },
231
+ )),
232
+ ).then((ests) => ({
233
+ ...g,
234
+ points: ests.filter(Boolean).reduce((sum, e) => sum + (e.points || 0), 0),
235
+ }))
236
+ }),
237
+ )
238
+ const sizedStories = sized.filter(Boolean)
239
+
240
+ phase('Persist')
241
+ // One agent persists the summed points + groomed status to the backlog so sprint-pack can
242
+ // read them. (The script can't write files; the agent does the IO.)
243
+ await agent(
244
+ `Update \`${backlogPath}\`: for each of these stories set \`story_points\` and \`status: groomed\`, ` +
245
+ `and write \`testing_profiles\`. Stories (JSON): ${JSON.stringify(sizedStories.map((s) => ({ id: s.storyId, story_points: s.points, testing_profiles: s.profiles })))}. ` +
246
+ `Return your \`valent:handoff\` machine block fields as JSON.`,
247
+ { label: 'persist-sizing', phase: 'Persist', schema: HANDOFF_SCHEMA },
248
+ )
249
+
250
+ phase('Pack')
251
+ // Deterministic greedy packing happens in code (src/lib/sprint.js), invoked via the CLI.
252
+ const pack = await agent(
253
+ `Run exactly: \`valent-pipeline sprint-pack --velocity ${velocity} --backlog ${backlogPath}\` ` +
254
+ `in the project root and return its stdout JSON verbatim (fields: sprint_stories, buffer_story_ids, points_planned, remaining_capacity).`,
255
+ { label: 'sprint-pack', phase: 'Pack', schema: PACK_SCHEMA },
256
+ )
257
+ log(`packed ${pack.sprint_stories.length} stories (${pack.points_planned} pts); buffer: ${pack.buffer_story_ids.length}`)
258
+
259
+ phase('Validate')
260
+ // Write the human plan + machine status artifacts, tag the backlog, then cross-check in code.
261
+ const validation = await agent(
262
+ `For sprint ${sprintId}: (1) write \`sprint-${sprintId}-plan.md\` from \`.valent-pipeline/templates/sprint-plan.template.md\` ` +
263
+ `and \`sprint-${sprintId}-status.yaml\` from the status template for the packed stories ${JSON.stringify(pack.sprint_stories)}; ` +
264
+ `(2) tag those stories \`sprint: ${sprintId}\` + \`status: sprint-planned\` in \`${backlogPath}\`; ` +
265
+ `(3) run \`valent-pipeline validate-sprint --status sprint-${sprintId}-status.yaml --backlog ${backlogPath}\` and ` +
266
+ `return its result as JSON { valid:boolean, errors:[...] } (errors = the lines it printed on failure, else []).`,
267
+ { label: 'validate-sprint', phase: 'Validate', schema: VALIDATE_SCHEMA },
268
+ )
269
+ if (!validation.valid) {
270
+ throw new Error(`sprint ${sprintId} plan failed validation: ${(validation.errors || []).join('; ')}`)
271
+ }
272
+
273
+ // Shaped to feed straight into sprint.workflow.js.
274
+ const packedSet = new Set(pack.sprint_stories)
275
+ const plannedStories = sizedStories
276
+ .filter((s) => packedSet.has(s.storyId))
277
+ .map((s) => ({ storyId: s.storyId, projectType: s.projectType, profiles: s.profiles }))
278
+
279
+ return {
280
+ sprintId,
281
+ points_planned: pack.points_planned,
282
+ stories: plannedStories,
283
+ buffer_story_ids: pack.buffer_story_ids,
284
+ }