valent-pipeline 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/cli.js CHANGED
@@ -133,6 +133,17 @@ program
133
133
  await sprintPackCmd(options);
134
134
  });
135
135
 
136
+ // resolve-eligible command (meta-loop: cross-epic candidate eligibility)
137
+ program
138
+ .command('resolve-eligible')
139
+ .description('Deterministically resolve which pending backlog items are eligible for the next sprint (dependency chains stay together)')
140
+ .option('--backlog <path>', 'Backlog file (YAML/JSON); resolves its `items`')
141
+ .option('--stories <path>', 'Explicit item array (YAML/JSON); overrides --backlog')
142
+ .action(async (options) => {
143
+ const { resolveEligibleCmd } = await import('../src/commands/resolve-eligible.js');
144
+ await resolveEligibleCmd(options);
145
+ });
146
+
136
147
  // calibrate command (meta-loop: estimation-accuracy arithmetic)
137
148
  program
138
149
  .command('calibrate')
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "valent-pipeline",
3
- "version": "0.5.1",
3
+ "version": "0.5.3",
4
4
  "description": "v3 multi-agent AI pipeline for software development lifecycle",
5
5
  "type": "module",
6
6
  "bin": {
@@ -332,10 +332,15 @@ if (!validation.valid) {
332
332
  throw new Error(`sprint ${sprintId} plan failed validation: ${(validation.errors || []).join('; ')}`)
333
333
  }
334
334
 
335
- // Shaped to feed straight into sprint.workflow.js.
336
- const packedSet = new Set(pack.sprint_stories)
337
- const plannedStories = sizedStories
338
- .filter((s) => packedSet.has(s.storyId))
335
+ // Shaped to feed straight into sprint.workflow.js. Order by `pack.sprint_stories`, NOT by
336
+ // grooming/candidate order: sprint-pack emits stories in dependency-safe order (every prerequisite
337
+ // before its dependent) and sprint.workflow.js runs the batch SEQUENTIALLY in array order on a
338
+ // shared branch — so a dependent must not precede its prerequisite. This is what lets a dependency
339
+ // chain ship together in one sprint.
340
+ const sizedById = new Map(sizedStories.map((s) => [s.storyId, s]))
341
+ const plannedStories = pack.sprint_stories
342
+ .map((id) => sizedById.get(id))
343
+ .filter(Boolean)
339
344
  .map((s) => ({ storyId: s.storyId, projectType: s.projectType, profiles: s.profiles }))
340
345
 
341
346
  return {
@@ -5,15 +5,19 @@
5
5
  * by scripts/test-workflow.js. Opt-in, not the default. The Codex provider keeps the
6
6
  * markdown-skill Lead (hybrid, R3).
7
7
  *
8
- * The retrospective is the ONE place the meta-loop adds genuine *quality*, not just reliable
9
- * structure (R5): the prose retro is a fixed single pass; here the aggregate review runs
10
- * LOOP-UNTIL-DRY (keep reviewing until K consecutive rounds surface nothing new) followed by a
11
- * COMPLETENESS-CRITIC ("which pattern did we not check?"). That is the same rigor that makes
12
- * CRITIC's 3-pass and JUDGE the strongest existing features, applied to the learning loop.
8
+ * A retrospective LEARNS from what the sprint already produced — it does NOT re-review the code.
9
+ * Finding bugs is CRITIC/JUDGE's job, and they already did it as a gate DURING the sprint; by the
10
+ * time the retro runs the code has shipped, so a fresh review here is both too late to gate and a
11
+ * duplication of work. (An earlier design ran a loop-until-dry aggregate review + completeness-
12
+ * critic on opus — that bolted the pipeline's most expensive pattern onto the stage that should be
13
+ * the cheapest, for little value. The one genuine cross-story blind spot — seams between stories
14
+ * that per-story CRITIC can't see — now lives where it can still gate: the sprint-end integration
15
+ * gate in sprint.workflow.js.)
13
16
  *
14
- * Flow: calibrate (CLI) -> analyze -> aggregate-review (loop-until-dry) -> completeness-critic
15
- * -> directives (agent proposes; CODE enforces impact gating + the architectural-invariant
16
- * guard) -> embed (CLI).
17
+ * Flow: calibrate (CLI) -> synthesize (mine the sprint's OWN artifacts — CRITIC reviews, JUDGE
18
+ * rejections, QA bugs, rejection-cycle/cost data — into correction directives) -> directives
19
+ * gating (agent proposes; CODE enforces impact gating + the architectural-invariant guard)
20
+ * -> embed (CLI). Bounded and cheap: no opus review loop.
17
21
  *
18
22
  * The deterministic pieces are NOT in this script: calibration arithmetic is
19
23
  * `node .valent-pipeline/bin/cli.js calibrate` (src/lib/sprint.js); embedding is `node .valent-pipeline/bin/cli.js db embed`.
@@ -21,21 +25,19 @@
21
25
  * GATING and INVARIANT GUARD are deterministic policy, so they are enforced HERE in code —
22
26
  * the agent only proposes; the script decides what gets applied vs. surfaced for approval.
23
27
  *
24
- * args: { batchNumber, sprintId?, storyOutputDirs?: string[], dryRounds?: number, maxRounds?: number, models? }
25
- * sprintId present => sprint-mode (calibration runs). dryRounds = consecutive empty rounds
26
- * that end the loop-until-dry (default 2). maxRounds caps it (default 5). `models` is the
27
- * pipeline-config.yaml `models` tier->roles map, passed through by the invoking skill so
28
- * per-agent model tiers stay config-driven (editable via `valent configure`). Omit it to use
29
- * the baked-in default. See sprint.workflow.js for the full rationale.
28
+ * args: { batchNumber, sprintId?, storyOutputDirs?: string[], models?, reasoning? }
29
+ * sprintId present => sprint-mode (calibration runs). `models` is the pipeline-config.yaml
30
+ * `models` tier->roles map, passed through by the invoking skill so per-agent model tiers stay
31
+ * config-driven (editable via `valent configure`). Omit it to use the baked-in default. See
32
+ * sprint.workflow.js for the full rationale.
30
33
  */
31
34
 
32
35
  export const meta = {
33
36
  name: 'valent-retro',
34
- description: 'Retrospective: calibrate, loop-until-dry aggregate review, gated directives, embed (Workflow)',
37
+ description: 'Retrospective: calibrate, synthesize directives from sprint artifacts, gate, embed (Workflow)',
35
38
  phases: [
36
39
  { title: 'Calibrate', detail: 'node .valent-pipeline/bin/cli.js calibrate (estimation accuracy, in code) — sprint mode' },
37
- { title: 'Analyze', detail: 'CRITIC/QA/JUDGE batch outputs + cost' },
38
- { title: 'Aggregate', detail: 'loop-until-dry 3-pass aggregate review + completeness critic (R5)' },
40
+ { title: 'Synthesize', detail: 'mine CRITIC/QA/JUDGE/cost artifacts -> propose correction directives' },
39
41
  { title: 'Directives', detail: 'agent proposes; code enforces impact gating + invariant guard' },
40
42
  { title: 'Embed', detail: 'node .valent-pipeline/bin/cli.js db embed (persist curated patterns)' },
41
43
  ],
@@ -43,39 +45,6 @@ export const meta = {
43
45
 
44
46
  // --- schemas (inlined) ---
45
47
 
46
- const FINDINGS_SCHEMA = {
47
- type: 'object',
48
- required: ['schema', 'findings'],
49
- additionalProperties: true,
50
- properties: {
51
- schema: { const: 1 },
52
- findings: {
53
- type: 'array',
54
- items: {
55
- type: 'object',
56
- required: ['id', 'summary'],
57
- properties: {
58
- id: { type: 'string' },
59
- summary: { type: 'string' },
60
- severity: { type: 'string' },
61
- stories: { type: 'array', items: { type: 'string' } },
62
- },
63
- },
64
- },
65
- },
66
- }
67
-
68
- const COMPLETENESS_SCHEMA = {
69
- type: 'object',
70
- required: ['schema', 'gaps'],
71
- additionalProperties: true,
72
- // gaps = review angles NOT yet covered (e.g. "no security-boundary scan run"). Empty => complete.
73
- properties: {
74
- schema: { const: 1 },
75
- gaps: { type: 'array', items: { type: 'string' } },
76
- },
77
- }
78
-
79
48
  const DIRECTIVES_SCHEMA = {
80
49
  type: 'object',
81
50
  required: ['schema', 'directives'],
@@ -124,8 +93,6 @@ function parseArgs(x) {
124
93
  const a = parseArgs(args)
125
94
  const batchNumber = a.batchNumber
126
95
  const sprintId = a.sprintId || null
127
- const dryRounds = a.dryRounds ?? 2
128
- const maxRounds = a.maxRounds ?? 5
129
96
  if (batchNumber == null) throw new Error('args must include { batchNumber }')
130
97
 
131
98
  // --- per-agent model tiers ----------------------------------------------------
@@ -133,12 +100,11 @@ if (batchNumber == null) throw new Error('args must include { batchNumber }')
133
100
  // args.models by the invoking skill — a Workflow script can't read files. We invert it
134
101
  // to role->tier and overlay it on a baked-in default so the workflow self-hosts a sane
135
102
  // assignment even when args.models is absent. Static + args only => journal-replay safe.
136
- // Retro stages map to synthetic role keys (not the single RETROSPECTIVE persona) so each
137
- // stage can be tuned independently: the loop-until-dry aggregate review + completeness
138
- // critic are the genuine quality work (RETRO-REVIEW -> opus); analyze/directives are
139
- // lighter (RETRO -> sonnet); calibrate/embed/IO are mechanical (haiku).
103
+ // Retro stages map to synthetic role keys (not the single RETROSPECTIVE persona) so each stage can
104
+ // be tuned independently. The retro is learning, not review, so there is no opus tier here:
105
+ // synthesis/judgment over existing artifacts is RETRO -> sonnet; calibrate/embed/IO are mechanical
106
+ // (haiku). The cross-story review that used to justify opus now lives in the sprint-end gate.
140
107
  const DEFAULT_MODELS = {
141
- 'RETRO-REVIEW': 'opus',
142
108
  RETRO: 'sonnet',
143
109
  CALIBRATE: 'haiku', EMBED: 'haiku', PERSIST: 'haiku',
144
110
  }
@@ -185,9 +151,6 @@ const retroPrompt = (instruction, returnContract) => {
185
151
  (returnContract || 'Return your findings as the JSON object specified.')
186
152
  }
187
153
 
188
- // A stable de-dup key so loop-until-dry converges (don't re-count the same finding).
189
- const findingKey = (f) => `${(f.summary || '').toLowerCase().trim().slice(0, 80)}`
190
-
191
154
  // ---------------------------------------------------------------------------
192
155
 
193
156
  let calibration = null
@@ -202,93 +165,33 @@ if (sprintId) {
202
165
  log(`calibration: ${(calibration.flagged_pairs || []).length} flagged pair(s); velocity unstable=${calibration.velocity?.unstable}`)
203
166
  }
204
167
 
205
- phase('Analyze')
206
- await agent(
207
- retroPrompt(
208
- 'Run analyze.md: read all CRITIC reviews, QA-B bug reports, JUDGE rejections, and cost data; categorize rejection/bug patterns.',
209
- 'Return ONLY { schema:1, findings:[{id,summary,severity,stories}] } as JSON.',
210
- ),
211
- { label: 'analyze', phase: 'Analyze', schema: FINDINGS_SCHEMA, model: modelFor('RETRO') },
212
- )
213
-
214
- phase('Aggregate')
215
- // LOOP-UNTIL-DRY (R5): re-run the 3-pass aggregate review until `dryRounds` consecutive
216
- // rounds surface nothing new, deduping against everything already seen. A simple
217
- // fixed-pass review (the prose behavior) misses the tail; this does not.
218
- const seen = new Set()
219
- const confirmed = []
220
- let dry = 0
221
- let round = 0
222
- while (dry < dryRounds && round < maxRounds) {
223
- round += 1
224
- const r = await agent(
225
- retroPrompt(
226
- `Run aggregate-review.md (round ${round}): 3-pass CRITIC-style review of the aggregate diff (last retro tag to HEAD) — ` +
227
- `correctness across story boundaries, convention/pattern drift, architecture/integration. ` +
228
- `Report ONLY findings not already reported in earlier rounds.`,
229
- 'Return ONLY { schema:1, findings:[{id,summary,severity,stories}] } as JSON.',
230
- ),
231
- { label: `aggregate:round-${round}`, phase: 'Aggregate', schema: FINDINGS_SCHEMA, model: modelFor('RETRO-REVIEW') },
232
- )
233
- const fresh = (r.findings || []).filter((f) => !seen.has(findingKey(f)))
234
- if (!fresh.length) {
235
- dry += 1
236
- log(`aggregate round ${round}: dry (${dry}/${dryRounds})`)
237
- continue
238
- }
239
- dry = 0
240
- for (const f of fresh) seen.add(findingKey(f))
241
- confirmed.push(...fresh)
242
- log(`aggregate round ${round}: +${fresh.length} new finding(s) (${confirmed.length} total)`)
243
- }
244
-
245
- // COMPLETENESS-CRITIC (R5): ask what review angle we never ran. Each named gap gets one
246
- // targeted review round; anything it surfaces joins the confirmed set.
247
- const critic = await agent(
248
- retroPrompt(
249
- `We ran ${round} aggregate-review round(s) and found ${confirmed.length} finding(s). ` +
250
- `What review angle was NOT covered (e.g. a modality, a security boundary, a contract surface)? ` +
251
- `List only genuine gaps — empty if coverage is complete.`,
252
- 'Return ONLY { schema:1, gaps:["..."] } as JSON.',
253
- ),
254
- { label: 'completeness-critic', phase: 'Aggregate', schema: COMPLETENESS_SCHEMA, model: modelFor('RETRO-REVIEW') },
255
- )
256
- if ((critic.gaps || []).length) {
257
- log(`completeness-critic surfaced ${critic.gaps.length} gap(s) — running targeted reviews`)
258
- const extra = await parallel(
259
- critic.gaps.map((gap, i) => () =>
260
- agent(
261
- retroPrompt(`Targeted aggregate review for the previously-uncovered angle: "${gap}". Report only findings not already reported.`,
262
- 'Return ONLY { schema:1, findings:[{id,summary,severity,stories}] } as JSON.'),
263
- { label: `aggregate:gap-${i + 1}`, phase: 'Aggregate', schema: FINDINGS_SCHEMA, model: modelFor('RETRO-REVIEW') },
264
- )),
265
- )
266
- for (const r of extra.filter(Boolean)) {
267
- for (const f of (r.findings || [])) {
268
- if (!seen.has(findingKey(f))) { seen.add(findingKey(f)); confirmed.push(f) }
269
- }
270
- }
271
- }
272
- log(`aggregate review complete: ${confirmed.length} confirmed finding(s)`)
273
-
274
- phase('Directives')
275
- // The agent PROPOSES directives (with impact_level + a touchesInvariant flag). The CODE
276
- // enforces the policy — deterministic, uncheatable — per the §5b determinism map:
168
+ phase('Synthesize')
169
+ // Learning, not review: one pass mines the artifacts the sprint ALREADY produced (CRITIC reviews,
170
+ // QA-B bug reports, JUDGE rejections, rejection-cycle counts, cost data) per analyze.md, then drafts
171
+ // correction directives per directives.md — no fresh code review, no opus loop. The agent proposes
172
+ // directives (with impact_level + a touchesInvariant flag); the CODE below enforces the policy —
173
+ // deterministic, uncheatable — per the §5b determinism map:
277
174
  // - touchesInvariant -> ARCHITECTURE-CONFLICT: never auto-applied, surfaced to the user
278
175
  // - impact_level 'high' -> proposal only, requires user approval
279
176
  // - 'low' / 'medium' -> auto-applied (medium also notifies the Lead)
280
177
  const drafted = await agent(
281
178
  retroPrompt(
282
- `Run directives.md against the ${confirmed.length} confirmed finding(s)` +
283
- (calibration ? ' and the calibration metrics' : '') +
284
- `. For EACH proposed directive set impact_level (low|medium|high) and touchesInvariant=true if it would skip test ` +
285
- `execution, allow shipping without evidence, weaken a quality gate, or exempt mandatory tests. Do NOT self-censor — ` +
286
- `propose it and flag it; the orchestrator decides what gets applied.`,
179
+ `Run analyze.md then directives.md: read all CRITIC reviews, QA-B bug reports, JUDGE rejections, ` +
180
+ `rejection-cycle counts, and cost data from this sprint's story outputs; categorize the recurring ` +
181
+ `rejection/bug patterns; then propose correction directives for those patterns` +
182
+ (calibration ? ' and for the calibration metrics' : '') +
183
+ `. Do NOT re-review the shipped code for new bugs — that was CRITIC/JUDGE's job during the sprint; ` +
184
+ `your job is to learn from what they already found. For EACH proposed directive set impact_level ` +
185
+ `(low|medium|high) and touchesInvariant=true if it would skip test execution, allow shipping without ` +
186
+ `evidence, weaken a quality gate, or exempt mandatory tests. Do NOT self-censor — propose it and flag ` +
187
+ `it; the orchestrator decides what gets applied.`,
287
188
  'Return ONLY { schema:1, directives:[{target_agent,directive,reason,impact_level,touchesInvariant,category}] } as JSON.',
288
189
  ),
289
- { label: 'draft-directives', phase: 'Directives', schema: DIRECTIVES_SCHEMA, model: modelFor('RETRO') },
190
+ { label: 'synthesize', phase: 'Synthesize', schema: DIRECTIVES_SCHEMA, model: modelFor('RETRO') },
290
191
  )
291
192
 
193
+ phase('Directives')
194
+
292
195
  const all = drafted.directives || []
293
196
  const conflicts = all.filter((d) => d.touchesInvariant)
294
197
  const highImpact = all.filter((d) => !d.touchesInvariant && d.impact_level === 'high')
@@ -328,9 +231,7 @@ const embed = await agent(
328
231
  return {
329
232
  batchNumber,
330
233
  sprintId,
331
- aggregate_findings: confirmed.length,
332
- aggregate_rounds: round,
333
- completeness_gaps: (critic.gaps || []).length,
234
+ directives_proposed: all.length,
334
235
  directives_applied: applied.length,
335
236
  directives_pending_approval: proposals.length,
336
237
  architecture_conflicts: conflicts.length,
@@ -59,6 +59,7 @@ export const meta = {
59
59
  { title: 'Critic', detail: 'three independent passes in parallel -> triage -> rejection loop (code-owned cap)' },
60
60
  { title: 'QA', detail: 'execute tests against real infra' },
61
61
  { title: 'Judge', detail: 'evidence-based ship decision' },
62
+ { title: 'Integration', detail: 'single cross-story seam review — only when >1 story touched overlapping files' },
62
63
  ],
63
64
  }
64
65
 
@@ -123,6 +124,31 @@ const FINDINGS_SCHEMA = {
123
124
  },
124
125
  }
125
126
 
127
+ // Sprint-end cross-story seam review. Advisory: stories already passed JUDGE, so this does not
128
+ // re-gate them — it surfaces integration findings to be filed as bugs against the affected stories.
129
+ const INTEGRATION_SCHEMA = {
130
+ type: 'object',
131
+ required: ['schema', 'verdict', 'findings'],
132
+ additionalProperties: true,
133
+ properties: {
134
+ schema: { const: 1 },
135
+ verdict: { enum: ['clean', 'findings'] },
136
+ findings: {
137
+ type: 'array',
138
+ items: {
139
+ type: 'object',
140
+ required: ['summary'],
141
+ properties: {
142
+ summary: { type: 'string' },
143
+ severity: { enum: ['High', 'Med', 'Low'] },
144
+ files: { type: 'array', items: { type: 'string' } },
145
+ stories: { type: 'array', items: { type: 'string' } },
146
+ },
147
+ },
148
+ },
149
+ },
150
+ }
151
+
126
152
  const RESOLVED_GRAPH_SCHEMA = {
127
153
  type: 'object',
128
154
  required: ['tasks', 'skipped'],
@@ -190,7 +216,7 @@ for (const s of batch) {
190
216
  // assignment even when args.models is absent. Static + args only => journal-replay safe.
191
217
  // gates -> opus (judgment), spec/build -> sonnet, CLI-runners/IO -> haiku.
192
218
  const DEFAULT_MODELS = {
193
- READINESS: 'opus', CRITIC: 'opus', JUDGE: 'opus',
219
+ READINESS: 'opus', CRITIC: 'opus', JUDGE: 'opus', INTEGRATION: 'opus',
194
220
  REQS: 'sonnet', UXA: 'sonnet', 'QA-A': 'sonnet', 'QA-B': 'sonnet',
195
221
  BEND: 'sonnet', FEND: 'sonnet', DATA: 'sonnet', 'MCP-DEV': 'sonnet',
196
222
  LIBDEV: 'sonnet', DOCGEN: 'sonnet', IAC: 'sonnet', MOBILE: 'sonnet',
@@ -284,11 +310,66 @@ for (let i = 0; i < batch.length; i++) {
284
310
 
285
311
  const shippedCount = results.filter((r) => r.shipped).length
286
312
  log(`sprint complete: ${shippedCount}/${results.length} shipped`)
313
+
314
+ // Sprint-end integration gate: per-story CRITIC reviews each diff in ISOLATION and never sees two
315
+ // stories together, so cross-story SEAMS (a module touched by >1 story, mismatched integration
316
+ // points) are its one structural blind spot. Cover it with a SINGLE bounded review — but only when
317
+ // it could find something: >1 story shipped AND at least two of them touched an overlapping file.
318
+ // No loop, no completeness-critic (that disproportionate apparatus was removed from the retro for
319
+ // the same reason). Advisory only — stories already passed JUDGE, so findings are surfaced as bugs
320
+ // to file, not a re-gate. Overlap is computed from the dev handoff `files`, so a disjoint sprint
321
+ // (every story in its own corner of the tree) skips the gate entirely.
322
+ const integration = await runIntegrationGate(results.filter((r) => r.shipped))
323
+
287
324
  return {
288
325
  shipped: results.every((r) => r.shipped),
289
326
  stories_shipped: shippedCount,
290
327
  stories_rolled_over: results.length - shippedCount,
291
328
  results,
329
+ integration,
330
+ }
331
+
332
+ // runIntegrationGate: returns null when not warranted (≤1 shipped story or no file overlap), else
333
+ // the single review's structured result ({ verdict, findings }) for the orchestrator to file as bugs.
334
+ async function runIntegrationGate(shipped) {
335
+ if (shipped.length < 2) return null
336
+
337
+ // Files touched by more than one shipped story = the seams worth reviewing.
338
+ const owners = new Map() // file -> Set(storyId)
339
+ for (const r of shipped) {
340
+ for (const f of r.files || []) {
341
+ if (!owners.has(f)) owners.set(f, new Set())
342
+ owners.get(f).add(r.storyId)
343
+ }
344
+ }
345
+ const overlapFiles = [...owners.entries()].filter(([, s]) => s.size > 1).map(([f]) => f)
346
+ if (!overlapFiles.length) {
347
+ log(`integration gate: skipped — ${shipped.length} stories shipped but no overlapping files`)
348
+ return null
349
+ }
350
+
351
+ log(`integration gate: ${overlapFiles.length} file(s) touched by >1 story — running single cross-story review`)
352
+ const result = await agent(
353
+ [
354
+ `You are **INTEGRATION**, the sprint-end cross-story seam reviewer for this batch.`,
355
+ ``,
356
+ `Per-story CRITIC reviewed each story's diff in isolation and CANNOT see two stories together.`,
357
+ `Review ONLY the cross-story SEAMS — the files below were each modified by more than one story`,
358
+ `this sprint, so that is where integration mismatches hide (incompatible signatures, ordering`,
359
+ `assumptions, duplicated/diverging logic, broken shared invariants).`,
360
+ ``,
361
+ `Overlapping files: ${JSON.stringify(overlapFiles)}`,
362
+ `Shipped stories: ${JSON.stringify(shipped.map((r) => r.storyId))}`,
363
+ ``,
364
+ `Do a SINGLE focused pass. Do NOT re-review within-story correctness (CRITIC already did) and do`,
365
+ `NOT hunt for unrelated issues. Report only genuine cross-story seam problems; empty if the seams`,
366
+ `are clean. Return ONLY { schema:1, verdict:"clean"|"findings", findings:[{summary,severity,files,stories}] } as JSON.`,
367
+ ].join('\n'),
368
+ { label: 'gate:integration', phase: 'Integration', schema: INTEGRATION_SCHEMA, model: modelFor('INTEGRATION') },
369
+ )
370
+ const findings = result.findings || []
371
+ log(`integration gate: ${findings.length} cross-story finding(s)`)
372
+ return { verdict: result.verdict, findings, overlapFiles }
292
373
  }
293
374
 
294
375
  // ===========================================================================
@@ -373,7 +454,7 @@ async function runStory(story) {
373
454
  const devFiles = builds.filter(Boolean).flatMap((b) => (Array.isArray(b.files) ? b.files : []))
374
455
  if (devFiles.length === 0) {
375
456
  log(`${storyId}: dev phase reported no files — nothing to review; skipping CRITIC/QA/JUDGE, rolling over`)
376
- return { storyId, shipped: false, verdict: 'blocked', reason: 'no-dev-output', skipped: graph.skipped }
457
+ return { storyId, shipped: false, verdict: 'blocked', reason: 'no-dev-output', skipped: graph.skipped, files: [] }
377
458
  }
378
459
 
379
460
  phase('Critic')
@@ -386,7 +467,7 @@ async function runStory(story) {
386
467
  const decision = await runGate(storyId, 'JUDGE', 'judge.md',
387
468
  'Review evidence (tests, traceability, bugs) and make the ship decision.', 'Judge', null)
388
469
 
389
- return { storyId, shipped: decision.verdict === 'pass', verdict: decision.verdict, skipped: graph.skipped }
470
+ return { storyId, shipped: decision.verdict === 'pass', verdict: decision.verdict, skipped: graph.skipped, files: devFiles }
390
471
 
391
472
  // --- per-story closures over storyId/devTasks ----------------------------
392
473
 
@@ -52,12 +52,27 @@ Loop sprints until all stories are shipped, blocked, or cancelled. Each iteratio
52
52
  **ALWAYS** read `{epic_progress_path}` and `{backlog_path}` from disk at the top of each sprint.
53
53
 
54
54
  #### 4b. Check for remaining work (cross-epic)
55
- Read `{backlog_path}` (all epics). If no `pending` stories with met dependencies remain:
56
- - all remaining `shipped`/`cancelled` project complete, go to Step 5;
57
- - remaining `blocked`/`blocked-on-user` → report blockers, go to Step 5;
58
- - remaining `pending` but all with unmet `depends_on` → circular/missing prerequisite, report and stop.
55
+ **Do not hand-walk the dependency graph.** Candidate eligibility is deterministic — call the
56
+ resolver instead of deciding by hand which `depends_on` are "met":
59
57
 
60
- Otherwise collect the eligible pending story IDs across ALL epics whose dependencies are met (with `projectType` from config and each story's `testing_profiles`) as this sprint's candidate list.
58
+ ```
59
+ node .valent-pipeline/bin/cli.js resolve-eligible --backlog {backlog_path}
60
+ ```
61
+
62
+ It emits `{ eligible: [ids in priority order], blocked: [{ id, reason }] }`. A pending story is
63
+ **eligible** when its whole dependency chain is live — a prerequisite that is itself a pending,
64
+ eligible story is **INCLUDED in this sprint's candidate list**, not deferred to a later sprint.
65
+ That is the point: `plan.workflow.js` packs the chain together (`sprint-pack` orders prerequisites
66
+ before dependents and buffers anything over velocity), and `sprint.workflow.js` runs the batch
67
+ sequentially on a shared branch — so a dependency chain ships together in one sprint, capacity
68
+ permitting. A story is **blocked** (withheld) only when a prerequisite is `cancelled` /
69
+ `blocked` / `blocked-on-user` / missing, or a blocking bug is unresolved.
70
+
71
+ Interpret the output:
72
+ - `eligible` is non-empty → these IDs are this sprint's candidate list. Pair each with its
73
+ `projectType` (from config) and `testing_profiles` for Step 4c.
74
+ - `eligible` is empty and every remaining item is `shipped`/`cancelled` → project complete, go to Step 5.
75
+ - `eligible` is empty but `blocked` is non-empty → report the blocked stories with their reasons, go to Step 5.
61
76
 
62
77
  #### 4c. Plan the sprint
63
78
  Invoke `plan.workflow.js` via the **Workflow tool**:
@@ -81,11 +96,13 @@ Workflow({
81
96
  })
82
97
  ```
83
98
 
84
- Runs each story sequentially through the per-story pipeline on a shared branch, rolling over any story JUDGE rejects or that trips the cap. Returns `{ shipped, stories_shipped, stories_rolled_over, results: [...] }`. Record its `runId`.
99
+ Runs each story sequentially through the per-story pipeline on a shared branch, rolling over any story JUDGE rejects or that trips the cap. After the batch, a **sprint-end integration gate** runs a single cross-story seam review — but ONLY when >1 story shipped and at least two touched an overlapping file (otherwise it's skipped). Returns `{ shipped, stories_shipped, stories_rolled_over, results: [...], integration }`, where `integration` is `null` (not warranted) or `{ verdict, findings, overlapFiles }`. Record its `runId`.
85
100
 
86
101
  #### 4e. Update progress + re-resolve dependencies
87
102
  For each shipped story: move it to `stories_completed` with a compact one-line outcome tagged with its epic; update `total_completed` / `last_updated`; **FIFO at 80 lines** (evict oldest). Read and follow `.valent-pipeline/steps/orchestration/update-backlog-status.md`. Then **re-check whether any previously blocked stories are now unblocked** (their `depends_on` / `blocked_by_bugs` may have been resolved by what just shipped) — these become eligible for the next sprint's candidate list.
88
103
 
104
+ If the sprint result's `integration.findings` is non-empty, file each as a `bug` backlog item (per `update-backlog-status.md`'s conditional-ship bug format) against the affected stories' epic, so the cross-story seam issue is tracked and prioritized like any other bug.
105
+
89
106
  #### 4f. Retrospective (mandatory, blocking)
90
107
  Invoke `retro.workflow.js`:
91
108
 
@@ -96,7 +113,7 @@ Workflow({
96
113
  })
97
114
  ```
98
115
 
99
- Record its `runId`. Retro runs every sprint and tightens future sizing.
116
+ Record its `runId`. Retro runs every sprint and tightens future sizing. It **learns from the sprint's own artifacts** — calibration (CLI), one synthesis pass that mines CRITIC/JUDGE/QA/cost data into correction directives, then embed — and is bounded and cheap (no fresh code review; that is CRITIC/JUDGE's job during the sprint, and cross-story seams are covered by the sprint-end integration gate in 4d).
100
117
 
101
118
  #### 4g. Continue
102
119
  Increment `{n}` and return to Step 4a. **Do NOT ask permission to continue between sprints** — the project loop is autonomous.
@@ -0,0 +1,49 @@
1
+ import { readFileSync, existsSync } from 'fs';
2
+ import { isAbsolute, join, extname } from 'path';
3
+ import { parse as parseYaml } from 'yaml';
4
+ import { resolveEligibleStories } from '../lib/sprint.js';
5
+
6
+ /** Read a JSON or YAML file (by extension) and return the parsed object. */
7
+ function loadStructured(path) {
8
+ const abs = isAbsolute(path) ? path : join(process.cwd(), path);
9
+ if (!existsSync(abs)) throw new Error(`File not found: ${abs}`);
10
+ const raw = readFileSync(abs, 'utf-8');
11
+ return extname(abs).toLowerCase() === '.json' ? JSON.parse(raw) : parseYaml(raw);
12
+ }
13
+
14
+ /**
15
+ * `valent-pipeline resolve-eligible (--backlog <path> | --stories <path>)`
16
+ *
17
+ * Deterministic cross-epic candidate eligibility for the next sprint (src/lib/sprint.js). Replaces
18
+ * the hand-run "collect pending stories whose dependencies are met" step in the project skills.
19
+ * A pending story is eligible when its whole dependency chain is live (every prerequisite is
20
+ * shipped OR another pending+eligible story), so a dependency chain can be groomed and packed
21
+ * into ONE sprint — packSprint orders the prerequisites first and the sprint executes them
22
+ * sequentially. Stories withheld by a dead/missing prerequisite or unresolved bug are reported.
23
+ *
24
+ * Emits JSON: { eligible: [ids in priority order], blocked: [{ id, reason }] }.
25
+ *
26
+ * --backlog reads a backlog file and resolves its `items`. --stories reads an explicit array of
27
+ * items (JSON or YAML); both forms tolerate either an array or an `items:` list.
28
+ */
29
+ export async function resolveEligibleCmd(options) {
30
+ let items;
31
+ try {
32
+ const src = options.stories || options.backlog;
33
+ if (!src) throw new Error('Either --backlog <path> or --stories <path> is required.');
34
+ const data = loadStructured(src);
35
+ items = Array.isArray(data) ? data : data.items;
36
+ } catch (err) {
37
+ console.error(`Error: ${err.message}`);
38
+ process.exit(1);
39
+ }
40
+
41
+ if (!Array.isArray(items)) {
42
+ console.error('Error: no item list found (expected an array or an `items:` list).');
43
+ process.exit(1);
44
+ }
45
+
46
+ const result = resolveEligibleStories(items);
47
+ console.log(JSON.stringify(result, null, 2));
48
+ process.exit(0);
49
+ }
package/src/lib/sprint.js CHANGED
@@ -97,6 +97,107 @@ export function packSprint(stories, velocity) {
97
97
  };
98
98
  }
99
99
 
100
/**
 * Cross-epic candidate eligibility for the next sprint — which pending backlog items can be
 * groomed now. A deterministic replacement for the hand-run "collect pending stories whose
 * dependencies are met" step in the project skills (`valent-run-project*` Step 4b).
 *
 * The OLD rule only surfaced a story whose prerequisites were ALREADY shipped, so a dependency
 * chain trickled in one story per sprint (A ships, then B becomes eligible, then C...). This
 * instead surfaces a pending story whenever its whole dependency chain is LIVE — every
 * prerequisite is either already shipped or another pending story that is itself eligible.
 * `packSprint` then auto-includes those prerequisites in dependency-safe order and the sprint
 * executes them sequentially, so a chain can run together in ONE sprint (up to the velocity
 * budget; overflow falls to the groomed buffer).
 *
 * Only items whose dependency chain hits a DEAD prerequisite (cancelled / blocked /
 * blocked-on-user, or missing from the backlog) or a blocking bug that is not yet resolved are
 * withheld — reported in `blocked` with the offending ref. Removal propagates: a dependent of a
 * withheld story is itself withheld (the fixpoint below), so a dead prerequisite blocks its
 * whole downstream chain.
 *
 * NOTE(review): a dependency CYCLE among candidates survives the fixpoint (each member sees the
 * other as still eligible), so cyclic stories are reported as eligible — confirm that
 * `packSprint` / plan validation rejects cycles downstream.
 *
 * @param {Array} items - backlog items: { id, status, depends_on|dependencies, blocked_by_bugs,
 *   priority }. `type` is irrelevant here — any item in `candidateStatus` is a candidate.
 *   Entries that are not objects (e.g. a blank YAML list entry parsed as null) are ignored, and
 *   a scalar `blocked_by_bugs` is treated as a one-element list.
 * @param {object} [opts]
 *   - satisfiedStatuses: statuses that satisfy a dependency (default ['shipped'])
 *   - deadStatuses: statuses that permanently block a dependent (default
 *     ['cancelled','blocked','blocked-on-user'])
 *   - candidateStatus: status of items considered as candidates (default 'pending')
 * @returns {{ eligible: string[], blocked: Array<{ id: string, reason: string }> }}
 *   `eligible` is ascending-priority ordered (lower number first; missing or non-numeric
 *   priority last, ties by id under a fixed 'en' collation so the order does not depend on the
 *   host locale) so the downstream groom/size/pack sees the most important work first. A
 *   prerequisite in an in-progress status (neither shipped, dead, nor still `pending`) is
 *   treated as already-moving and does NOT block — only genuinely
 *   dead/missing/pending-but-withheld deps do.
 */
export function resolveEligibleStories(items, opts = {}) {
  const { satisfiedStatuses, deadStatuses, candidateStatus: candidateOpt } = opts ?? {};
  // Ignore null / scalar entries instead of crashing on `.id`.
  const list = (Array.isArray(items) ? items : []).filter(
    (it) => it !== null && typeof it === 'object',
  );
  const satisfied = new Set(satisfiedStatuses ?? ['shipped']);
  const dead = new Set(deadStatuses ?? ['cancelled', 'blocked', 'blocked-on-user']);
  const candidateStatus = candidateOpt ?? 'pending';

  const byId = new Map(list.map((it) => [it.id, it]));
  const blocked = []; // { id, reason }, in the order the items were withheld
  const blockedIds = new Set();
  const recordBlocked = (id, reason) => {
    if (blockedIds.has(id)) return; // keep only the first reason per item
    blockedIds.add(id);
    blocked.push({ id, reason });
  };

  // Normalise `blocked_by_bugs`: absent -> none, scalar -> single ref. Iterating a bare string
  // would walk its characters and silently drop the block.
  const bugRefsOf = (it) => {
    const refs = it.blocked_by_bugs;
    if (refs == null) return [];
    return Array.isArray(refs) ? refs : [refs];
  };

  // A blocking bug is cleared when its referenced bug item is shipped (update-backlog-status.md)
  // or no longer present in the backlog (the entry was removed). Otherwise the dependent waits.
  // Returns the first unresolved bug id, or null when nothing blocks.
  const bugBlocker = (it) => {
    for (const bugId of bugRefsOf(it)) {
      const bug = byId.get(bugId);
      if (bug && !satisfied.has(bug.status)) return bugId;
    }
    return null;
  };

  // Seed the eligible set with every candidate that isn't bug-blocked, then iterate to a fixpoint:
  // drop any whose dependency chain reaches a dead/missing/withheld prerequisite, propagating the
  // removal downstream until stable.
  const eligible = new Set();
  for (const it of list) {
    if (it.status !== candidateStatus) continue;
    const bug = bugBlocker(it);
    // Compare against null (not truthiness) so a falsy id such as 0 still blocks.
    if (bug !== null) recordBlocked(it.id, `blocked by unresolved bug "${bug}"`);
    else eligible.add(it.id);
  }

  let changed = true;
  while (changed) {
    changed = false;
    // Iterate over a snapshot: the live set is mutated while walking.
    for (const id of [...eligible]) {
      const it = byId.get(id);
      for (const depId of depsOf(it)) {
        const dep = byId.get(depId);
        let reason = null;
        if (!dep) reason = `depends on "${depId}" which is missing from the backlog`;
        else if (dead.has(dep.status)) reason = `depends on "${depId}" which is ${dep.status}`;
        else if (dep.status === candidateStatus && !eligible.has(depId)) {
          // Prerequisite is a candidate that has itself been withheld — chain is dead upstream.
          reason = `depends on "${depId}" which is not eligible (its own prerequisites are unmet)`;
        }
        // else: dep is satisfied or in an in-progress status, or a still-eligible candidate -> OK.
        if (reason) {
          eligible.delete(id);
          recordBlocked(id, reason);
          changed = true;
          break; // the first offending prerequisite is the reported one
        }
      }
    }
  }

  // Numeric priority with the same coercion subtraction would apply; anything missing or not a
  // number sorts last rather than feeding NaN into the comparator (which makes the order
  // implementation-defined).
  const priorityOf = (id) => {
    const raw = byId.get(id)?.priority;
    if (raw == null) return Infinity;
    const value = Number(raw);
    return Number.isNaN(value) ? Infinity : value;
  };
  const eligibleIds = [...eligible].sort((a, b) => {
    const pa = priorityOf(a);
    const pb = priorityOf(b);
    if (pa !== pb) return pa < pb ? -1 : 1;
    // Fixed locale: the tie-break must not vary with the host's default locale.
    return String(a).localeCompare(String(b), 'en');
  });

  return { eligible: eligibleIds, blocked };
}
200
+
100
201
  /** Sample standard deviation; 0 when fewer than 2 samples. */
101
202
  function stdev(values) {
102
203
  if (values.length < 2) return 0;