@chllming/wave-orchestration 0.5.4 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. package/CHANGELOG.md +52 -3
  2. package/README.md +33 -5
  3. package/docs/README.md +18 -4
  4. package/docs/agents/wave-cont-eval-role.md +36 -0
  5. package/docs/agents/{wave-evaluator-role.md → wave-cont-qa-role.md} +14 -11
  6. package/docs/agents/wave-documentation-role.md +1 -1
  7. package/docs/agents/wave-infra-role.md +1 -1
  8. package/docs/agents/wave-integration-role.md +3 -3
  9. package/docs/agents/wave-launcher-role.md +4 -3
  10. package/docs/agents/wave-security-role.md +40 -0
  11. package/docs/concepts/context7-vs-skills.md +1 -1
  12. package/docs/concepts/what-is-a-wave.md +56 -6
  13. package/docs/evals/README.md +166 -0
  14. package/docs/evals/benchmark-catalog.json +663 -0
  15. package/docs/guides/author-and-run-waves.md +135 -0
  16. package/docs/guides/planner.md +5 -0
  17. package/docs/guides/terminal-surfaces.md +2 -0
  18. package/docs/plans/component-cutover-matrix.json +1 -1
  19. package/docs/plans/component-cutover-matrix.md +1 -1
  20. package/docs/plans/current-state.md +19 -1
  21. package/docs/plans/examples/wave-example-live-proof.md +435 -0
  22. package/docs/plans/migration.md +42 -0
  23. package/docs/plans/wave-orchestrator.md +46 -7
  24. package/docs/plans/waves/wave-0.md +4 -4
  25. package/docs/reference/live-proof-waves.md +177 -0
  26. package/docs/reference/migration-0.2-to-0.5.md +26 -19
  27. package/docs/reference/npmjs-trusted-publishing.md +6 -5
  28. package/docs/reference/runtime-config/README.md +14 -4
  29. package/docs/reference/sample-waves.md +87 -0
  30. package/docs/reference/skills.md +110 -42
  31. package/docs/research/agent-context-sources.md +130 -11
  32. package/docs/research/coordination-failure-review.md +266 -0
  33. package/docs/roadmap.md +6 -2
  34. package/package.json +2 -2
  35. package/releases/manifest.json +35 -2
  36. package/scripts/research/agent-context-archive.mjs +83 -1
  37. package/scripts/research/manifests/agent-context-expanded-2026-03-22.mjs +811 -0
  38. package/scripts/wave-orchestrator/adhoc.mjs +1331 -0
  39. package/scripts/wave-orchestrator/agent-state.mjs +358 -6
  40. package/scripts/wave-orchestrator/artifact-schemas.mjs +173 -0
  41. package/scripts/wave-orchestrator/clarification-triage.mjs +10 -3
  42. package/scripts/wave-orchestrator/config.mjs +48 -12
  43. package/scripts/wave-orchestrator/context7.mjs +2 -0
  44. package/scripts/wave-orchestrator/coord-cli.mjs +51 -19
  45. package/scripts/wave-orchestrator/coordination-store.mjs +26 -4
  46. package/scripts/wave-orchestrator/coordination.mjs +83 -9
  47. package/scripts/wave-orchestrator/dashboard-state.mjs +20 -8
  48. package/scripts/wave-orchestrator/dep-cli.mjs +5 -2
  49. package/scripts/wave-orchestrator/docs-queue.mjs +8 -2
  50. package/scripts/wave-orchestrator/evals.mjs +451 -0
  51. package/scripts/wave-orchestrator/feedback.mjs +15 -1
  52. package/scripts/wave-orchestrator/install.mjs +32 -9
  53. package/scripts/wave-orchestrator/launcher-closure.mjs +281 -0
  54. package/scripts/wave-orchestrator/launcher-runtime.mjs +334 -0
  55. package/scripts/wave-orchestrator/launcher.mjs +709 -601
  56. package/scripts/wave-orchestrator/ledger.mjs +123 -20
  57. package/scripts/wave-orchestrator/local-executor.mjs +99 -12
  58. package/scripts/wave-orchestrator/planner.mjs +177 -42
  59. package/scripts/wave-orchestrator/replay.mjs +6 -3
  60. package/scripts/wave-orchestrator/role-helpers.mjs +84 -0
  61. package/scripts/wave-orchestrator/shared.mjs +75 -11
  62. package/scripts/wave-orchestrator/skills.mjs +637 -106
  63. package/scripts/wave-orchestrator/traces.mjs +71 -48
  64. package/scripts/wave-orchestrator/wave-files.mjs +947 -101
  65. package/scripts/wave.mjs +9 -0
  66. package/skills/README.md +202 -0
  67. package/skills/provider-aws/SKILL.md +111 -0
  68. package/skills/provider-aws/adapters/claude.md +1 -0
  69. package/skills/provider-aws/adapters/codex.md +1 -0
  70. package/skills/provider-aws/references/service-verification.md +39 -0
  71. package/skills/provider-aws/skill.json +50 -1
  72. package/skills/provider-custom-deploy/SKILL.md +59 -0
  73. package/skills/provider-custom-deploy/skill.json +46 -1
  74. package/skills/provider-docker-compose/SKILL.md +90 -0
  75. package/skills/provider-docker-compose/adapters/local.md +1 -0
  76. package/skills/provider-docker-compose/skill.json +49 -1
  77. package/skills/provider-github-release/SKILL.md +116 -1
  78. package/skills/provider-github-release/adapters/claude.md +1 -0
  79. package/skills/provider-github-release/adapters/codex.md +1 -0
  80. package/skills/provider-github-release/skill.json +51 -1
  81. package/skills/provider-kubernetes/SKILL.md +137 -0
  82. package/skills/provider-kubernetes/adapters/claude.md +1 -0
  83. package/skills/provider-kubernetes/adapters/codex.md +1 -0
  84. package/skills/provider-kubernetes/references/kubectl-patterns.md +58 -0
  85. package/skills/provider-kubernetes/skill.json +48 -1
  86. package/skills/provider-railway/SKILL.md +118 -1
  87. package/skills/provider-railway/references/verification-commands.md +39 -0
  88. package/skills/provider-railway/skill.json +67 -1
  89. package/skills/provider-ssh-manual/SKILL.md +91 -0
  90. package/skills/provider-ssh-manual/skill.json +50 -1
  91. package/skills/repo-coding-rules/SKILL.md +84 -0
  92. package/skills/repo-coding-rules/skill.json +30 -1
  93. package/skills/role-cont-eval/SKILL.md +90 -0
  94. package/skills/role-cont-eval/adapters/codex.md +1 -0
  95. package/skills/role-cont-eval/skill.json +36 -0
  96. package/skills/role-cont-qa/SKILL.md +93 -0
  97. package/skills/role-cont-qa/adapters/claude.md +1 -0
  98. package/skills/role-cont-qa/skill.json +36 -0
  99. package/skills/role-deploy/SKILL.md +90 -0
  100. package/skills/role-deploy/skill.json +32 -1
  101. package/skills/role-documentation/SKILL.md +66 -0
  102. package/skills/role-documentation/skill.json +32 -1
  103. package/skills/role-implementation/SKILL.md +62 -0
  104. package/skills/role-implementation/skill.json +32 -1
  105. package/skills/role-infra/SKILL.md +74 -0
  106. package/skills/role-infra/skill.json +32 -1
  107. package/skills/role-integration/SKILL.md +79 -1
  108. package/skills/role-integration/skill.json +32 -1
  109. package/skills/role-research/SKILL.md +58 -0
  110. package/skills/role-research/skill.json +32 -1
  111. package/skills/role-security/SKILL.md +60 -0
  112. package/skills/role-security/skill.json +36 -0
  113. package/skills/runtime-claude/SKILL.md +60 -1
  114. package/skills/runtime-claude/skill.json +32 -1
  115. package/skills/runtime-codex/SKILL.md +52 -1
  116. package/skills/runtime-codex/skill.json +32 -1
  117. package/skills/runtime-local/SKILL.md +39 -0
  118. package/skills/runtime-local/skill.json +32 -1
  119. package/skills/runtime-opencode/SKILL.md +51 -0
  120. package/skills/runtime-opencode/skill.json +32 -1
  121. package/skills/wave-core/SKILL.md +107 -0
  122. package/skills/wave-core/references/marker-syntax.md +62 -0
  123. package/skills/wave-core/skill.json +31 -1
  124. package/wave.config.json +35 -6
  125. package/skills/role-evaluator/SKILL.md +0 -6
  126. package/skills/role-evaluator/skill.json +0 -5
@@ -9,6 +9,7 @@ import {
9
9
  readJsonOrNull,
10
10
  writeJsonAtomic,
11
11
  } from "./shared.mjs";
12
+ import { resolveEvalTargetsAgainstCatalog } from "./evals.mjs";
12
13
 
13
14
  export const EXIT_CONTRACT_COMPLETION_VALUES = ["contract", "integrated", "authoritative", "live"];
14
15
  export const EXIT_CONTRACT_DURABILITY_VALUES = ["none", "ephemeral", "durable"];
@@ -20,6 +21,19 @@ const COMPLETION_ORDER = ORDER(EXIT_CONTRACT_COMPLETION_VALUES);
20
21
  const DURABILITY_ORDER = ORDER(EXIT_CONTRACT_DURABILITY_VALUES);
21
22
  const PROOF_ORDER = ORDER(EXIT_CONTRACT_PROOF_VALUES);
22
23
  const DOC_IMPACT_ORDER = ORDER(EXIT_CONTRACT_DOC_IMPACT_VALUES);
24
// Component maturity ladder. Array order is significant: the derived
// COMPONENT_MATURITY_ORDER lookup is compared with ">=" by the proof helpers
// below, so earlier entries rank lower than later ones.
// NOTE(review): ORDER() is defined elsewhere in this file; presumably it maps
// each value to its array index - confirm.
const COMPONENT_MATURITY_LEVELS = [
  "inventoried",
  "contract-frozen",
  "repo-landed",
  "baseline-proved",
  "pilot-live",
  "qa-proved",
  "fleet-ready",
  "cutover-ready",
  "deprecation-ready",
];
const COMPONENT_MATURITY_ORDER = ORDER(COMPONENT_MATURITY_LEVELS);
// Threshold level used by proofCentricLevelReached().
const PROOF_CENTRIC_COMPONENT_LEVEL = "pilot-live";
23
37
 
24
38
  const WAVE_PROOF_REGEX =
25
39
  /^\[wave-proof\]\s*completion=(contract|integrated|authoritative|live)\s+durability=(none|ephemeral|durable)\s+proof=(unit|integration|live)\s+state=(met|gap)\s*(?:detail=(.*))?$/gim;
@@ -29,6 +43,10 @@ const WAVE_DOC_CLOSURE_REGEX =
29
43
  /^\[wave-doc-closure\]\s*state=(closed|no-change|delta)(?:\s+paths=([^\n]*?))?(?:\s+detail=(.*))?$/gim;
30
44
  const WAVE_INTEGRATION_REGEX =
31
45
  /^\[wave-integration\]\s*state=(ready-for-doc-closure|needs-more-work)\s+claims=(\d+)\s+conflicts=(\d+)\s+blockers=(\d+)\s*(?:detail=(.*))?$/gim;
46
// Matches a cont-EVAL signal line such as:
//   [wave-eval] state=satisfied targets=2 benchmarks=1 regressions=0 target_ids=a,b benchmark_ids=x detail=...
// Capture groups: 1 state, 2 targets, 3 benchmarks, 4 regressions,
// 5 target_ids (optional), 6 benchmark_ids (optional), 7 detail (optional).
// The regex carries the "g" flag, so it keeps lastIndex state between exec() calls.
const WAVE_EVAL_REGEX =
  /^\[wave-eval\]\s*state=(satisfied|needs-more-work|blocked)\s+targets=(\d+)\s+benchmarks=(\d+)\s+regressions=(\d+)(?:\s+target_ids=([^\s]+))?(?:\s+benchmark_ids=([^\s]+))?\s*(?:detail=(.*))?$/gim;
// Matches a security review signal line such as:
//   [wave-security] state=clear findings=0 approvals=0 detail=...
// Capture groups: 1 state, 2 findings, 3 approvals, 4 detail (optional).
const WAVE_SECURITY_REGEX =
  /^\[wave-security\]\s*state=(clear|concerns|blocked)\s+findings=(\d+)\s+approvals=(\d+)\s*(?:detail=(.*))?$/gim;
32
50
  const WAVE_GATE_REGEX =
33
51
  /^\[wave-gate\]\s*architecture=(pass|concerns|blocked)\s+integration=(pass|concerns|blocked)\s+durability=(pass|concerns|blocked)\s+live=(pass|concerns|blocked)\s+docs=(pass|concerns|blocked)\s*(?:detail=(.*))?$/gim;
34
52
  const WAVE_GAP_REGEX =
@@ -100,6 +118,29 @@ function parsePaths(value) {
100
118
  .filter(Boolean);
101
119
  }
102
120
 
121
/**
 * Splits a comma-separated id list into individual ids.
 *
 * The value is first passed through cleanText(), then each comma-separated
 * item is trimmed and lower-cased; empty items are dropped.
 *
 * @param {*} value - Raw list text (for example a regex capture group).
 * @returns {string[]} Lower-cased, non-empty ids in their original order.
 */
function parseIdList(value) {
  return cleanText(value)
    .split(",")
    .map((item) => item.trim().toLowerCase())
    .filter(Boolean);
}
127
+
128
/**
 * Returns the distinct, non-empty cleanText() forms of the given values,
 * sorted with the default string ordering.
 *
 * @param {Array<*>} [values] - Values to normalize; anything that is not an
 *   array is treated as an empty list.
 * @returns {string[]} A new sorted array without duplicates or empty entries.
 */
function uniqueSorted(values) {
  const list = Array.isArray(values) ? values : [];
  const cleaned = list.map((value) => cleanText(value)).filter(Boolean);
  return Array.from(new Set(cleaned)).sort();
}
131
+
132
/**
 * Checks whether two lists hold strictly equal entries in the same order.
 *
 * @param {string[]} a - First list.
 * @param {string[]} b - Second list.
 * @returns {boolean} True when both are arrays of equal length whose entries
 *   match position by position; false otherwise (including non-array input).
 */
function sameStringLists(a, b) {
  if (!Array.isArray(a) || !Array.isArray(b) || a.length !== b.length) {
    return false;
  }
  for (let index = 0; index < a.length; index += 1) {
    if (a[index] !== b[index]) {
      return false;
    }
  }
  return true;
}
143
+
103
144
  function findLastMatch(text, regex, mapper) {
104
145
  if (!text) {
105
146
  return null;
@@ -197,6 +238,44 @@ function meetsOrExceeds(actual, required, orderMap) {
197
238
  return orderMap[actual] >= orderMap[required];
198
239
  }
199
240
 
241
/**
 * Reports whether a component maturity level is at or above the
 * proof-centric threshold (PROOF_CENTRIC_COMPONENT_LEVEL).
 *
 * @param {*} level - Maturity level name; it is stringified and trimmed
 *   before the lookup in COMPONENT_MATURITY_ORDER.
 * @returns {boolean} True when the level's order value is >= the threshold's.
 */
function proofCentricLevelReached(level) {
  const normalizedLevel = String(level || "").trim();
  return (
    COMPONENT_MATURITY_ORDER[normalizedLevel] >=
    COMPONENT_MATURITY_ORDER[PROOF_CENTRIC_COMPONENT_LEVEL]
  );
}
247
+
248
/**
 * Finds the highest maturity level among an agent's component targets.
 *
 * For every id in agent.components the level is read from
 * agent.componentTargets[id]; ids without a target are ignored.
 *
 * @param {object} agent - Agent definition with optional `components` and
 *   `componentTargets`.
 * @returns {string|null} The level with the greatest COMPONENT_MATURITY_ORDER
 *   value, or null when the agent declares no component targets.
 */
function highestAgentComponentTargetLevel(agent) {
  const levels = Array.isArray(agent?.components)
    ? agent.components
        .map((componentId) => agent?.componentTargets?.[componentId] || null)
        .filter(Boolean)
    : [];
  if (levels.length === 0) {
    return null;
  }
  // Levels missing from the order table rank below every known level. A sort
  // comparator would yield NaN for them, which leaves the result order
  // unspecified, so scan for the maximum instead.
  const rankOf = (level) => {
    const rank = COMPONENT_MATURITY_ORDER[level];
    return Number.isInteger(rank) ? rank : -1;
  };
  return levels.reduce((best, level) => (rankOf(level) > rankOf(best) ? level : best));
}
261
+
262
/**
 * Decides whether a declared proof artifact is mandatory for an agent.
 *
 * @param {object} agent - Agent definition; its highest component target
 *   level is obtained from highestAgentComponentTargetLevel().
 * @param {object} artifact - Proof artifact declaration with an optional
 *   `requiredFor` array of maturity levels.
 * @returns {boolean} False for a missing artifact. True when `requiredFor` is
 *   absent or empty, or when the agent has no component target level. Otherwise
 *   true only if the agent's highest target level is at or above at least one
 *   level listed in `requiredFor`.
 */
function proofArtifactRequiredForAgent(agent, artifact) {
  if (!artifact) {
    return false;
  }
  const requiredFor = Array.isArray(artifact.requiredFor) ? artifact.requiredFor : [];
  if (requiredFor.length === 0) {
    return true;
  }
  const highestTarget = highestAgentComponentTargetLevel(agent);
  if (!highestTarget) {
    return true;
  }
  const highestRank = COMPONENT_MATURITY_ORDER[highestTarget];
  return requiredFor.some((level) => highestRank >= COMPONENT_MATURITY_ORDER[level]);
}
278
+
200
279
  export function normalizeExitContract(raw) {
201
280
  if (!raw || typeof raw !== "object") {
202
281
  return null;
@@ -292,6 +371,21 @@ export function buildAgentExecutionSummary({ agent, statusRecord, logPath, repor
292
371
  blockers: Number.parseInt(String(match[4] || "0"), 10) || 0,
293
372
  detail: cleanText(match[5]),
294
373
  })),
374
+ eval: findLastMatch(signalText, WAVE_EVAL_REGEX, (match) => ({
375
+ state: match[1],
376
+ targets: Number.parseInt(String(match[2] || "0"), 10) || 0,
377
+ benchmarks: Number.parseInt(String(match[3] || "0"), 10) || 0,
378
+ regressions: Number.parseInt(String(match[4] || "0"), 10) || 0,
379
+ targetIds: parseIdList(match[5]),
380
+ benchmarkIds: parseIdList(match[6]),
381
+ detail: cleanText(match[7]),
382
+ })),
383
+ security: findLastMatch(signalText, WAVE_SECURITY_REGEX, (match) => ({
384
+ state: match[1],
385
+ findings: Number.parseInt(String(match[2] || "0"), 10) || 0,
386
+ approvals: Number.parseInt(String(match[3] || "0"), 10) || 0,
387
+ detail: cleanText(match[4]),
388
+ })),
295
389
  gate: findLastMatch(signalText, WAVE_GATE_REGEX, (match) => ({
296
390
  architecture: match[1],
297
391
  integration: match[2],
@@ -309,8 +403,25 @@ export function buildAgentExecutionSummary({ agent, statusRecord, logPath, repor
309
403
  ? agent.deliverables.map((deliverable) => ({
310
404
  path: deliverable,
311
405
  exists: fs.existsSync(path.resolve(REPO_ROOT, deliverable)),
406
+ modifiedAt:
407
+ fs.existsSync(path.resolve(REPO_ROOT, deliverable))
408
+ ? fs.statSync(path.resolve(REPO_ROOT, deliverable)).mtime.toISOString()
409
+ : null,
312
410
  }))
313
411
  : [],
412
+ proofArtifacts: Array.isArray(agent?.proofArtifacts)
413
+ ? agent.proofArtifacts.map((artifact) => {
414
+ const absolutePath = path.resolve(REPO_ROOT, artifact.path);
415
+ const exists = fs.existsSync(absolutePath);
416
+ return {
417
+ path: artifact.path,
418
+ kind: artifact.kind || null,
419
+ requiredFor: Array.isArray(artifact.requiredFor) ? artifact.requiredFor : [],
420
+ exists,
421
+ modifiedAt: exists ? fs.statSync(absolutePath).mtime.toISOString() : null,
422
+ };
423
+ })
424
+ : [],
314
425
  verdict: verdict.verdict
315
426
  ? {
316
427
  verdict: verdict.verdict,
@@ -453,6 +564,34 @@ export function validateImplementationSummary(agent, summary) {
453
564
  }
454
565
  }
455
566
  }
567
+ const proofArtifacts = Array.isArray(agent?.proofArtifacts) ? agent.proofArtifacts : [];
568
+ if (proofArtifacts.length > 0) {
569
+ const artifactState = new Map(
570
+ Array.isArray(summary.proofArtifacts)
571
+ ? summary.proofArtifacts.map((artifact) => [artifact.path, artifact])
572
+ : [],
573
+ );
574
+ for (const proofArtifact of proofArtifacts) {
575
+ if (!proofArtifactRequiredForAgent(agent, proofArtifact)) {
576
+ continue;
577
+ }
578
+ const artifact = artifactState.get(proofArtifact.path);
579
+ if (!artifact) {
580
+ return {
581
+ ok: false,
582
+ statusCode: "missing-proof-artifact-summary",
583
+ detail: `Missing proof artifact presence record for ${agent.agentId} path ${proofArtifact.path}.`,
584
+ };
585
+ }
586
+ if (artifact.exists !== true) {
587
+ return {
588
+ ok: false,
589
+ statusCode: "missing-proof-artifact",
590
+ detail: `Agent ${agent.agentId} did not land required proof artifact ${proofArtifact.path}.`,
591
+ };
592
+ }
593
+ }
594
+ }
456
595
  return {
457
596
  ok: true,
458
597
  statusCode: "pass",
@@ -488,6 +627,62 @@ export function validateDocumentationClosureSummary(agent, summary) {
488
627
  };
489
628
  }
490
629
 
630
/**
 * Validates the security review portion of an agent execution summary.
 *
 * Checks run in this order; the first failure is returned:
 *   1. a parsed [wave-security] marker must be present (`summary.security`);
 *   2. `summary.reportPath` must be set and exist under REPO_ROOT;
 *   3. state "clear" is rejected when findings or approvals are non-zero;
 *   4. state "blocked" fails the validation.
 * State "concerns" still passes, with statusCode "security-concerns".
 *
 * @param {object} agent - Agent definition; `agentId` is used in messages
 *   (falls back to "A7").
 * @param {object} summary - Execution summary for the agent.
 * @returns {{ok: boolean, statusCode: string, detail: string}} Validation result.
 */
export function validateSecuritySummary(agent, summary) {
  const agentId = agent?.agentId || "A7";
  if (!summary?.security) {
    return {
      ok: false,
      statusCode: "missing-wave-security",
      detail: appendTerminationHint(`Missing [wave-security] marker for ${agentId}.`, summary),
    };
  }
  if (!summary.reportPath) {
    return {
      ok: false,
      statusCode: "missing-security-report",
      detail: `Missing security review report path for ${agentId}.`,
    };
  }
  if (!fs.existsSync(path.resolve(REPO_ROOT, summary.reportPath))) {
    return {
      ok: false,
      statusCode: "missing-security-report",
      detail: `Missing security review report at ${summary.reportPath}.`,
    };
  }
  const security = summary.security;
  const hasOpenItems = (security.findings || 0) > 0 || (security.approvals || 0) > 0;
  if (security.state === "clear" && hasOpenItems) {
    return {
      ok: false,
      statusCode: "invalid-security-clear-state",
      detail: "Security review cannot report clear while findings or approvals remain open.",
    };
  }
  if (security.state === "blocked") {
    return {
      ok: false,
      statusCode: "security-blocked",
      detail: security.detail || `Security review reported blocked for ${agentId}.`,
    };
  }
  const hasConcerns = security.state === "concerns";
  return {
    ok: true,
    statusCode: hasConcerns ? "security-concerns" : "pass",
    detail:
      security.detail ||
      (hasConcerns
        ? "Security review reported advisory concerns."
        : "Security review reported clear."),
  };
}
685
+
491
686
  export function validateIntegrationSummary(agent, summary) {
492
687
  if (!summary?.integration) {
493
688
  return {
@@ -515,7 +710,148 @@ export function validateIntegrationSummary(agent, summary) {
515
710
  };
516
711
  }
517
712
 
518
- export function validateEvaluatorSummary(agent, summary) {
713
// Normalizes an eval target or benchmark id for comparison. Ids parsed from
// the [wave-eval] marker are lower-cased by parseIdList(), so declared ids are
// folded the same way before they are compared.
function foldContEvalId(value) {
  return String(value ?? "").trim().toLowerCase();
}

// Runs the live-mode contract checks for cont-EVAL: declared eval targets,
// reported target_ids / benchmark_ids and their counts, regressions, and the
// benchmark catalog rules. Returns a failure result, or null when all pass.
function findStrictContEvalFailure(agentId, evalSummary, options) {
  const evalTargets = Array.isArray(options.evalTargets) ? options.evalTargets : [];
  if (evalTargets.length === 0) {
    return {
      ok: false,
      statusCode: "missing-cont-eval-contract",
      detail: `Missing eval target contract for ${agentId}.`,
    };
  }
  const expectedTargetIds = uniqueSorted(evalTargets.map((target) => foldContEvalId(target?.id)));
  const actualTargetIds = uniqueSorted(
    (Array.isArray(evalSummary.targetIds) ? evalSummary.targetIds : []).map(foldContEvalId),
  );
  if (actualTargetIds.length === 0) {
    return {
      ok: false,
      statusCode: "missing-cont-eval-target-ids",
      detail: `Missing target_ids in [wave-eval] marker for ${agentId}.`,
    };
  }
  if (evalSummary.targets !== actualTargetIds.length) {
    return {
      ok: false,
      statusCode: "cont-eval-target-count-mismatch",
      detail: `cont-EVAL reported ${evalSummary.targets} targets, but target_ids enumerates ${actualTargetIds.length}.`,
    };
  }
  if (!sameStringLists(actualTargetIds, expectedTargetIds)) {
    return {
      ok: false,
      statusCode: "cont-eval-target-mismatch",
      detail: `cont-EVAL target_ids must match the declared eval targets (${expectedTargetIds.join(", ")}).`,
    };
  }
  const actualBenchmarkIds = uniqueSorted(
    (Array.isArray(evalSummary.benchmarkIds) ? evalSummary.benchmarkIds : []).map(foldContEvalId),
  );
  if (actualBenchmarkIds.length === 0) {
    return {
      ok: false,
      statusCode: "missing-cont-eval-benchmarks",
      detail: `Missing benchmark_ids in [wave-eval] marker for ${agentId}.`,
    };
  }
  if (evalSummary.benchmarks !== actualBenchmarkIds.length) {
    return {
      ok: false,
      statusCode: "cont-eval-benchmark-count-mismatch",
      detail: `cont-EVAL reported ${evalSummary.benchmarks} benchmarks, but benchmark_ids enumerates ${actualBenchmarkIds.length}.`,
    };
  }
  if ((evalSummary.regressions || 0) > 0) {
    return {
      ok: false,
      statusCode: "cont-eval-regressions",
      detail: evalSummary.detail || "cont-EVAL reported unresolved regressions.",
    };
  }
  const resolvedTargets = resolveEvalTargetsAgainstCatalog(evalTargets, {
    benchmarkCatalogPath: options.benchmarkCatalogPath,
  });
  const actualBenchmarkSet = new Set(actualBenchmarkIds);
  const allowedBenchmarkIds = new Set(
    resolvedTargets.targets.flatMap((target) =>
      (target.allowedBenchmarks || []).map(foldContEvalId),
    ),
  );
  // Every reported benchmark must be allowed by at least one declared target.
  for (const benchmarkId of actualBenchmarkIds) {
    if (!allowedBenchmarkIds.has(benchmarkId)) {
      return {
        ok: false,
        statusCode: "cont-eval-benchmark-mismatch",
        detail: `cont-EVAL selected undeclared benchmark "${benchmarkId}".`,
      };
    }
  }
  for (const target of resolvedTargets.targets) {
    if (target.selection === "pinned") {
      // Pinned targets require every listed benchmark to be reported.
      const missingPinned = (target.benchmarks || []).filter(
        (benchmarkId) => !actualBenchmarkSet.has(foldContEvalId(benchmarkId)),
      );
      if (missingPinned.length > 0) {
        return {
          ok: false,
          statusCode: "cont-eval-benchmark-mismatch",
          detail: `cont-EVAL must include pinned benchmarks for ${target.id}: ${missingPinned.join(", ")}.`,
        };
      }
      continue;
    }
    // Other targets need at least one reported benchmark from their allowed set.
    const coversTarget = (target.allowedBenchmarks || []).some((benchmarkId) =>
      actualBenchmarkSet.has(foldContEvalId(benchmarkId)),
    );
    if (!coversTarget) {
      return {
        ok: false,
        statusCode: "cont-eval-benchmark-mismatch",
        detail: `cont-EVAL must select at least one benchmark from family "${target.benchmarkFamily}" for ${target.id}.`,
      };
    }
  }
  return null;
}

/**
 * Validates the cont-EVAL portion of an agent execution summary.
 *
 * In every mode a parsed [wave-eval] marker must be present with state
 * "satisfied", and a declared report path must exist under REPO_ROOT. In
 * "live" mode (options.mode, case-insensitive; default "compat") a report
 * path is mandatory and the marker must additionally match the declared eval
 * target contract and benchmark catalog. Target and benchmark ids are
 * compared case-insensitively.
 *
 * @param {object} agent - Agent definition; `agentId` is used in messages
 *   (falls back to "E0").
 * @param {object} summary - Execution summary for the agent.
 * @param {object} [options] - `mode`, `evalTargets`, `benchmarkCatalogPath`.
 * @returns {{ok: boolean, statusCode: string, detail: string}} Validation result.
 */
export function validateContEvalSummary(agent, summary, options = {}) {
  const agentId = agent?.agentId || "E0";
  const mode = String(options.mode || "compat").trim().toLowerCase();
  const strict = mode === "live";
  if (!summary?.eval) {
    return {
      ok: false,
      statusCode: "missing-wave-eval",
      detail: appendTerminationHint(`Missing [wave-eval] marker for ${agentId}.`, summary),
    };
  }
  if (strict && !summary.reportPath) {
    return {
      ok: false,
      statusCode: "missing-cont-eval-report",
      detail: `Missing cont-EVAL report path for ${agentId}.`,
    };
  }
  if (summary.eval.state !== "satisfied") {
    return {
      ok: false,
      statusCode:
        summary.eval.state === "blocked" ? "cont-eval-blocked" : "cont-eval-needs-more-work",
      detail: summary.eval.detail || `cont-EVAL reported ${summary.eval.state}.`,
    };
  }
  if (summary.reportPath && !fs.existsSync(path.resolve(REPO_ROOT, summary.reportPath))) {
    return {
      ok: false,
      statusCode: "missing-cont-eval-report",
      detail: `Missing cont-EVAL report at ${summary.reportPath}.`,
    };
  }
  if (strict) {
    const failure = findStrictContEvalFailure(agentId, summary.eval, options);
    if (failure) {
      return failure;
    }
  }
  return {
    ok: true,
    statusCode: "pass",
    detail: summary.eval.detail || "cont-EVAL reported satisfied targets.",
  };
}
851
+
852
+ export function validateContQaSummary(agent, summary, options = {}) {
853
+ const mode = String(options.mode || "compat").trim().toLowerCase();
854
+ const strict = mode === "live";
519
855
  if (!summary?.gate) {
520
856
  return {
521
857
  ok: false,
@@ -526,10 +862,26 @@ export function validateEvaluatorSummary(agent, summary) {
526
862
  ),
527
863
  };
528
864
  }
865
+ if (strict) {
866
+ if (!summary.reportPath) {
867
+ return {
868
+ ok: false,
869
+ statusCode: "missing-cont-qa-report",
870
+ detail: `Missing cont-QA report path for ${agent?.agentId || "A0"}.`,
871
+ };
872
+ }
873
+ if (!fs.existsSync(path.resolve(REPO_ROOT, summary.reportPath))) {
874
+ return {
875
+ ok: false,
876
+ statusCode: "missing-cont-qa-report",
877
+ detail: `Missing cont-QA report at ${summary.reportPath}.`,
878
+ };
879
+ }
880
+ }
529
881
  if (!summary?.verdict?.verdict) {
530
882
  return {
531
883
  ok: false,
532
- statusCode: "missing-evaluator-verdict",
884
+ statusCode: "missing-cont-qa-verdict",
533
885
  detail: appendTerminationHint(
534
886
  `Missing Verdict line or [wave-verdict] marker for ${agent?.agentId || "A0"}.`,
535
887
  summary,
@@ -539,8 +891,8 @@ export function validateEvaluatorSummary(agent, summary) {
539
891
  if (summary.verdict.verdict !== "pass") {
540
892
  return {
541
893
  ok: false,
542
- statusCode: `evaluator-${summary.verdict.verdict}`,
543
- detail: summary.verdict.detail || "Verdict read from evaluator report.",
894
+ statusCode: `cont-qa-${summary.verdict.verdict}`,
895
+ detail: summary.verdict.detail || "Verdict read from cont-QA report.",
544
896
  };
545
897
  }
546
898
  for (const key of ["architecture", "integration", "durability", "live", "docs"]) {
@@ -550,13 +902,13 @@ export function validateEvaluatorSummary(agent, summary) {
550
902
  statusCode: `gate-${key}-${summary.gate[key]}`,
551
903
  detail:
552
904
  summary.gate.detail ||
553
- `Final evaluator gate did not pass ${key}; got ${summary.gate[key]}.`,
905
+ `Final cont-QA gate did not pass ${key}; got ${summary.gate[key]}.`,
554
906
  };
555
907
  }
556
908
  }
557
909
  return {
558
910
  ok: true,
559
911
  statusCode: "pass",
560
- detail: summary.verdict.detail || summary.gate.detail || "Evaluator gate passed.",
912
+ detail: summary.verdict.detail || summary.gate.detail || "cont-QA gate passed.",
561
913
  };
562
914
  }
@@ -0,0 +1,173 @@
1
+ import { readJsonOrNull, toIsoTimestamp, writeJsonAtomic } from "./shared.mjs";
2
+
3
// Schema versions stamped onto each persisted artifact type.
export const MANIFEST_SCHEMA_VERSION = 1;
export const GLOBAL_DASHBOARD_SCHEMA_VERSION = 1;
export const WAVE_DASHBOARD_SCHEMA_VERSION = 1;
export const RELAUNCH_PLAN_SCHEMA_VERSION = 1;
export const ASSIGNMENT_SNAPSHOT_SCHEMA_VERSION = 1;
export const DEPENDENCY_SNAPSHOT_SCHEMA_VERSION = 1;
export const RUN_STATE_SCHEMA_VERSION = 2;

// `kind` discriminators stamped onto each persisted artifact type.
export const MANIFEST_KIND = "wave-manifest";
export const GLOBAL_DASHBOARD_KIND = "global-dashboard";
export const WAVE_DASHBOARD_KIND = "wave-dashboard";
export const RELAUNCH_PLAN_KIND = "wave-relaunch-plan";
export const ASSIGNMENT_SNAPSHOT_KIND = "wave-assignment-snapshot";
export const DEPENDENCY_SNAPSHOT_KIND = "wave-dependency-snapshot";
export const RUN_STATE_KIND = "wave-run-state";
18
+
19
/**
 * Tests whether a value is a non-null, non-array object.
 *
 * @param {*} value - Value to inspect.
 * @returns {boolean} True for any truthy value whose typeof is "object" and
 *   that is not an array.
 */
function isPlainObject(value) {
  return Boolean(value) && typeof value === "object" && !Array.isArray(value);
}
22
+
23
/**
 * Parses a value as a base-10 integer.
 *
 * @param {*} value - Value to parse; null and undefined are treated as "".
 * @param {*} [fallback=null] - Returned when parsing does not yield a finite number.
 * @returns {number|*} The parsed integer, or `fallback`.
 */
function normalizeInteger(value, fallback = null) {
  const parsed = Number.parseInt(String(value ?? ""), 10);
  return Number.isFinite(parsed) ? parsed : fallback;
}
27
+
28
/**
 * Converts a value to a trimmed string.
 *
 * @param {*} value - Value to stringify; null and undefined are treated as "".
 * @param {*} [fallback=null] - Returned when the trimmed string is empty.
 * @returns {string|*} The trimmed string, or `fallback`.
 */
function normalizeText(value, fallback = null) {
  const normalized = String(value ?? "").trim();
  return normalized || fallback;
}
32
+
33
/**
 * Deep-copies a value through a JSON round trip.
 *
 * @param {*} value - Value to copy.
 * @returns {*} The JSON-round-tripped copy, or undefined when the value has
 *   no JSON representation (undefined, functions, symbols).
 */
function cloneJson(value) {
  if (value === undefined) {
    return undefined;
  }
  // JSON.stringify returns undefined (not a string) for functions and symbols;
  // feeding that to JSON.parse would throw a SyntaxError.
  const serialized = JSON.stringify(value);
  return serialized === undefined ? undefined : JSON.parse(serialized);
}
36
+
37
/**
 * Builds a manifest artifact in the current schema shape.
 *
 * Only the known fields are carried over from the payload; `schemaVersion`
 * and `kind` are always set to the current manifest constants.
 *
 * @param {*} payload - Raw manifest data; non-object input is treated as {}.
 * @returns {object} Manifest with `schemaVersion`, `kind`, `generatedAt`
 *   (defaults to toIsoTimestamp()), `source` (defaults to ""), `waves` and
 *   `docs` (each defaults to []).
 */
export function normalizeManifest(payload) {
  const source = isPlainObject(payload) ? payload : {};
  return {
    schemaVersion: MANIFEST_SCHEMA_VERSION,
    kind: MANIFEST_KIND,
    generatedAt: normalizeText(source.generatedAt, toIsoTimestamp()),
    source: normalizeText(source.source, ""),
    waves: Array.isArray(source.waves) ? source.waves : [],
    docs: Array.isArray(source.docs) ? source.docs : [],
  };
}
48
+
49
/**
 * Stamps a wave dashboard state with the current schema version and kind.
 *
 * All own properties of the payload are copied; `schemaVersion` and `kind`
 * always take the wave dashboard constants, overriding payload values.
 *
 * @param {*} payload - Raw dashboard state; non-object input is treated as {}.
 * @returns {object} Shallow copy of the payload plus `schemaVersion` and `kind`.
 */
export function normalizeWaveDashboardState(payload) {
  const source = isPlainObject(payload) ? payload : {};
  return {
    ...source,
    schemaVersion: WAVE_DASHBOARD_SCHEMA_VERSION,
    kind: WAVE_DASHBOARD_KIND,
  };
}
57
+
58
/**
 * Stamps a global dashboard state with the current schema version and kind.
 *
 * All own properties of the payload are copied; `schemaVersion` and `kind`
 * always take the global dashboard constants, overriding payload values.
 *
 * @param {*} payload - Raw dashboard state; non-object input is treated as {}.
 * @returns {object} Shallow copy of the payload plus `schemaVersion` and `kind`.
 */
export function normalizeGlobalDashboardState(payload) {
  const source = isPlainObject(payload) ? payload : {};
  return {
    ...source,
    schemaVersion: GLOBAL_DASHBOARD_SCHEMA_VERSION,
    kind: GLOBAL_DASHBOARD_KIND,
  };
}
66
+
67
/**
 * Builds a relaunch plan artifact in the current schema shape.
 *
 * @param {*} payload - Raw plan data; non-object input is treated as {}.
 * @param {object} [defaults] - Fallbacks; only `defaults.wave` is consulted.
 * @returns {object} Plan with `schemaVersion`, `kind`, integer `wave` and
 *   `attempt` (or null), `phase` (or null), `selectedAgentIds` (array),
 *   `reasonBuckets` / `executorStates` / `fallbackHistory` (objects) and
 *   `createdAt` (defaults to toIsoTimestamp()).
 */
export function normalizeRelaunchPlan(payload, defaults = {}) {
  const source = isPlainObject(payload) ? payload : {};
  const objectOrEmpty = (value) => (isPlainObject(value) ? value : {});
  return {
    schemaVersion: RELAUNCH_PLAN_SCHEMA_VERSION,
    kind: RELAUNCH_PLAN_KIND,
    wave: normalizeInteger(source.wave, normalizeInteger(defaults.wave, null)),
    attempt: normalizeInteger(source.attempt, null),
    phase: normalizeText(source.phase, null),
    selectedAgentIds: Array.isArray(source.selectedAgentIds) ? source.selectedAgentIds : [],
    reasonBuckets: objectOrEmpty(source.reasonBuckets),
    executorStates: objectOrEmpty(source.executorStates),
    fallbackHistory: objectOrEmpty(source.fallbackHistory),
    createdAt: normalizeText(source.createdAt, toIsoTimestamp()),
  };
}
82
+
83
/**
 * Reads a relaunch plan from disk and normalizes it.
 *
 * @param {string} filePath - Path handed to readJsonOrNull().
 * @param {object} [defaults] - Forwarded to normalizeRelaunchPlan().
 * @returns {object|null} The normalized plan, or null when readJsonOrNull()
 *   yields a falsy payload.
 */
export function readRelaunchPlan(filePath, defaults = {}) {
  const payload = readJsonOrNull(filePath);
  return payload ? normalizeRelaunchPlan(payload, defaults) : null;
}
90
+
91
/**
 * Normalizes a relaunch plan and writes it with writeJsonAtomic().
 *
 * @param {string} filePath - Destination path.
 * @param {*} payload - Raw plan data.
 * @param {object} [defaults] - Forwarded to normalizeRelaunchPlan().
 * @returns {object} The normalized plan that was written.
 */
export function writeRelaunchPlan(filePath, payload, defaults = {}) {
  const normalized = normalizeRelaunchPlan(payload, defaults);
  writeJsonAtomic(filePath, normalized);
  return normalized;
}
96
+
97
/**
 * Builds an assignment snapshot artifact in the current schema shape.
 *
 * Two input shapes are accepted: a bare array of assignments, which is
 * wrapped using the defaults for `lane`, `wave` and `generatedAt`, or an
 * object carrying those fields itself.
 *
 * @param {*} payload - Bare assignments array or snapshot object; anything
 *   else is treated as {}.
 * @param {object} [defaults] - Fallback `lane`, `wave` and `generatedAt`.
 * @returns {object} Snapshot with `schemaVersion`, `kind`, `lane`, `wave`,
 *   `generatedAt` (ultimately defaults to toIsoTimestamp()) and `assignments`.
 */
export function normalizeAssignmentSnapshot(payload, defaults = {}) {
  // A bare array carries no metadata of its own, so wrap it and let every
  // other field fall through to the defaults.
  let source = {};
  if (Array.isArray(payload)) {
    source = { assignments: payload };
  } else if (isPlainObject(payload)) {
    source = payload;
  }
  return {
    schemaVersion: ASSIGNMENT_SNAPSHOT_SCHEMA_VERSION,
    kind: ASSIGNMENT_SNAPSHOT_KIND,
    lane: normalizeText(source.lane, normalizeText(defaults.lane, null)),
    wave: normalizeInteger(source.wave, normalizeInteger(defaults.wave, null)),
    generatedAt: normalizeText(
      source.generatedAt,
      normalizeText(defaults.generatedAt, toIsoTimestamp()),
    ),
    assignments: Array.isArray(source.assignments) ? source.assignments : [],
  };
}
118
+
119
/**
 * Reads an assignment snapshot from disk and normalizes it.
 *
 * @param {string} filePath - Path handed to readJsonOrNull().
 * @param {object} [defaults] - Forwarded to normalizeAssignmentSnapshot().
 * @returns {object|null} The normalized snapshot, or null when
 *   readJsonOrNull() yields a falsy payload.
 */
export function readAssignmentSnapshot(filePath, defaults = {}) {
  const payload = readJsonOrNull(filePath);
  return payload ? normalizeAssignmentSnapshot(payload, defaults) : null;
}
126
+
127
/**
 * Normalizes an assignment snapshot and writes it with writeJsonAtomic().
 *
 * @param {string} filePath - Destination path.
 * @param {*} payload - Bare assignments array or snapshot object.
 * @param {object} [defaults] - Forwarded to normalizeAssignmentSnapshot().
 * @returns {object} The normalized snapshot that was written.
 */
export function writeAssignmentSnapshot(filePath, payload, defaults = {}) {
  const normalized = normalizeAssignmentSnapshot(payload, defaults);
  writeJsonAtomic(filePath, normalized);
  return normalized;
}
132
+
133
/**
 * Builds a dependency snapshot artifact in the current schema shape.
 *
 * @param {*} payload - Raw snapshot data; non-object input is treated as {}.
 * @param {object} [defaults] - Fallback `lane`, `wave` and `generatedAt`.
 * @returns {object} Snapshot with `schemaVersion`, `kind`, `lane`, `wave`,
 *   `generatedAt` (ultimately defaults to toIsoTimestamp()) and the list
 *   fields `inbound`, `outbound`, `openInbound`, `openOutbound`,
 *   `requiredInbound`, `requiredOutbound` and `unresolvedInboundAssignments`
 *   (each defaults to []).
 */
export function normalizeDependencySnapshot(payload, defaults = {}) {
  const source = isPlainObject(payload) ? payload : {};
  const listOrEmpty = (value) => (Array.isArray(value) ? value : []);
  return {
    schemaVersion: DEPENDENCY_SNAPSHOT_SCHEMA_VERSION,
    kind: DEPENDENCY_SNAPSHOT_KIND,
    lane: normalizeText(source.lane, normalizeText(defaults.lane, null)),
    wave: normalizeInteger(source.wave, normalizeInteger(defaults.wave, null)),
    generatedAt: normalizeText(
      source.generatedAt,
      normalizeText(defaults.generatedAt, toIsoTimestamp()),
    ),
    inbound: listOrEmpty(source.inbound),
    outbound: listOrEmpty(source.outbound),
    openInbound: listOrEmpty(source.openInbound),
    openOutbound: listOrEmpty(source.openOutbound),
    requiredInbound: listOrEmpty(source.requiredInbound),
    requiredOutbound: listOrEmpty(source.requiredOutbound),
    unresolvedInboundAssignments: listOrEmpty(source.unresolvedInboundAssignments),
  };
}
156
+
157
/**
 * Reads a dependency snapshot from disk and normalizes it.
 *
 * @param {string} filePath - Path handed to readJsonOrNull().
 * @param {object} [defaults] - Forwarded to normalizeDependencySnapshot().
 * @returns {object|null} The normalized snapshot, or null when
 *   readJsonOrNull() yields a falsy payload.
 */
export function readDependencySnapshot(filePath, defaults = {}) {
  const payload = readJsonOrNull(filePath);
  return payload ? normalizeDependencySnapshot(payload, defaults) : null;
}
164
+
165
/**
 * Normalizes a dependency snapshot and writes it with writeJsonAtomic().
 *
 * @param {string} filePath - Destination path.
 * @param {*} payload - Raw snapshot data.
 * @param {object} [defaults] - Forwarded to normalizeDependencySnapshot().
 * @returns {object} The normalized snapshot that was written.
 */
export function writeDependencySnapshot(filePath, payload, defaults = {}) {
  const normalized = normalizeDependencySnapshot(payload, defaults);
  writeJsonAtomic(filePath, normalized);
  return normalized;
}
170
+
171
/**
 * Public wrapper around cloneJson(): returns a JSON-round-tripped deep copy
 * of an artifact payload.
 *
 * @param {*} value - Payload to copy.
 * @returns {*} Whatever cloneJson() returns for the value.
 */
export function cloneArtifactPayload(value) {
  return cloneJson(value);
}
@@ -260,11 +260,18 @@ function buildPolicyResolution(record, wave, lanePaths, resolutionContext = {})
260
260
  guidance: `Shared plan and component matrix updates are owned by ${lanePaths.documentationAgentId}.`,
261
261
  };
262
262
  }
263
- if (lower.includes("evaluator") || lower.includes("gate")) {
263
+ if (lower.includes("cont-eval") || lower.includes("benchmark") || lower.includes("eval target")) {
264
264
  return {
265
265
  type: "route",
266
- routeAgentId: lanePaths.evaluatorAgentId,
267
- guidance: `Final pass/fail judgement and gate interpretation are owned by ${lanePaths.evaluatorAgentId}.`,
266
+ routeAgentId: lanePaths.contEvalAgentId,
267
+ guidance: `Eval target selection and tuning are owned by ${lanePaths.contEvalAgentId}.`,
268
+ };
269
+ }
270
+ if (lower.includes("cont-qa") || lower.includes("evaluator") || lower.includes("gate")) {
271
+ return {
272
+ type: "route",
273
+ routeAgentId: lanePaths.contQaAgentId,
274
+ guidance: `Final cont-QA pass/fail judgement and gate interpretation are owned by ${lanePaths.contQaAgentId}.`,
268
275
  };
269
276
  }
270
277
  return null;