@chllming/wave-orchestration 0.5.4 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/CHANGELOG.md +52 -3
  2. package/README.md +33 -5
  3. package/docs/README.md +18 -4
  4. package/docs/agents/wave-cont-eval-role.md +36 -0
  5. package/docs/agents/{wave-evaluator-role.md → wave-cont-qa-role.md} +14 -11
  6. package/docs/agents/wave-documentation-role.md +1 -1
  7. package/docs/agents/wave-infra-role.md +1 -1
  8. package/docs/agents/wave-integration-role.md +3 -3
  9. package/docs/agents/wave-launcher-role.md +4 -3
  10. package/docs/agents/wave-security-role.md +40 -0
  11. package/docs/concepts/context7-vs-skills.md +1 -1
  12. package/docs/concepts/what-is-a-wave.md +56 -6
  13. package/docs/evals/README.md +166 -0
  14. package/docs/evals/benchmark-catalog.json +663 -0
  15. package/docs/guides/author-and-run-waves.md +135 -0
  16. package/docs/guides/planner.md +5 -0
  17. package/docs/guides/terminal-surfaces.md +2 -0
  18. package/docs/plans/component-cutover-matrix.json +1 -1
  19. package/docs/plans/component-cutover-matrix.md +1 -1
  20. package/docs/plans/current-state.md +19 -1
  21. package/docs/plans/examples/wave-example-live-proof.md +435 -0
  22. package/docs/plans/migration.md +42 -0
  23. package/docs/plans/wave-orchestrator.md +46 -7
  24. package/docs/plans/waves/wave-0.md +4 -4
  25. package/docs/reference/live-proof-waves.md +177 -0
  26. package/docs/reference/migration-0.2-to-0.5.md +26 -19
  27. package/docs/reference/npmjs-trusted-publishing.md +6 -5
  28. package/docs/reference/runtime-config/README.md +14 -4
  29. package/docs/reference/sample-waves.md +87 -0
  30. package/docs/reference/skills.md +110 -42
  31. package/docs/research/agent-context-sources.md +130 -11
  32. package/docs/research/coordination-failure-review.md +266 -0
  33. package/docs/roadmap.md +6 -2
  34. package/package.json +2 -2
  35. package/releases/manifest.json +35 -2
  36. package/scripts/research/agent-context-archive.mjs +83 -1
  37. package/scripts/research/manifests/agent-context-expanded-2026-03-22.mjs +811 -0
  38. package/scripts/wave-orchestrator/adhoc.mjs +1331 -0
  39. package/scripts/wave-orchestrator/agent-state.mjs +358 -6
  40. package/scripts/wave-orchestrator/artifact-schemas.mjs +173 -0
  41. package/scripts/wave-orchestrator/clarification-triage.mjs +10 -3
  42. package/scripts/wave-orchestrator/config.mjs +48 -12
  43. package/scripts/wave-orchestrator/context7.mjs +2 -0
  44. package/scripts/wave-orchestrator/coord-cli.mjs +51 -19
  45. package/scripts/wave-orchestrator/coordination-store.mjs +26 -4
  46. package/scripts/wave-orchestrator/coordination.mjs +83 -9
  47. package/scripts/wave-orchestrator/dashboard-state.mjs +20 -8
  48. package/scripts/wave-orchestrator/dep-cli.mjs +5 -2
  49. package/scripts/wave-orchestrator/docs-queue.mjs +8 -2
  50. package/scripts/wave-orchestrator/evals.mjs +451 -0
  51. package/scripts/wave-orchestrator/feedback.mjs +15 -1
  52. package/scripts/wave-orchestrator/install.mjs +32 -9
  53. package/scripts/wave-orchestrator/launcher-closure.mjs +281 -0
  54. package/scripts/wave-orchestrator/launcher-runtime.mjs +334 -0
  55. package/scripts/wave-orchestrator/launcher.mjs +709 -601
  56. package/scripts/wave-orchestrator/ledger.mjs +123 -20
  57. package/scripts/wave-orchestrator/local-executor.mjs +99 -12
  58. package/scripts/wave-orchestrator/planner.mjs +177 -42
  59. package/scripts/wave-orchestrator/replay.mjs +6 -3
  60. package/scripts/wave-orchestrator/role-helpers.mjs +84 -0
  61. package/scripts/wave-orchestrator/shared.mjs +75 -11
  62. package/scripts/wave-orchestrator/skills.mjs +637 -106
  63. package/scripts/wave-orchestrator/traces.mjs +71 -48
  64. package/scripts/wave-orchestrator/wave-files.mjs +947 -101
  65. package/scripts/wave.mjs +9 -0
  66. package/skills/README.md +202 -0
  67. package/skills/provider-aws/SKILL.md +111 -0
  68. package/skills/provider-aws/adapters/claude.md +1 -0
  69. package/skills/provider-aws/adapters/codex.md +1 -0
  70. package/skills/provider-aws/references/service-verification.md +39 -0
  71. package/skills/provider-aws/skill.json +50 -1
  72. package/skills/provider-custom-deploy/SKILL.md +59 -0
  73. package/skills/provider-custom-deploy/skill.json +46 -1
  74. package/skills/provider-docker-compose/SKILL.md +90 -0
  75. package/skills/provider-docker-compose/adapters/local.md +1 -0
  76. package/skills/provider-docker-compose/skill.json +49 -1
  77. package/skills/provider-github-release/SKILL.md +116 -1
  78. package/skills/provider-github-release/adapters/claude.md +1 -0
  79. package/skills/provider-github-release/adapters/codex.md +1 -0
  80. package/skills/provider-github-release/skill.json +51 -1
  81. package/skills/provider-kubernetes/SKILL.md +137 -0
  82. package/skills/provider-kubernetes/adapters/claude.md +1 -0
  83. package/skills/provider-kubernetes/adapters/codex.md +1 -0
  84. package/skills/provider-kubernetes/references/kubectl-patterns.md +58 -0
  85. package/skills/provider-kubernetes/skill.json +48 -1
  86. package/skills/provider-railway/SKILL.md +118 -1
  87. package/skills/provider-railway/references/verification-commands.md +39 -0
  88. package/skills/provider-railway/skill.json +67 -1
  89. package/skills/provider-ssh-manual/SKILL.md +91 -0
  90. package/skills/provider-ssh-manual/skill.json +50 -1
  91. package/skills/repo-coding-rules/SKILL.md +84 -0
  92. package/skills/repo-coding-rules/skill.json +30 -1
  93. package/skills/role-cont-eval/SKILL.md +90 -0
  94. package/skills/role-cont-eval/adapters/codex.md +1 -0
  95. package/skills/role-cont-eval/skill.json +36 -0
  96. package/skills/role-cont-qa/SKILL.md +93 -0
  97. package/skills/role-cont-qa/adapters/claude.md +1 -0
  98. package/skills/role-cont-qa/skill.json +36 -0
  99. package/skills/role-deploy/SKILL.md +90 -0
  100. package/skills/role-deploy/skill.json +32 -1
  101. package/skills/role-documentation/SKILL.md +66 -0
  102. package/skills/role-documentation/skill.json +32 -1
  103. package/skills/role-implementation/SKILL.md +62 -0
  104. package/skills/role-implementation/skill.json +32 -1
  105. package/skills/role-infra/SKILL.md +74 -0
  106. package/skills/role-infra/skill.json +32 -1
  107. package/skills/role-integration/SKILL.md +79 -1
  108. package/skills/role-integration/skill.json +32 -1
  109. package/skills/role-research/SKILL.md +58 -0
  110. package/skills/role-research/skill.json +32 -1
  111. package/skills/role-security/SKILL.md +60 -0
  112. package/skills/role-security/skill.json +36 -0
  113. package/skills/runtime-claude/SKILL.md +60 -1
  114. package/skills/runtime-claude/skill.json +32 -1
  115. package/skills/runtime-codex/SKILL.md +52 -1
  116. package/skills/runtime-codex/skill.json +32 -1
  117. package/skills/runtime-local/SKILL.md +39 -0
  118. package/skills/runtime-local/skill.json +32 -1
  119. package/skills/runtime-opencode/SKILL.md +51 -0
  120. package/skills/runtime-opencode/skill.json +32 -1
  121. package/skills/wave-core/SKILL.md +107 -0
  122. package/skills/wave-core/references/marker-syntax.md +62 -0
  123. package/skills/wave-core/skill.json +31 -1
  124. package/wave.config.json +35 -6
  125. package/skills/role-evaluator/SKILL.md +0 -6
  126. package/skills/role-evaluator/skill.json +0 -5
@@ -118,10 +118,16 @@ function defaultExecutorProfile(roleKind) {
118
118
  if (roleKind === "infra" || roleKind === "deploy" || roleKind === "research") {
119
119
  return "ops-triage";
120
120
  }
121
+ if (roleKind === "security") {
122
+ return "security-review";
123
+ }
121
124
  return "implement-fast";
122
125
  }
123
126
 
124
127
  function defaultExitContract(roleKind) {
128
+ if (roleKind === "security") {
129
+ return null;
130
+ }
125
131
  if (roleKind === "infra" || roleKind === "deploy") {
126
132
  return {
127
133
  completion: "live",
@@ -147,6 +153,9 @@ function defaultExitContract(roleKind) {
147
153
  }
148
154
 
149
155
  function buildDefaultValidationCommand(template, roleKind) {
156
+ if (roleKind === "security") {
157
+ return "Manual review of the changed security-sensitive surfaces plus required proofs.";
158
+ }
150
159
  if (template === "qa" || roleKind === "qa") {
151
160
  return "pnpm test";
152
161
  }
@@ -157,6 +166,9 @@ function buildDefaultValidationCommand(template, roleKind) {
157
166
  }
158
167
 
159
168
  function buildDefaultOutputSummary(template, roleKind) {
169
+ if (roleKind === "security") {
170
+ return "Summarize the threat model, findings, required approvals, requested fixes, and final security disposition.";
171
+ }
160
172
  if (template === "qa" || roleKind === "qa") {
161
173
  return "Summarize the proved QA coverage, the remaining gaps, and whether the wave is closure-ready.";
162
174
  }
@@ -167,6 +179,9 @@ function buildDefaultOutputSummary(template, roleKind) {
167
179
  }
168
180
 
169
181
  function buildDefaultPrimaryGoal(template, roleKind, title) {
182
+ if (roleKind === "security") {
183
+ return `Review the ${title.toLowerCase()} slice for security risks and route exact fixes before integration.`;
184
+ }
170
185
  if (template === "qa" || roleKind === "qa") {
171
186
  return `Build and validate the ${title.toLowerCase()} QA slice.`;
172
187
  }
@@ -355,7 +370,18 @@ function renderSkillsSection(skills) {
355
370
  return Array.isArray(skills) && skills.length > 0 ? renderBulletLines(skills) : [];
356
371
  }
357
372
 
358
- function renderWaveMarkdown(spec, lanePaths) {
373
+ function renderEvalTargetsSection(evalTargets) {
374
+ if (!Array.isArray(evalTargets) || evalTargets.length === 0) {
375
+ return [];
376
+ }
377
+ return evalTargets.map((target) =>
378
+ target.selection === "delegated"
379
+ ? `- id: ${target.id} | selection: delegated | benchmark-family: ${target.benchmarkFamily} | objective: ${target.objective} | threshold: ${target.threshold}`
380
+ : `- id: ${target.id} | selection: pinned | benchmarks: ${(target.benchmarks || []).join(", ")} | objective: ${target.objective} | threshold: ${target.threshold}`,
381
+ );
382
+ }
383
+
384
+ export function renderWaveMarkdown(spec, lanePaths) {
359
385
  const sections = [];
360
386
  sections.push(`# Wave ${spec.wave} - ${spec.title}`);
361
387
  sections.push("");
@@ -404,6 +430,12 @@ function renderWaveMarkdown(spec, lanePaths) {
404
430
  sections.push("## Context7 defaults");
405
431
  sections.push("");
406
432
  sections.push(...renderContext7Section(spec.context7Defaults));
433
+ if (Array.isArray(spec.evalTargets) && spec.evalTargets.length > 0) {
434
+ sections.push("");
435
+ sections.push("## Eval targets");
436
+ sections.push("");
437
+ sections.push(...renderEvalTargetsSection(spec.evalTargets));
438
+ }
407
439
  for (const agent of spec.agents) {
408
440
  sections.push("");
409
441
  sections.push(`## Agent ${agent.agentId}: ${agent.title}`);
@@ -547,39 +579,71 @@ function buildSpecialAgents({ spec, lanePaths, standardRoles }) {
547
579
  ...SHARED_PLAN_DOC_PATHS,
548
580
  ]),
549
581
  );
550
- const evaluatorTitle = standardRoles.evaluator ? "Running Evaluator" : "Custom Evaluator";
582
+ const contQaTitle = standardRoles.contQa ? "cont-QA" : "Custom cont-QA";
583
+ const contEvalTitle = standardRoles.contEval ? "cont-EVAL" : "Custom cont-EVAL";
551
584
  const integrationTitle = standardRoles.integration ? "Integration Steward" : "Custom Integration Steward";
552
585
  const documentationTitle = standardRoles.documentation
553
586
  ? "Documentation Steward"
554
587
  : "Custom Documentation Steward";
555
588
  return [
556
589
  {
557
- agentId: lanePaths.evaluatorAgentId,
558
- title: evaluatorTitle,
559
- rolePromptPaths: [lanePaths.evaluatorRolePromptPath],
590
+ agentId: lanePaths.contQaAgentId,
591
+ title: contQaTitle,
592
+ rolePromptPaths: [lanePaths.contQaRolePromptPath],
560
593
  skills: [],
561
594
  executor: { profile: "deep-review" },
562
595
  context7: { bundle: "none", query: "Architecture evaluation only; repository docs remain canonical" },
563
596
  components: [],
564
597
  capabilities: [],
565
598
  exitContract: null,
566
- primaryGoal: `Evaluate Wave ${spec.wave} and publish the final verdict.`,
599
+ primaryGoal: `Run continuous QA for Wave ${spec.wave} and publish the final closure verdict.`,
567
600
  collaborationNotes: [
568
601
  "Collect explicit verdicts from the implementation-facing agents plus A8 and A9 before closing the wave.",
569
602
  "Do not publish PASS unless the evidence, documentation closure, and integration summary are all coherent.",
570
603
  ],
571
604
  requiredContext: commonRequiredContext,
572
605
  earlierWaveOutputs: [],
573
- ownedPaths: [`docs/plans/waves/reviews/wave-${spec.wave}-evaluator.md`],
606
+ ownedPaths: [`docs/plans/waves/reviews/wave-${spec.wave}-cont-qa.md`],
574
607
  requirements: [
575
608
  "Verify the wave requirements are covered by landed evidence, not only by intent.",
576
609
  "Record any blocker that later waves must not silently assume away.",
577
610
  ],
578
611
  validationCommand:
579
- "Re-read the changed reports and end the evaluator report with `Verdict: PASS`, `Verdict: CONCERNS`, or `Verdict: BLOCKED`.",
580
- outputSummary: "Summarize the gate verdict and the top unresolved cross-cutting risks.",
612
+ "Re-read the changed reports and end the cont-QA report with `Verdict: PASS`, `Verdict: CONCERNS`, or `Verdict: BLOCKED`.",
613
+ outputSummary: "Summarize the cont-QA verdict and the top unresolved cross-cutting risks.",
581
614
  deployEnvironmentId: null,
582
615
  },
616
+ ...(standardRoles.contEval
617
+ ? [
618
+ {
619
+ agentId: lanePaths.contEvalAgentId,
620
+ title: contEvalTitle,
621
+ rolePromptPaths: [lanePaths.contEvalRolePromptPath],
622
+ skills: [],
623
+ executor: { profile: "eval-tuning" },
624
+ context7: { bundle: "none", query: "Eval tuning only; repository docs remain canonical" },
625
+ components: [],
626
+ capabilities: ["eval"],
627
+ exitContract: null,
628
+ primaryGoal: `Run the Wave ${spec.wave} eval tuning loop until the declared eval targets are satisfied or explicitly blocked.`,
629
+ collaborationNotes: [
630
+ "Treat the wave's eval targets as the governing contract for benchmark choice and tuning depth.",
631
+ "This standard cont-EVAL role is report-only by default; if fixes belong to another owner, open exact follow-up work instead of broadening scope implicitly.",
632
+ ],
633
+ requiredContext: commonRequiredContext,
634
+ earlierWaveOutputs: [],
635
+ ownedPaths: [`docs/plans/waves/reviews/wave-${spec.wave}-cont-eval.md`],
636
+ requirements: [
637
+ "Record the selected benchmark set, the commands run, observed output gaps, and regressions.",
638
+ "Emit a final `[wave-eval]` marker with target_ids and benchmark_ids that matches the final tuning state.",
639
+ ],
640
+ validationCommand:
641
+ "Re-run the selected benchmarks or service-output checks and end with a final `[wave-eval]` marker that enumerates target_ids and benchmark_ids.",
642
+ outputSummary: "Summarize the selected benchmarks, tuning outcome, regressions, and remaining owners.",
643
+ deployEnvironmentId: null,
644
+ },
645
+ ]
646
+ : []),
583
647
  {
584
648
  agentId: lanePaths.integrationAgentId,
585
649
  title: integrationTitle,
@@ -590,7 +654,7 @@ function buildSpecialAgents({ spec, lanePaths, standardRoles }) {
590
654
  components: [],
591
655
  capabilities: ["integration", "docs-shared-plan"],
592
656
  exitContract: null,
593
- primaryGoal: `Synthesize the final Wave ${spec.wave} state before documentation and evaluator closure.`,
657
+ primaryGoal: `Synthesize the final Wave ${spec.wave} state before documentation and cont-QA closure.`,
594
658
  collaborationNotes: [
595
659
  "Re-read the message board, compiled inboxes, and latest artifacts before final output.",
596
660
  "Treat contradictions, missing proof, or stale shared-plan assumptions as integration failures.",
@@ -656,6 +720,9 @@ function buildWorkerAgentSpec({
656
720
  ]),
657
721
  );
658
722
  const capabilities = values.capabilities.slice();
723
+ if (roleKind === "security" && !capabilities.some((capability) => capability.startsWith("security"))) {
724
+ capabilities.push("security-review");
725
+ }
659
726
  if (roleKind === "infra" && !capabilities.includes("infra")) {
660
727
  capabilities.push("infra");
661
728
  }
@@ -668,7 +735,8 @@ function buildWorkerAgentSpec({
668
735
  return {
669
736
  agentId,
670
737
  title,
671
- rolePromptPaths: [],
738
+ rolePromptPaths:
739
+ roleKind === "security" ? [lanePaths.securityRolePromptPath] : [],
672
740
  skills: values.skills || [],
673
741
  executor: {
674
742
  profile: values.executorProfile,
@@ -684,7 +752,7 @@ function buildWorkerAgentSpec({
684
752
  values.primaryGoal || buildDefaultPrimaryGoal(template, roleKind, title),
685
753
  collaborationNotes: [
686
754
  "Re-read the wave message board before major decisions, before validation, and before final output.",
687
- `Notify Agent ${lanePaths.evaluatorAgentId} when your evidence changes the closure picture.`,
755
+ `Notify Agent ${lanePaths.contQaAgentId} when your evidence changes the closure picture.`,
688
756
  ],
689
757
  requiredContext,
690
758
  earlierWaveOutputs: values.earlierWaveOutputs,
@@ -722,6 +790,7 @@ function buildSpecPayload({ config, lanePaths, profile, draftValues }) {
722
790
  bundle: draftValues.context7Bundle,
723
791
  query: draftValues.context7Query || null,
724
792
  },
793
+ evalTargets: draftValues.evalTargets,
725
794
  componentPromotions: draftValues.componentPromotions,
726
795
  componentsCatalog: draftValues.componentCatalog,
727
796
  agents: [
@@ -879,7 +948,7 @@ async function runProjectSetupFlow(options = {}) {
879
948
  async function collectComponentPromotions({ prompt, matrix, template, waveNumber }) {
880
949
  const targetLevel = defaultTargetLevel(template);
881
950
  const promotionCount = await prompt.askInteger("How many component promotions belong in this wave?", 1, {
882
- min: 1,
951
+ min: 0,
883
952
  });
884
953
  const componentPromotions = [];
885
954
  const componentCatalog = [];
@@ -935,7 +1004,7 @@ async function collectComponentPromotions({ prompt, matrix, template, waveNumber
935
1004
  return { componentPromotions, componentCatalog };
936
1005
  }
937
1006
 
938
- async function collectWorkerAgents({ prompt, template, profile, componentPromotions, waveNumber }) {
1007
+ async function collectWorkerAgents({ prompt, template, profile, componentPromotions, waveNumber, lane }) {
939
1008
  const defaultRoleKind = defaultWorkerRoleKindForTemplate(template);
940
1009
  const workerCount = await prompt.askInteger("How many worker agents should this wave include?", 1, {
941
1010
  min: 1,
@@ -951,19 +1020,21 @@ async function collectWorkerAgents({ prompt, template, profile, componentPromoti
951
1020
  const title = cleanText(await prompt.ask(`Worker ${agentId} title`, defaults.title));
952
1021
  const roleKind = await prompt.askChoice(
953
1022
  `Worker ${agentId} role kind`,
954
- ["implementation", "qa", "infra", "deploy", "research"],
1023
+ ["implementation", "qa", "infra", "deploy", "research", "security"],
955
1024
  defaultRoleKind,
956
1025
  );
957
1026
  const executorProfile = await prompt.askChoice(
958
1027
  `Worker ${agentId} executor profile`,
959
- ["implement-fast", "deep-review", "docs-pass", "ops-triage"],
1028
+ ["implement-fast", "deep-review", "eval-tuning", "docs-pass", "ops-triage", "security-review"],
960
1029
  defaultExecutorProfile(roleKind),
961
1030
  );
962
1031
  const ownedPaths = normalizeRepoPathList(
963
1032
  normalizeListText(
964
1033
  await prompt.ask(
965
1034
  `Worker ${agentId} owned paths (comma or | separated)`,
966
- template === "infra"
1035
+ roleKind === "security"
1036
+ ? `.tmp/${lane}-wave-launcher/security/wave-${waveNumber}-review.md`
1037
+ : template === "infra"
967
1038
  ? "scripts/,docs/plans/"
968
1039
  : template === "release"
969
1040
  ? "CHANGELOG.md,README.md"
@@ -975,11 +1046,16 @@ async function collectWorkerAgents({ prompt, template, profile, componentPromoti
975
1046
  const components = normalizeListText(
976
1047
  await prompt.ask(
977
1048
  `Worker ${agentId} component ids (comma or | separated)`,
978
- componentPromotions.map((promotion) => promotion.componentId).join(", "),
1049
+ roleKind === "security"
1050
+ ? ""
1051
+ : componentPromotions.map((promotion) => promotion.componentId).join(", "),
979
1052
  ),
980
1053
  ).map((componentId) => normalizeComponentId(componentId, `${agentId}.components`));
981
1054
  const capabilities = normalizeListText(
982
- await prompt.ask(`Worker ${agentId} capabilities (comma or | separated)`, roleKind === "implementation" ? "" : roleKind),
1055
+ await prompt.ask(
1056
+ `Worker ${agentId} capabilities (comma or | separated)`,
1057
+ roleKind === "implementation" ? "" : roleKind === "security" ? "security-review" : roleKind,
1058
+ ),
983
1059
  );
984
1060
  const additionalContext = normalizeRepoPathList(
985
1061
  normalizeListText(
@@ -1041,28 +1117,30 @@ async function collectWorkerAgents({ prompt, template, profile, componentPromoti
1041
1117
  );
1042
1118
  const context7Query = cleanText(await prompt.ask(`Worker ${agentId} Context7 query`, ""));
1043
1119
  const exitDefaults = defaultExitContract(roleKind);
1044
- const exitContract = {
1045
- completion: await prompt.askChoice(
1046
- `Worker ${agentId} exit completion`,
1047
- EXIT_CONTRACT_COMPLETION_VALUES,
1048
- exitDefaults.completion,
1049
- ),
1050
- durability: await prompt.askChoice(
1051
- `Worker ${agentId} exit durability`,
1052
- EXIT_CONTRACT_DURABILITY_VALUES,
1053
- exitDefaults.durability,
1054
- ),
1055
- proof: await prompt.askChoice(
1056
- `Worker ${agentId} exit proof`,
1057
- EXIT_CONTRACT_PROOF_VALUES,
1058
- exitDefaults.proof,
1059
- ),
1060
- docImpact: await prompt.askChoice(
1061
- `Worker ${agentId} exit doc impact`,
1062
- EXIT_CONTRACT_DOC_IMPACT_VALUES,
1063
- exitDefaults.docImpact,
1064
- ),
1065
- };
1120
+ const exitContract = exitDefaults
1121
+ ? {
1122
+ completion: await prompt.askChoice(
1123
+ `Worker ${agentId} exit completion`,
1124
+ EXIT_CONTRACT_COMPLETION_VALUES,
1125
+ exitDefaults.completion,
1126
+ ),
1127
+ durability: await prompt.askChoice(
1128
+ `Worker ${agentId} exit durability`,
1129
+ EXIT_CONTRACT_DURABILITY_VALUES,
1130
+ exitDefaults.durability,
1131
+ ),
1132
+ proof: await prompt.askChoice(
1133
+ `Worker ${agentId} exit proof`,
1134
+ EXIT_CONTRACT_PROOF_VALUES,
1135
+ exitDefaults.proof,
1136
+ ),
1137
+ docImpact: await prompt.askChoice(
1138
+ `Worker ${agentId} exit doc impact`,
1139
+ EXIT_CONTRACT_DOC_IMPACT_VALUES,
1140
+ exitDefaults.docImpact,
1141
+ ),
1142
+ }
1143
+ : null;
1066
1144
  workerAgents.push({
1067
1145
  agentId,
1068
1146
  title,
@@ -1086,6 +1164,57 @@ async function collectWorkerAgents({ prompt, template, profile, componentPromoti
1086
1164
  return workerAgents;
1087
1165
  }
1088
1166
 
1167
+ async function collectEvalTargets({ prompt }) {
1168
+ const targetCount = await prompt.askInteger(
1169
+ "How many eval targets should cont-EVAL own?",
1170
+ 1,
1171
+ { min: 1 },
1172
+ );
1173
+ const evalTargets = [];
1174
+ for (let index = 0; index < targetCount; index += 1) {
1175
+ const id = normalizeComponentId(
1176
+ await prompt.ask(`Eval target ${index + 1} id`, index === 0 ? "service-output" : `eval-target-${index + 1}`),
1177
+ `eval target ${index + 1} id`,
1178
+ );
1179
+ const selection = await prompt.askChoice(
1180
+ `Eval target ${id} benchmark selection`,
1181
+ ["delegated", "pinned"],
1182
+ "delegated",
1183
+ );
1184
+ const benchmarkFamily =
1185
+ selection === "delegated"
1186
+ ? normalizeComponentId(
1187
+ await prompt.ask(`Eval target ${id} benchmark family`, "service-output"),
1188
+ `eval target ${id} benchmark family`,
1189
+ )
1190
+ : null;
1191
+ const benchmarks =
1192
+ selection === "pinned"
1193
+ ? normalizeListText(
1194
+ await prompt.ask(
1195
+ `Eval target ${id} benchmark ids (comma or | separated)`,
1196
+ "golden-response-smoke, manual-session-review",
1197
+ ),
1198
+ ).map((entry) => normalizeComponentId(entry, `eval target ${id} benchmark id`))
1199
+ : [];
1200
+ const objective = cleanText(
1201
+ await prompt.ask(`Eval target ${id} objective`, "Improve the observable service output against the selected benchmark set."),
1202
+ );
1203
+ const threshold = cleanText(
1204
+ await prompt.ask(`Eval target ${id} success threshold`, "All selected checks green with no unresolved regressions."),
1205
+ );
1206
+ evalTargets.push({
1207
+ id,
1208
+ selection,
1209
+ benchmarkFamily,
1210
+ benchmarks,
1211
+ objective,
1212
+ threshold,
1213
+ });
1214
+ }
1215
+ return evalTargets;
1216
+ }
1217
+
1089
1218
  async function runDraftFlow(options = {}) {
1090
1219
  const config = options.config || loadWaveConfig();
1091
1220
  const profile = await ensureProjectProfile({ config });
@@ -1136,10 +1265,14 @@ async function runDraftFlow(options = {}) {
1136
1265
  const context7Bundle = await prompt.askChoice("Wave Context7 bundle", ["none"], "none");
1137
1266
  const context7Query = cleanText(await prompt.ask("Wave Context7 query", ""));
1138
1267
  const standardRoles = {
1139
- evaluator: await prompt.askBoolean("Use the standard evaluator role?", true),
1268
+ contQa: await prompt.askBoolean("Use the standard cont-QA role?", true),
1269
+ contEval: await prompt.askBoolean("Include the standard cont-EVAL role?", false),
1140
1270
  integration: await prompt.askBoolean("Use the standard integration role?", true),
1141
1271
  documentation: await prompt.askBoolean("Use the standard documentation role?", true),
1142
1272
  };
1273
+ const evalTargets = standardRoles.contEval
1274
+ ? await collectEvalTargets({ prompt })
1275
+ : [];
1143
1276
  const { componentPromotions, componentCatalog } = await collectComponentPromotions({
1144
1277
  prompt,
1145
1278
  matrix,
@@ -1152,6 +1285,7 @@ async function runDraftFlow(options = {}) {
1152
1285
  profile,
1153
1286
  componentPromotions,
1154
1287
  waveNumber,
1288
+ lane: lanePaths.lane,
1155
1289
  });
1156
1290
  const draftValues = {
1157
1291
  wave: waveNumber,
@@ -1165,6 +1299,7 @@ async function runDraftFlow(options = {}) {
1165
1299
  context7Bundle,
1166
1300
  context7Query,
1167
1301
  standardRoles,
1302
+ evalTargets,
1168
1303
  componentPromotions,
1169
1304
  componentCatalog,
1170
1305
  workerAgents,
@@ -22,19 +22,22 @@ function buildReplayLanePaths(metadata) {
22
22
  : null;
23
23
  const roles = replayContext?.roles || metadata?.roles || {};
24
24
  const validation = replayContext?.validation || metadata?.validation || {};
25
- const evaluatorAgentId = roles.evaluatorAgentId || "A0";
25
+ const contQaAgentId = roles.contQaAgentId || roles.evaluatorAgentId || "A0";
26
+ const contEvalAgentId = roles.contEvalAgentId || "E0";
26
27
  const integrationAgentId = roles.integrationAgentId || "A8";
27
28
  const documentationAgentId = roles.documentationAgentId || "A9";
28
29
  return {
29
30
  lane: replayContext?.lane || metadata?.lane || "main",
30
- evaluatorAgentId,
31
+ contQaAgentId,
32
+ contEvalAgentId,
31
33
  integrationAgentId,
32
34
  documentationAgentId,
33
35
  requireIntegrationStewardFromWave:
34
36
  validation.requireIntegrationStewardFromWave ?? null,
35
37
  laneProfile: {
36
38
  roles: {
37
- evaluatorAgentId,
39
+ contQaAgentId,
40
+ contEvalAgentId,
38
41
  integrationAgentId,
39
42
  documentationAgentId,
40
43
  },
@@ -0,0 +1,84 @@
1
+ import {
2
+ DEFAULT_CONT_EVAL_AGENT_ID,
3
+ DEFAULT_SECURITY_ROLE_PROMPT_PATH,
4
+ } from "./config.mjs";
5
+
6
+ function cleanPath(value) {
7
+ return String(value || "")
8
+ .trim()
9
+ .replaceAll("\\", "/");
10
+ }
11
+
12
+ export function isContQaReportPath(relPath) {
13
+ return /(?:^|\/)(?:reviews?|.*cont[-_]?qa).*\.(?:md|txt)$/i.test(cleanPath(relPath));
14
+ }
15
+
16
+ export function isContEvalReportPath(relPath) {
17
+ return /(?:^|\/)(?:reviews?|.*cont[-_]?eval|.*eval).*\.(?:md|txt)$/i.test(cleanPath(relPath));
18
+ }
19
+
20
+ export function isSecurityRolePromptPath(
21
+ relPath,
22
+ securityRolePromptPath = DEFAULT_SECURITY_ROLE_PROMPT_PATH,
23
+ ) {
24
+ const normalized = cleanPath(relPath);
25
+ const configured = cleanPath(securityRolePromptPath);
26
+ return (
27
+ normalized === configured ||
28
+ normalized === DEFAULT_SECURITY_ROLE_PROMPT_PATH ||
29
+ normalized.endsWith("/wave-security-role.md")
30
+ );
31
+ }
32
+
33
+ export function isSecurityReportPath(relPath) {
34
+ return /(?:^|\/).*security.*\.(?:md|txt)$/i.test(cleanPath(relPath));
35
+ }
36
+
37
+ export function isContEvalImplementationOwningAgent(
38
+ agent,
39
+ { contEvalAgentId = DEFAULT_CONT_EVAL_AGENT_ID } = {},
40
+ ) {
41
+ if (!agent || agent.agentId !== contEvalAgentId) {
42
+ return false;
43
+ }
44
+ const ownedPaths = Array.isArray(agent.ownedPaths) ? agent.ownedPaths.map(cleanPath).filter(Boolean) : [];
45
+ if (ownedPaths.length === 0) {
46
+ return false;
47
+ }
48
+ return ownedPaths.some((ownedPath) => !isContEvalReportPath(ownedPath));
49
+ }
50
+
51
+ export function isContEvalReportOnlyAgent(
52
+ agent,
53
+ { contEvalAgentId = DEFAULT_CONT_EVAL_AGENT_ID } = {},
54
+ ) {
55
+ return agent?.agentId === contEvalAgentId && !isContEvalImplementationOwningAgent(agent, {
56
+ contEvalAgentId,
57
+ });
58
+ }
59
+
60
+ export function isSecurityReviewAgent(
61
+ agent,
62
+ { securityRolePromptPath = DEFAULT_SECURITY_ROLE_PROMPT_PATH } = {},
63
+ ) {
64
+ if (!agent || typeof agent !== "object") {
65
+ return false;
66
+ }
67
+ const rolePromptPaths = Array.isArray(agent.rolePromptPaths) ? agent.rolePromptPaths : [];
68
+ if (
69
+ rolePromptPaths.some((rolePromptPath) =>
70
+ isSecurityRolePromptPath(rolePromptPath, securityRolePromptPath),
71
+ )
72
+ ) {
73
+ return true;
74
+ }
75
+ const capabilities = Array.isArray(agent.capabilities)
76
+ ? agent.capabilities.map((entry) => String(entry || "").trim().toLowerCase())
77
+ : [];
78
+ return capabilities.includes("security-review");
79
+ }
80
+
81
+ export function resolveSecurityReviewReportPath(agent) {
82
+ const ownedPaths = Array.isArray(agent?.ownedPaths) ? agent.ownedPaths.map(cleanPath).filter(Boolean) : [];
83
+ return ownedPaths.find((ownedPath) => isSecurityReportPath(ownedPath)) || null;
84
+ }
@@ -77,22 +77,71 @@ export function sanitizeOrchestratorId(value) {
77
77
  return id.slice(0, 64);
78
78
  }
79
79
 
80
+ export function sanitizeAdhocRunId(value) {
81
+ const id = String(value || "")
82
+ .trim()
83
+ .toLowerCase()
84
+ .replace(/[^a-z0-9._-]+/g, "-")
85
+ .replace(/-+/g, "-")
86
+ .replace(/^-+|-+$/g, "");
87
+ if (!id) {
88
+ throw new Error("Ad-hoc run ID is required");
89
+ }
90
+ if (!/^[a-z0-9][a-z0-9._-]*$/.test(id)) {
91
+ throw new Error(`Invalid ad-hoc run ID: ${value}`);
92
+ }
93
+ return id;
94
+ }
95
+
96
+ export function buildWorkspaceTmuxToken(workspaceRoot = REPO_ROOT) {
97
+ const repoBase =
98
+ path
99
+ .basename(path.resolve(String(workspaceRoot || REPO_ROOT)))
100
+ .toLowerCase()
101
+ .replace(/[^a-z0-9]+/g, "_")
102
+ .replace(/^_+|_+$/g, "")
103
+ .slice(0, 12) || "repo";
104
+ const repoHash = crypto
105
+ .createHash("sha1")
106
+ .update(path.resolve(String(workspaceRoot || REPO_ROOT)))
107
+ .digest("hex")
108
+ .slice(0, 8);
109
+ return `${repoBase}_${repoHash}`;
110
+ }
111
+
80
112
  export function buildLanePaths(laneInput = DEFAULT_WAVE_LANE, options = {}) {
81
113
  const config = options.config || loadWaveConfig();
82
- const laneProfile = resolveLaneProfile(config, laneInput || config.defaultLane);
114
+ const baseLaneProfile = resolveLaneProfile(config, laneInput || config.defaultLane);
115
+ const adhocRunId = options.adhocRunId ? sanitizeAdhocRunId(options.adhocRunId) : null;
116
+ const laneProfile = adhocRunId
117
+ ? {
118
+ ...baseLaneProfile,
119
+ validation: {
120
+ ...baseLaneProfile.validation,
121
+ requireComponentPromotionsFromWave: null,
122
+ requireAgentComponentsFromWave: null,
123
+ },
124
+ }
125
+ : baseLaneProfile;
83
126
  const lane = laneProfile.lane;
84
127
  const laneTmux = lane.replace(/-/g, "_");
128
+ const runKind = adhocRunId ? "adhoc" : "roadmap";
85
129
  const runVariant = String(options.runVariant || "")
86
130
  .trim()
87
131
  .toLowerCase();
88
132
  if (runVariant && runVariant !== "dry-run") {
89
133
  throw new Error(`Unsupported lane path variant: ${options.runVariant}`);
90
134
  }
135
+ const workspaceTmuxToken = buildWorkspaceTmuxToken(REPO_ROOT);
91
136
  const docsDir = path.join(REPO_ROOT, laneProfile.docsDir);
92
137
  const plansDir = path.join(REPO_ROOT, laneProfile.plansDir);
93
138
  const preferredWavesDir = path.join(REPO_ROOT, laneProfile.wavesDir);
94
139
  const legacyWavesDir = path.join(docsDir, "waves");
95
- const baseStateDir = path.join(REPO_ROOT, laneProfile.paths.stateRoot, `${lane}-wave-launcher`);
140
+ const adhocRootDir = path.join(REPO_ROOT, ".wave", "adhoc");
141
+ const adhocRunDir = adhocRunId ? path.join(adhocRootDir, "runs", adhocRunId) : null;
142
+ const baseStateDir = adhocRunId
143
+ ? path.join(REPO_ROOT, laneProfile.paths.stateRoot, `${lane}-wave-launcher`, "adhoc", adhocRunId)
144
+ : path.join(REPO_ROOT, laneProfile.paths.stateRoot, `${lane}-wave-launcher`);
96
145
  const stateDir = runVariant === "dry-run" ? path.join(baseStateDir, "dry-run") : baseStateDir;
97
146
  const orchestratorStateDir =
98
147
  runVariant === "dry-run"
@@ -103,14 +152,24 @@ export function buildLanePaths(laneInput = DEFAULT_WAVE_LANE, options = {}) {
103
152
  config,
104
153
  laneProfile,
105
154
  lane,
155
+ runKind,
156
+ runId: adhocRunId,
106
157
  runVariant,
107
158
  docsDir,
108
159
  plansDir,
109
160
  wavesDir:
110
- fs.existsSync(preferredWavesDir) || !fs.existsSync(legacyWavesDir)
161
+ adhocRunDir ||
162
+ (fs.existsSync(preferredWavesDir) || !fs.existsSync(legacyWavesDir)
111
163
  ? preferredWavesDir
112
- : legacyWavesDir,
164
+ : legacyWavesDir),
113
165
  legacyWavesDir,
166
+ adhocRootDir,
167
+ adhocRunDir,
168
+ adhocIndexPath: path.join(adhocRootDir, "index.json"),
169
+ adhocRequestPath: adhocRunDir ? path.join(adhocRunDir, "request.json") : null,
170
+ adhocSpecPath: adhocRunDir ? path.join(adhocRunDir, "spec.json") : null,
171
+ adhocWavePath: adhocRunDir ? path.join(adhocRunDir, "wave-0.md") : null,
172
+ adhocResultPath: adhocRunDir ? path.join(adhocRunDir, "result.json") : null,
114
173
  promptsDir: path.join(stateDir, "prompts"),
115
174
  logsDir: path.join(stateDir, "logs"),
116
175
  statusDir: path.join(stateDir, "status"),
@@ -121,6 +180,7 @@ export function buildLanePaths(laneInput = DEFAULT_WAVE_LANE, options = {}) {
121
180
  inboxesDir: path.join(stateDir, "inboxes"),
122
181
  ledgerDir: path.join(stateDir, "ledger"),
123
182
  integrationDir: path.join(stateDir, "integration"),
183
+ securityDir: path.join(stateDir, "security"),
124
184
  dependencySnapshotsDir: path.join(stateDir, "dependencies"),
125
185
  docsQueueDir: path.join(stateDir, "docs-queue"),
126
186
  tracesDir: path.join(stateDir, "traces"),
@@ -130,6 +190,7 @@ export function buildLanePaths(laneInput = DEFAULT_WAVE_LANE, options = {}) {
130
190
  terminalsPath: path.join(REPO_ROOT, laneProfile.paths.terminalsPath),
131
191
  skillsDir: path.join(REPO_ROOT, laneProfile.skills?.dir || "skills"),
132
192
  context7BundleIndexPath: path.join(REPO_ROOT, laneProfile.paths.context7BundleIndexPath),
193
+ benchmarkCatalogPath: path.join(REPO_ROOT, laneProfile.paths.benchmarkCatalogPath),
133
194
  componentCutoverMatrixDocPath: path.join(
134
195
  REPO_ROOT,
135
196
  laneProfile.paths.componentCutoverMatrixDocPath,
@@ -138,15 +199,18 @@ export function buildLanePaths(laneInput = DEFAULT_WAVE_LANE, options = {}) {
138
199
  REPO_ROOT,
139
200
  laneProfile.paths.componentCutoverMatrixJsonPath,
140
201
  ),
141
- sharedPlanDocs: laneProfile.sharedPlanDocs,
202
+ sharedPlanDocs: laneProfile.sharedPlanDocs || [],
142
203
  requiredPromptReferences: laneProfile.validation.requiredPromptReferences,
143
204
  rolePromptDir: laneProfile.roles.rolePromptDir,
144
- evaluatorAgentId: laneProfile.roles.evaluatorAgentId,
205
+ contQaAgentId: laneProfile.roles.contQaAgentId,
206
+ contEvalAgentId: laneProfile.roles.contEvalAgentId,
145
207
  integrationAgentId: laneProfile.roles.integrationAgentId,
146
208
  documentationAgentId: laneProfile.roles.documentationAgentId,
147
- evaluatorRolePromptPath: laneProfile.roles.evaluatorRolePromptPath,
209
+ contQaRolePromptPath: laneProfile.roles.contQaRolePromptPath,
210
+ contEvalRolePromptPath: laneProfile.roles.contEvalRolePromptPath,
148
211
  integrationRolePromptPath: laneProfile.roles.integrationRolePromptPath,
149
212
  documentationRolePromptPath: laneProfile.roles.documentationRolePromptPath,
213
+ securityRolePromptPath: laneProfile.roles.securityRolePromptPath,
150
214
  requireDocumentationStewardFromWave:
151
215
  laneProfile.validation.requireDocumentationStewardFromWave,
152
216
  requireContext7DeclarationsFromWave:
@@ -167,10 +231,10 @@ export function buildLanePaths(laneInput = DEFAULT_WAVE_LANE, options = {}) {
167
231
  terminalNamePrefix: `${lane}-wave`,
168
232
  dashboardTerminalNamePrefix: `${lane}-wave-dashboard`,
169
233
  globalDashboardTerminalName: `${lane}-wave-dashboard-global`,
170
- tmuxSessionPrefix: `oc_${laneTmux}_wave`,
171
- tmuxDashboardSessionPrefix: `oc_${laneTmux}_wave_dashboard`,
172
- tmuxGlobalDashboardSessionPrefix: `oc_${laneTmux}_wave_dashboard_global`,
173
- tmuxSocketName: `oc_${laneTmux}_waves`,
234
+ tmuxSessionPrefix: `oc_${laneTmux}_${workspaceTmuxToken}_wave`,
235
+ tmuxDashboardSessionPrefix: `oc_${laneTmux}_${workspaceTmuxToken}_wave_dashboard`,
236
+ tmuxGlobalDashboardSessionPrefix: `oc_${laneTmux}_${workspaceTmuxToken}_wave_dashboard_global`,
237
+ tmuxSocketName: `oc_${laneTmux}_${workspaceTmuxToken}_waves`,
174
238
  orchestratorStateDir,
175
239
  defaultOrchestratorBoardPath: path.join(
176
240
  orchestratorStateDir,