@jterrats/open-orchestra 1.0.14 → 1.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. package/AGENTS.md +4 -1
  2. package/CHANGELOG.md +26 -0
  3. package/dist/automation-evidence.d.ts +2 -23
  4. package/dist/automation-evidence.js +1 -218
  5. package/dist/automation-evidence.js.map +1 -1
  6. package/dist/command-manifest.js +6 -2
  7. package/dist/command-manifest.js.map +1 -1
  8. package/dist/command-routes-integrations.js +2 -1
  9. package/dist/command-routes-integrations.js.map +1 -1
  10. package/dist/command-routes.js +4 -1
  11. package/dist/command-routes.js.map +1 -1
  12. package/dist/commands.d.ts +3 -2
  13. package/dist/commands.js +7 -2
  14. package/dist/commands.js.map +1 -1
  15. package/dist/context-index-commands.d.ts +2 -0
  16. package/dist/context-index-commands.js +51 -1
  17. package/dist/context-index-commands.js.map +1 -1
  18. package/dist/context-index-service.d.ts +2 -1
  19. package/dist/context-index-service.js +3 -1
  20. package/dist/context-index-service.js.map +1 -1
  21. package/dist/context-index-signals.d.ts +2 -0
  22. package/dist/context-index-signals.js +102 -0
  23. package/dist/context-index-signals.js.map +1 -0
  24. package/dist/context-pack-service.d.ts +14 -0
  25. package/dist/context-pack-service.js +153 -0
  26. package/dist/context-pack-service.js.map +1 -0
  27. package/dist/context-search-service.d.ts +10 -0
  28. package/dist/context-search-service.js +142 -0
  29. package/dist/context-search-service.js.map +1 -0
  30. package/dist/diagrams/geometry.d.ts +12 -0
  31. package/dist/diagrams/geometry.js +69 -0
  32. package/dist/diagrams/geometry.js.map +1 -0
  33. package/dist/diagrams/iconify.d.ts +22 -0
  34. package/dist/diagrams/iconify.js +42 -0
  35. package/dist/diagrams/iconify.js.map +1 -0
  36. package/dist/diagrams/index.d.ts +6 -0
  37. package/dist/diagrams/index.js +6 -0
  38. package/dist/diagrams/index.js.map +1 -0
  39. package/dist/diagrams/layout.d.ts +2 -0
  40. package/dist/diagrams/layout.js +142 -0
  41. package/dist/diagrams/layout.js.map +1 -0
  42. package/dist/diagrams/model.d.ts +94 -0
  43. package/dist/diagrams/model.js +2 -0
  44. package/dist/diagrams/model.js.map +1 -0
  45. package/dist/diagrams/pipeline.d.ts +11 -0
  46. package/dist/diagrams/pipeline.js +90 -0
  47. package/dist/diagrams/pipeline.js.map +1 -0
  48. package/dist/diagrams/render-svg.d.ts +7 -0
  49. package/dist/diagrams/render-svg.js +66 -0
  50. package/dist/diagrams/render-svg.js.map +1 -0
  51. package/dist/diagrams/validate.d.ts +5 -0
  52. package/dist/diagrams/validate.js +120 -0
  53. package/dist/diagrams/validate.js.map +1 -0
  54. package/dist/handoff-acceptance-metadata.d.ts +6 -0
  55. package/dist/handoff-acceptance-metadata.js +8 -0
  56. package/dist/handoff-acceptance-metadata.js.map +1 -0
  57. package/dist/model-providers.d.ts +3 -1
  58. package/dist/model-providers.js +68 -11
  59. package/dist/model-providers.js.map +1 -1
  60. package/dist/performance-benchmark.d.ts +45 -0
  61. package/dist/performance-benchmark.js +134 -0
  62. package/dist/performance-benchmark.js.map +1 -0
  63. package/dist/phase-executor.js +24 -6
  64. package/dist/phase-executor.js.map +1 -1
  65. package/dist/provider-agent-wrapper.d.ts +12 -0
  66. package/dist/provider-agent-wrapper.js +264 -0
  67. package/dist/provider-agent-wrapper.js.map +1 -0
  68. package/dist/qa-commands.d.ts +1 -0
  69. package/dist/qa-commands.js +26 -0
  70. package/dist/qa-commands.js.map +1 -1
  71. package/dist/qa-evidence-surface-rules.d.ts +5 -0
  72. package/dist/qa-evidence-surface-rules.js +238 -0
  73. package/dist/qa-evidence-surface-rules.js.map +1 -0
  74. package/dist/qa-evidence-validation.d.ts +23 -0
  75. package/dist/qa-evidence-validation.js +78 -0
  76. package/dist/qa-evidence-validation.js.map +1 -0
  77. package/dist/quality-contracts.js +9 -0
  78. package/dist/quality-contracts.js.map +1 -1
  79. package/dist/release-candidate.d.ts +4 -0
  80. package/dist/release-candidate.js +54 -0
  81. package/dist/release-candidate.js.map +1 -1
  82. package/dist/release-commands.js +8 -0
  83. package/dist/release-commands.js.map +1 -1
  84. package/dist/release-readiness.d.ts +8 -0
  85. package/dist/release-readiness.js +59 -4
  86. package/dist/release-readiness.js.map +1 -1
  87. package/dist/release-reviewer-selection.d.ts +13 -0
  88. package/dist/release-reviewer-selection.js +135 -0
  89. package/dist/release-reviewer-selection.js.map +1 -0
  90. package/dist/runtime-adapters.js +4 -1
  91. package/dist/runtime-adapters.js.map +1 -1
  92. package/dist/runtime-bootstrap.js +11 -0
  93. package/dist/runtime-bootstrap.js.map +1 -1
  94. package/dist/runtime-child-prompt.d.ts +14 -0
  95. package/dist/runtime-child-prompt.js +49 -0
  96. package/dist/runtime-child-prompt.js.map +1 -0
  97. package/dist/runtime-commands.js +7 -2
  98. package/dist/runtime-commands.js.map +1 -1
  99. package/dist/runtime-context-manifest.d.ts +15 -1
  100. package/dist/runtime-context-manifest.js +21 -1
  101. package/dist/runtime-context-manifest.js.map +1 -1
  102. package/dist/runtime-context-pack-reference.d.ts +10 -0
  103. package/dist/runtime-context-pack-reference.js +37 -0
  104. package/dist/runtime-context-pack-reference.js.map +1 -0
  105. package/dist/runtime-execution-renderer.js +6 -0
  106. package/dist/runtime-execution-renderer.js.map +1 -1
  107. package/dist/runtime-execution.js +8 -4
  108. package/dist/runtime-execution.js.map +1 -1
  109. package/dist/runtime-lifecycle-notifications.d.ts +28 -0
  110. package/dist/runtime-lifecycle-notifications.js +110 -0
  111. package/dist/runtime-lifecycle-notifications.js.map +1 -0
  112. package/dist/runtime-lifecycle-watch-adapters.js +22 -3
  113. package/dist/runtime-lifecycle-watch-adapters.js.map +1 -1
  114. package/dist/runtime-lifecycle-watch.d.ts +1 -1
  115. package/dist/runtime-lifecycle-watch.js +55 -27
  116. package/dist/runtime-lifecycle-watch.js.map +1 -1
  117. package/dist/runtime-notification-commands.d.ts +2 -0
  118. package/dist/runtime-notification-commands.js +31 -0
  119. package/dist/runtime-notification-commands.js.map +1 -0
  120. package/dist/runtime-parent-action-dispatch.js +9 -0
  121. package/dist/runtime-parent-action-dispatch.js.map +1 -1
  122. package/dist/runtime-parent-actions.d.ts +2 -1
  123. package/dist/runtime-parent-actions.js +17 -6
  124. package/dist/runtime-parent-actions.js.map +1 -1
  125. package/dist/runtime-spawn-bridge.js +54 -41
  126. package/dist/runtime-spawn-bridge.js.map +1 -1
  127. package/dist/task-graph-commands.js +1 -1
  128. package/dist/task-graph-commands.js.map +1 -1
  129. package/dist/types/context-index.d.ts +57 -0
  130. package/dist/types/model-config.d.ts +93 -0
  131. package/dist/types/runtime.d.ts +10 -1
  132. package/dist/types.d.ts +4 -2
  133. package/dist/types.js.map +1 -1
  134. package/dist/web-api.js +45 -22
  135. package/dist/web-api.js.map +1 -1
  136. package/dist/web-console/assets/{index-jxCY5eEc.css → index-BHs7OIv8.css} +1 -1
  137. package/dist/web-console/assets/index-Bis4CecA.js +11 -0
  138. package/dist/web-console/index.html +2 -2
  139. package/dist/workflow-evidence-service.js +2 -3
  140. package/dist/workflow-evidence-service.js.map +1 -1
  141. package/dist/workflow-gates.js +1 -13
  142. package/dist/workflow-gates.js.map +1 -1
  143. package/dist/workflow-handoff-assessment.js +35 -2
  144. package/dist/workflow-handoff-assessment.js.map +1 -1
  145. package/dist/workflow-handoff-gates.d.ts +2 -0
  146. package/dist/workflow-handoff-gates.js +28 -0
  147. package/dist/workflow-handoff-gates.js.map +1 -0
  148. package/dist/workflow-run-commands.js +78 -1
  149. package/dist/workflow-run-commands.js.map +1 -1
  150. package/dist/workflow-services.js +124 -150
  151. package/dist/workflow-services.js.map +1 -1
  152. package/dist/workspace-claude-settings.d.ts +27 -0
  153. package/dist/workspace-claude-settings.js +54 -0
  154. package/dist/workspace-claude-settings.js.map +1 -0
  155. package/dist/workspace-runtime-bootstrap.js +5 -0
  156. package/dist/workspace-runtime-bootstrap.js.map +1 -1
  157. package/dist/workspace-selection.d.ts +12 -0
  158. package/dist/workspace-selection.js +25 -0
  159. package/dist/workspace-selection.js.map +1 -0
  160. package/docs/context-index-packs.md +45 -2
  161. package/docs/diagrams/deterministic-pipeline/README.md +68 -0
  162. package/docs/e2e-test-batteries.md +3 -3
  163. package/docs/reports/context-pack-benchmark-gh-452.json +119 -0
  164. package/docs/reports/context-pack-benchmark-gh-452.md +32 -0
  165. package/docs/reports/gh-428-test-coverage-context-review-20260522.md +75 -0
  166. package/docs/runtime-adapters.md +138 -27
  167. package/docs/security-env-vars.md +41 -0
  168. package/docs/sonar-quality-gates.md +22 -0
  169. package/package.json +4 -2
  170. package/rules/diagram-quality.mdc +2 -0
  171. package/dist/web-console/assets/index-DA8Fs4r7.js +0 -11
@@ -0,0 +1,119 @@
1
+ {
2
+ "generatedAt": "2026-05-22T17:09:28.420Z",
3
+ "workspaceRoot": "/var/folders/bv/08sjh7yj717c8shc52b5_lh00000gn/T/orchestra-context-pack-bench-YyJ38r",
4
+ "fixture": {
5
+ "requestedFiles": 120,
6
+ "indexedFiles": 120,
7
+ "excludedFiles": 2,
8
+ "totalIndexedBytes": 132854
9
+ },
10
+ "query": "payment command security evidence",
11
+ "role": "developer",
12
+ "phase": "developer",
13
+ "budget": {
14
+ "targetChars": 6000,
15
+ "hardCapChars": 8000,
16
+ "usedChars": 2866,
17
+ "maxFiles": 12,
18
+ "maxSnippets": 24,
19
+ "perSnippetChars": 600,
20
+ "truncated": false
21
+ },
22
+ "reduction": {
23
+ "sourceBytes": 132854,
24
+ "packedChars": 2866,
25
+ "ratio": 0.0216,
26
+ "targetSatisfied": true,
27
+ "hardCapSatisfied": true
28
+ },
29
+ "selectedFiles": [
30
+ {
31
+ "path": "src/payments/payment-001.ts",
32
+ "score": 163,
33
+ "inclusionReason": "path matched query; basename matched query; symbol matched query; developer phase boost",
34
+ "snippetCount": 1
35
+ },
36
+ {
37
+ "path": "src/payments/payment-007.ts",
38
+ "score": 163,
39
+ "inclusionReason": "path matched query; basename matched query; symbol matched query; developer phase boost",
40
+ "snippetCount": 1
41
+ },
42
+ {
43
+ "path": "src/payments/payment-013.ts",
44
+ "score": 163,
45
+ "inclusionReason": "path matched query; basename matched query; symbol matched query; developer phase boost",
46
+ "snippetCount": 1
47
+ },
48
+ {
49
+ "path": "src/payments/payment-019.ts",
50
+ "score": 163,
51
+ "inclusionReason": "path matched query; basename matched query; symbol matched query; developer phase boost",
52
+ "snippetCount": 1
53
+ },
54
+ {
55
+ "path": "src/payments/payment-025.ts",
56
+ "score": 163,
57
+ "inclusionReason": "path matched query; basename matched query; symbol matched query; developer phase boost",
58
+ "snippetCount": 1
59
+ },
60
+ {
61
+ "path": "src/payments/payment-031.ts",
62
+ "score": 163,
63
+ "inclusionReason": "path matched query; basename matched query; symbol matched query; developer phase boost",
64
+ "snippetCount": 1
65
+ },
66
+ {
67
+ "path": "src/payments/payment-037.ts",
68
+ "score": 163,
69
+ "inclusionReason": "path matched query; basename matched query; symbol matched query; developer phase boost",
70
+ "snippetCount": 1
71
+ },
72
+ {
73
+ "path": "src/payments/payment-043.ts",
74
+ "score": 163,
75
+ "inclusionReason": "path matched query; basename matched query; symbol matched query; developer phase boost",
76
+ "snippetCount": 1
77
+ },
78
+ {
79
+ "path": "src/payments/payment-049.ts",
80
+ "score": 163,
81
+ "inclusionReason": "path matched query; basename matched query; symbol matched query; developer phase boost",
82
+ "snippetCount": 1
83
+ },
84
+ {
85
+ "path": "src/payments/payment-055.ts",
86
+ "score": 163,
87
+ "inclusionReason": "path matched query; basename matched query; symbol matched query; developer phase boost",
88
+ "snippetCount": 1
89
+ },
90
+ {
91
+ "path": "src/payments/payment-061.ts",
92
+ "score": 163,
93
+ "inclusionReason": "path matched query; basename matched query; symbol matched query; developer phase boost",
94
+ "snippetCount": 1
95
+ },
96
+ {
97
+ "path": "src/payments/payment-067.ts",
98
+ "score": 163,
99
+ "inclusionReason": "path matched query; basename matched query; symbol matched query; developer phase boost",
100
+ "snippetCount": 1
101
+ }
102
+ ],
103
+ "redactions": 12,
104
+ "omittedCount": 108,
105
+ "staleIndex": {
106
+ "detected": true,
107
+ "statusReason": "context index file changed: src/payments/payment-001.ts",
108
+ "packFailure": "context pack requires a fresh context index: context index file changed: src/payments/payment-001.ts"
109
+ },
110
+ "runtimeSpawnPackMetadata": {
111
+ "taskId": "GH-452",
112
+ "phase": "developer",
113
+ "role": "developer",
114
+ "jsonArtifact": ".agent-workflow/context-packs/gh-452-payment-command-security-evidence-1779469768399.json",
115
+ "markdownArtifact": ".agent-workflow/context-packs/gh-452-payment-command-security-evidence-1779469768399.md",
116
+ "contextPackArtifactPresent": true
117
+ },
118
+ "passed": true
119
+ }
@@ -0,0 +1,32 @@
1
+ # Context Pack Benchmark Report
2
+
3
+ - Task: GH-452
4
+ - Result: passed
5
+ - Indexed files: 120
6
+ - Excluded files: 2
7
+ - Source bytes: 132854
8
+ - Packed chars: 2866
9
+ - Reduction ratio: 0.0216
10
+ - Redactions: 12
11
+ - Stale index detected: true
12
+
13
+ ## Selected Files
14
+
15
+ - src/payments/payment-001.ts score=163 snippets=1: path matched query; basename matched query; symbol matched query; developer phase boost
16
+ - src/payments/payment-007.ts score=163 snippets=1: path matched query; basename matched query; symbol matched query; developer phase boost
17
+ - src/payments/payment-013.ts score=163 snippets=1: path matched query; basename matched query; symbol matched query; developer phase boost
18
+ - src/payments/payment-019.ts score=163 snippets=1: path matched query; basename matched query; symbol matched query; developer phase boost
19
+ - src/payments/payment-025.ts score=163 snippets=1: path matched query; basename matched query; symbol matched query; developer phase boost
20
+ - src/payments/payment-031.ts score=163 snippets=1: path matched query; basename matched query; symbol matched query; developer phase boost
21
+ - src/payments/payment-037.ts score=163 snippets=1: path matched query; basename matched query; symbol matched query; developer phase boost
22
+ - src/payments/payment-043.ts score=163 snippets=1: path matched query; basename matched query; symbol matched query; developer phase boost
23
+ - src/payments/payment-049.ts score=163 snippets=1: path matched query; basename matched query; symbol matched query; developer phase boost
24
+ - src/payments/payment-055.ts score=163 snippets=1: path matched query; basename matched query; symbol matched query; developer phase boost
25
+ - src/payments/payment-061.ts score=163 snippets=1: path matched query; basename matched query; symbol matched query; developer phase boost
26
+ - src/payments/payment-067.ts score=163 snippets=1: path matched query; basename matched query; symbol matched query; developer phase boost
27
+
28
+ ## Runtime Spawn Pack Metadata
29
+
30
+ - JSON: .agent-workflow/context-packs/gh-452-payment-command-security-evidence-1779469768399.json
31
+ - Markdown: .agent-workflow/context-packs/gh-452-payment-command-security-evidence-1779469768399.md
32
+ - Context pack artifact present: true
@@ -0,0 +1,75 @@
1
+ # GH-428 Test Coverage And Context Review
2
+
3
+ Date: 2026-05-22
4
+ Task: GH-428-TEST-COVERAGE-CONTEXT-REVIEW
5
+ Scope: CLI, web console/site, runtime workflow gates, and context-token performance.
6
+
7
+ ## Summary
8
+
9
+ Architect and QA review found no release-blocking source test gap for the current implemented behavior. The current suite has broad unit/integration coverage, browser E2E coverage for the web console and site, runtime E2E coverage for init/target guidance and queue behavior, and context-pack coverage for bounded runtime delegation.
10
+
11
+ The main remaining gaps are backlog improvements: full multi-squad E2E, workflow lifecycle CLI E2E as a standalone installed-user battery, security-boundary E2E, and stronger context budget telemetry over real-world repositories.
12
+
13
+ ## Evidence Reviewed
14
+
15
+ - `npm run precommit`: passed. Remaining warnings are historical `workflow-runs.jsonl` references to archived/unknown tasks.
16
+ - `npm run release:matrix`: passed and lists required release flows.
17
+ - `npm run performance:bench`: passed with 250 tasks. `graph-plan` 118 ms, `health` 120 ms, web `/api/graph/plan` 119 ms.
18
+ - `node --test test/context-index.test.js test/context-search.test.js test/context-pack.test.js test/runtime-scheduler.test.js test/runtime-adapters.test.js`: 80 passing.
19
+ - GH-419 CI passed after push. GH-419 and prior GH-424 Sonar workflows failed at the quality-gate step after importing Orchestra evidence.
20
+
21
+ ## Coverage Matrix
22
+
23
+ | Area | Current evidence | Gaps | Priority | Recommendation |
24
+ | --- | --- | --- | --- | --- |
25
+ | Source quality gate | `npm run precommit` covers lint, typecheck, secret scan, security audit, build, unit tests, workflow validation | Historical workflow-run warnings add noise | Medium | Create cleanup task for stale workflow-run references, not a release blocker |
26
+ | CLI onboarding | `e2e/init-onboarding.test.js` covers `/tmp` workspaces, first-use flow, target guidance refresh, acceptance mapping | Installed-package journey is listed in matrix but should be mandatory in release checklist | Medium | Promote installed-package init to release-blocking before publish |
27
+ | Runtime target guidance | `e2e/runtime-instruction-flow.test.js` and runtime adapter tests cover Codex, Claude, Cursor, VS Code/GitHub Copilot, Windsurf guidance contracts | Real Claude CLI validation remains user-environment dependent | Medium | Keep GH-439 for real Claude validation; add optional local smoke recipe |
28
+ | Runtime delegation/queue | `e2e/runtime-manual-queue.test.js`, runtime scheduler tests, parent-action tests | Multi-squad E2E listed in docs but no standalone `e2e/runtime-multi-squad.test.js` file | High | Create E2E for async multi-squad, queue promotion, independent completion, and no parent blocking |
29
+ | Workflow lifecycle CLI | Unit/integration workflow tests cover gates, resume, failback, no-go, handoffs | Release matrix references `e2e/workflow-lifecycle-cli.test.js`, but the file does not exist | High | Add standalone E2E for installed-user lifecycle: run, gate, request changes, failback, resume, release readiness |
30
+ | Web console | `e2e/web-console.spec.js` covers task creation, dashboard, providers, recovery, evidence, lifecycle, accessibility, artifact canvas | Full visual regression and screenshot review are not release-blocking by default | Medium | Add snapshot/screenshots for high-risk UI states when frontend changes |
31
+ | Public site | `e2e/project-site.spec.js` covers docs/site navigation and public docs behavior | Docs search/content freshness should be tied to generated docs manifest in CI | Medium | Add docs content freshness check before site build |
32
+ | Acceptance evidence | `test/qa-coverage.test.js`, `test/cli-output-evidence.test.js`, automation evidence tests | E2E file listed as `e2e/acceptance-evidence.test.js` does not exist | Medium | Add cross-surface acceptance evidence E2E for CLI/API/browser/deferred validation |
33
+ | Security boundaries | Unit tests cover web action security, secret scan, path handling | Release matrix lists `e2e/security-boundaries.test.js`, but the file does not exist | High | Add E2E for path traversal, unsafe writes, secret redaction, no raw stack traces |
34
+ | Context-token performance | Context index/search/pack tests and `performance:bench` pass. Context packs are bounded and fail closed on stale indexes | No large-repo telemetry baseline checked into reports | Medium | Add benchmark fixture/report for large repo context pack size, redaction count, selected files, and token budget |
35
+ | Sonar quality loop | Local/import contracts exist and release matrix includes Sonar | GH-419 and GH-424 Sonar workflows failed at the quality-gate step after importing Orchestra evidence | High | Treat Sonar workflow failure as operational quality-gate follow-up before release candidate promotion |
36
+
37
+ ## Architect Review
38
+
39
+ The test architecture is layered correctly:
40
+
41
+ - Unit and service tests cover domain contracts, workflow gates, runtime scheduler, context pack services, and evidence rules.
42
+ - E2E tests run against isolated `/tmp` workspaces and validate stdout/stderr, generated files, events, and browser-visible state.
43
+ - Runtime context work now has a bounded context index/search/pack path, reducing full-transcript and full-file context pressure.
44
+
45
+ Architecture gaps are mostly orchestration-level:
46
+
47
+ - The release matrix names some E2E batteries that are not implemented as standalone files yet.
48
+ - Runtime/provider validation is intentionally mixed: deterministic source tests prove contracts, while real provider/Claude validation remains environment dependent.
49
+ - Sonar is part of the quality loop, but a failing Sonar CI workflow should be made visible as a release-readiness blocker or an accepted operational risk.
50
+
51
+ ## QA Review
52
+
53
+ QA coverage is sufficient for the current GH-419/GH-424/GH-451 implemented behavior based on precommit and focused suites.
54
+
55
+ QA gaps to convert into backlog:
56
+
57
+ - Multi-squad runtime E2E must prove that multiple squads can be queued/active at once, the parent remains conversational, completions reconcile independently, and queued work promotes after capacity frees.
58
+ - Workflow lifecycle CLI E2E must prove phase handoffs, blocked transitions, return to owner, resume after correction, and release artifact content in a disposable workspace.
59
+ - Security boundary E2E must prove that path traversal, unsafe writes, and stack trace leakage are blocked, and that secret-like payloads are redacted, across CLI/API surfaces.
60
+ - Context performance evidence should include large-repo budgets, selected-file rationale, redaction counts, stale-index behavior, and token budget deltas.
61
+
62
+ ## Follow-Up Candidates
63
+
64
+ 1. High: Add standalone multi-squad runtime E2E battery: GH-455.
65
+ 2. High: Add standalone workflow lifecycle CLI E2E battery: GH-453.
66
+ 3. High: Add security-boundary E2E battery: GH-454.
67
+ 4. High: Make Sonar workflow failure visible in release readiness.
68
+ 5. Medium: Promote installed-package CLI init to release-blocking publish evidence.
69
+ 6. Medium: Add large-repo context-pack benchmark report: GH-452.
70
+ 7. Medium: Add docs/site content freshness check against generated docs manifest.
71
+ 8. Medium: Add cross-surface acceptance evidence E2E.
72
+
73
+ ## Release Assessment
74
+
75
+ No source implementation was performed for GH-428. Current implemented behavior is not blocked by this review. The high-priority follow-ups should be scheduled as hardening work before the next broad release candidate, especially multi-squad E2E, workflow lifecycle CLI E2E, security-boundary E2E, and Sonar release visibility.
@@ -62,10 +62,19 @@ Provider-backed phases require explicit opt-in. Connecting a provider with
62
62
  `runtimePolicy.delegation.allowDirectProviderApi=true`; without that opt-in,
63
63
  workflow phases fail before calling the provider. Successful provider-backed
64
64
  phases are recorded as `executor: provider-backed-phase` with provider, model,
65
- fallbacks, and `directProviderApiAllowed: true`. Runtime-native subagent
65
+ fallbacks, request id, response id, token usage source, cost source, final
66
+ provider/model, and `directProviderApiAllowed: true`. Runtime-native subagent
66
67
  requests remain `directProviderApiAllowed: false`, even when provider routing is
67
68
  configured for the same role.
68
69
 
70
+ Provider-backed phase execution goes through the provider-backed agent wrapper.
71
+ The wrapper resolves `openai`, `anthropic`, `gemini`, `ollama`, `fake`, and
72
+ future providers through the provider registry/factory boundary around
73
+ `ModelProvider`; workflow orchestration should not branch on provider vendors.
74
+ OpenAI/Codex provider models run as provider-backed execution. `codex-cli` is a
75
+ runtime-native parent session and never becomes a provider API fallback unless a
76
+ future explicit hybrid policy records that decision as evidence.
77
+
69
78
  ## Init Modes
70
79
 
71
80
  Default project init keeps the current compact bootstrap behavior:
@@ -129,6 +138,29 @@ orchestra workflow render --target codex --task STORY-001
129
138
  Change `--target` to the runtime that is executing the work. The workflow state,
130
139
  roles, evidence, reviews, and gates remain runtime-agnostic.
131
140
 
141
+ ## Codex Recurring Preflight
142
+
143
+ Codex does not provide a project-native recurring hook that Open Orchestra can
144
+ install for every context compaction, resumed session, interruption, or role
145
+ handoff. The fallback is explicit managed guidance in `AGENTS.md` plus the
146
+ existing pre-run validation command.
147
+
148
+ Before each new Codex work block, and again after any context shift, run:
149
+
150
+ ```bash
151
+ orchestra health --json
152
+ orchestra task list --json --status pending,blocked,in_progress
153
+ orchestra validate --pre-run --task STORY-001 --json
154
+ ```
155
+
156
+ The JSON report includes `activeOrchestraContext` and `missingActiveContext`.
157
+ When `activeOrchestraContext` is `false`, the current Codex session is missing
158
+ one or more required workflow anchors: task registration, effort estimate, or a
159
+ workflow run for the task. Reload task context and resume or register the
160
+ workflow before editing files. Evidence and review checks still appear in the
161
+ same report, but those are completion/handoff signals rather than active-context
162
+ anchors.
163
+
132
164
  ## Web And VS Code
133
165
 
134
166
  The local web console exposes workspace classification and supported runtime
@@ -185,6 +217,54 @@ points to the prompt artifact, expected result artifact, ownership paths,
185
217
  allowed commands, and lifecycle commands. It does not include secrets or direct
186
218
  provider credentials.
187
219
 
220
+ ## Runtime Spawn Bridge Boundary
221
+
222
+ Open Orchestra is the runtime delegation control plane, not the owner of hidden
223
+ LLM runtime tools. Core commands normalize the spawn intent, evaluate guardrails,
224
+ write prompt and handoff artifacts, expose parent actions, record lifecycle, and
225
+ resume workflows. The actual native spawn call belongs to a parent-side consumer
226
+ running inside the active runtime session.
227
+
228
+ The bridge contract has two sides:
229
+
230
+ - **Control-plane wrapper**: Orchestra emits a runtime-neutral request with task
231
+ id, run id, phase, role, context bundle, ownership paths, expected output,
232
+ evidence contract, queue metadata, and lifecycle commands.
233
+ - **Parent-side consumer**: the active Codex, Claude, Cursor, local worker, or
234
+ other runtime reads the parent action, invokes any native child-agent tool it
235
+ owns, and records `runtime spawn-lifecycle` with the real child identifier.
236
+
237
+ A session is only considered actually spawned after a lifecycle event records
238
+ `--status spawned` with a real runtime child id. Parent actions, request
239
+ artifacts, and dispatch guidance are not equivalent to a running subagent.
240
+
241
+ Adapter capability terms are strict:
242
+
243
+ - `parent-tool`: a parent runtime can invoke a native tool such as Codex
244
+ `spawn_agent`, but Orchestra still cannot call that hidden tool from Node.
245
+ - `request-only`: Orchestra can produce the request and lifecycle instructions,
246
+ while the parent runtime must execute the native tool manually or by following
247
+ its session prompt.
248
+ - `local-process`: a future explicit local executor can be launched as a child
249
+ process under Orchestra policy.
250
+ - `unsupported`: no subagent request should be emitted except as an explicit
251
+ unsupported/fallback result.
252
+
253
+ For Codex, the intended path is parent-tool mediated: Orchestra renders
254
+ `codex-spawn-agent`, the parent Codex agent consumes the action and calls
255
+ `spawn_agent`, and the returned Codex agent id is recorded through
256
+ `runtime spawn-lifecycle --status spawned`. Until that happens, Codex actions
257
+ remain requested or skipped with manual guidance.
258
+
259
+ For Claude, the currently supported path is request-only or parent-agent mediated.
260
+ Claude Code's parent session can launch Agent/Subagent work, but Orchestra
261
+ cannot invoke that tool directly from its CLI process. Real automation requires
262
+ the parent Claude agent to follow the session instruction to inspect
263
+ `runtime parent-actions` and call the Agent tool, or a future Claude hook/API
264
+ that can trigger the same action. Until such a hook or callback is available and
265
+ verified, automated Claude native execution remains deferred/manual and must not
266
+ claim spawned lifecycle.
267
+
188
268
  Pending parent actions also include structured `eligibility` metadata. The
189
269
  metadata records the checked runtime, action kind, tool name, session status,
190
270
  runtime filter when supplied, and safety state. Dispatchable actions report
@@ -200,13 +280,14 @@ have two supported paths:
200
280
  - Manual inspection: run `runtime parent-actions --task <id> --json`, inspect
201
281
  each requested action, call the active runtime's native tool, then record
202
282
  `runtime spawn-lifecycle` with the returned child id.
203
- - Auto-dispatch: run
283
+ - Verified dispatch: run
204
284
  `runtime parent-actions --task <id> --dispatch --until-idle --runtime <runtime-id>`.
205
285
  The dispatcher repeatedly inspects pending parent actions, dispatches only
206
286
  safe actions for the active runtime, records spawned and active lifecycle
207
- events with stable runtime child ids or verified callback correlation ids, applies
208
- `runtime watch` completions when expected handoff artifacts appear, resumes
209
- paused workflow runs, and continues across later phases until idle or timeout.
287
+ events only when the adapter has a real runtime child id or verified callback
288
+ correlation id, applies `runtime watch` completions when expected handoff
289
+ artifacts appear, resumes paused workflow runs, and continues across later
290
+ phases until idle or timeout.
210
291
 
211
292
  The auto-dispatch loop is bounded by `--timeout`, `--idle-timeout`, and
212
293
  `--interval`, so it never polls forever. It skips queued actions, suspended
@@ -216,14 +297,17 @@ manual requests, and tool mismatches. Skipped actions include fallback guidance
216
297
  with the prompt artifact, expected result artifact, and manual lifecycle
217
298
  commands so a human parent runtime can safely continue without provider API
218
299
  access. This keeps the boundary explicit: Orchestra emits auditable actions and
219
- lifecycle commands; the active parent runtime executes native tools such as
220
- Codex `spawn_agent`, and the dispatcher only consumes actions that are safe for
221
- the runtime declared on the command line. For Claude, the tested dispatch
222
- contract accepts `claude-agent-request` with `tool=claude-code-agent`, but it
223
- records `spawned` and `active` only when the active parent runtime is Claude and
224
- the native callback capability is explicitly verified. Unsupported Codex, CI,
225
- non-Claude, or callback-unavailable contexts return fallback guidance and do not
226
- claim native execution. Orchestra does not call Claude Code, Anthropic APIs, or
300
+ lifecycle commands; the active parent runtime or a verified local bridge
301
+ executes native tools. Codex dispatch cannot invoke `spawn_agent` from the CLI
302
+ process, so `codex-spawn-agent` dispatch returns manual guidance and must not
303
+ record `spawned` until the Codex parent runtime has called `spawn_agent` and
304
+ then runs `runtime spawn-lifecycle --status spawned` with the real returned
305
+ agent id. For Claude, the tested dispatch contract accepts
306
+ `claude-agent-request` with `tool=claude-code-agent`, but it records `spawned`
307
+ and `active` only when the active parent runtime is Claude and the native
308
+ callback capability is explicitly verified. Unsupported Codex, CI, non-Claude,
309
+ or callback-unavailable contexts return fallback guidance and do not claim
310
+ native execution. Orchestra does not call Claude Code, Anthropic APIs, or
227
311
  another provider API.
228
312
 
229
313
  Runtime-native dispatch also enforces delegation capacity before calling parent
@@ -238,14 +322,37 @@ or gates from flooding the parent runtime at once while still allowing
238
322
  background work to continue as capacity becomes available.
239
323
 
240
324
  Runtime lifecycle watching is adapter-driven. Each inspected session reports a
241
- `watcher` object with adapter id, detection mode, support level, fallback
242
- behavior, and the reason a native callback is unavailable. `codex-cli`,
243
- `claude-cli`, and `cursor-cli` currently reconcile completion through explicit
244
- parent lifecycle events and then fall back to bounded artifact inspection.
245
- `generic-runtime`, unknown runtime ids, and runtimes without declared callbacks
246
- use the same artifact fallback directly. Event-driven callbacks should only be
247
- used when the selected watcher adapter declares native support; otherwise
248
- `runtime watch` performs bounded inspection of the expected handoff artifact.
325
+ `watcher` object with adapter id, detection mode, support level, supported
326
+ completion signals, fallback behavior, and the reason a native callback is
327
+ unavailable. `codex-cli`, `claude-cli`, and `cursor-cli` reconcile completion
328
+ through observable runtime notifications, explicit lifecycle events, child
329
+ self-report commands, and bounded expected-artifact inspection. `generic-runtime`,
330
+ unknown runtime ids, and runtimes without declared callbacks use the same safe
331
+ artifact fallback directly. Event-driven callbacks should only be used when the
332
+ selected watcher adapter declares native support; otherwise `runtime watch`
333
+ requires a safe handoff artifact or a recorded runtime notification before it
334
+ marks a session terminal.
335
+
336
+ Runtime notifications are provider-neutral. A parent runtime, local integration,
337
+ web callback, or child agent that cannot directly run `runtime spawn-lifecycle`
338
+ can record an observable signal with:
339
+
340
+ ```bash
341
+ orchestra runtime notification --session <session-id> --status completed --artifact <expected-handoff.md> --agent-id <runtime-child-id>
342
+ ```
343
+
344
+ The watcher still validates task id, phase, role, runtime, session id, and the
345
+ expected handoff artifact before applying completion. A notification without the
346
+ expected artifact stays waiting; a mismatched or unsafe artifact is skipped with
347
+ an explicit reason. Failed notifications record a failed lifecycle state and do not
348
+ auto-resume the workflow.
349
+ For spawned or active sessions, expected handoff validation runs before timeout
350
+ evaluation. A stale session that already produced the valid expected handoff is
351
+ completed, while stale sessions without a valid completion artifact can still be
352
+ marked timed out.
353
+ Requested sessions are also reconciled from a valid expected handoff artifact.
354
+ This covers parent runtimes that produce the handoff but cannot self-report the
355
+ intermediate spawned lifecycle event.
249
356
 
250
357
  ## Claude Adapter Support Level
251
358
 
@@ -321,7 +428,10 @@ They need a precise packet and lifecycle hooks:
321
428
  assignment. In workflow auto-consumer mode, use
322
429
  `runtime parent-actions --dispatch --until-idle --runtime codex-cli` to
323
430
  discover and consume safe actions after the run pauses. Keep the child
324
- detached unless the parent is blocked.
431
+ detached unless the parent is blocked. The child prompt must write the
432
+ expected handoff and self-report completion with `runtime spawn-lifecycle`; if
433
+ the runtime cannot execute commands, it must emit a runtime notification that
434
+ can be reconciled by `runtime watch`.
325
435
  - Cursor: render `runtime spawn-request`, then launch it as a Cursor Background
326
436
  Agent. Background work should stay detached from the current chat and report
327
437
  lifecycle state back to Orchestra before the workflow is resumed.
@@ -350,12 +460,13 @@ role/profile with the runtime executor:
350
460
  - **Subagent**: a runtime-native role-scoped execution unit, only available
351
461
  when the selected runtime adapter declares `subagents.runtimeNative: true`
352
462
  and a supported `subagents.spawn.mode`.
353
- - **Spawn bridge**: the runtime-specific mechanism for creating that child
463
+ - **Spawn bridge**: the runtime-specific mechanism for requesting that child
354
464
  execution. Modes are `unsupported`, `request-only`, `parent-tool`, and
355
- `local-process`. `codex-cli` is the first `parent-tool` bridge and renders a
356
- `spawn_agent` request for the active Codex parent session; other runtimes can
357
- consume the same request artifact without allowing Orchestra to call vendor
358
- APIs directly.
465
+ `local-process`. `codex-cli` renders a `spawn_agent` request for the active
466
+ Codex parent session, but the parent Codex agent must call the tool and record
467
+ the returned id. `claude-cli` is request-only or parent-agent mediated until a
468
+ reliable hook/API/callback is available. Other runtimes can consume the same
469
+ request artifact without allowing Orchestra to call vendor APIs directly.
359
470
  - **Provider**: a direct model/provider route used by provider-backed phase
360
471
  prompts. Provider APIs are separate from runtime-native subagents and are
361
472
  never used as a silent fallback for runtime delegation.
@@ -0,0 +1,41 @@
1
+ # Security Environment Variable Manifest
2
+
3
+ This manifest documents environment variable names that are referenced by local
4
+ security-sensitive automation and runtime detection. It intentionally records
5
+ names only; secret values must stay in the caller environment, CI secret store,
6
+ or local secret files.
7
+
8
+ ## Local Runtime And Tooling
9
+
10
+ - `ANTHROPIC_API_KEY_FILE`: optional Anthropic credential file path.
11
+ - `CLAUDE_CODE`: Claude Code runtime marker.
12
+ - `CLAUDECODE`: Claude Code runtime marker.
13
+ - `CODEX_SANDBOX`: Codex runtime marker.
14
+ - `CODEX_THREAD_ID`: Codex runtime marker.
15
+ - `CURSOR_AGENT`: Cursor runtime marker.
16
+ - `CURSOR_TRACE_ID`: Cursor runtime marker.
17
+ - `OLLAMA_API_KEY_FILE`: optional Ollama credential file path.
18
+ - `OPENAI_API_KEY_FILE`: optional OpenAI credential file path.
19
+ - `OPEN_ORCHESTRA_CLAUDE_NATIVE_CALLBACK`: local Claude native callback marker.
20
+ - `OPEN_ORCHESTRA_CLAUDE_NATIVE_CHILD_ID`: Claude native child id marker.
21
+ - `ORCHESTRA_GITLEAKS_BIN`: optional absolute gitleaks binary override.
22
+ - `ORCHESTRA_SECRET_SCAN_FORCE_FALLBACK`: forces fallback secret scanning.
23
+ - `ORCHESTRA_SKIP_UPDATE_CHECK`: disables package update checks.
24
+ - `ORCHESTRA_UPDATE_CHECK_CACHE_DIR`: package update check cache directory.
25
+ - `ORCHESTRA_WORKFLOW_HEARTBEAT_MS`: workflow heartbeat interval override.
26
+
27
+ ## CI And Release Automation
28
+
29
+ - `CF_ACCESS_PROXY_LISTEN_HOST`: local Cloudflare Access proxy host.
30
+ - `CF_ACCESS_PROXY_LISTEN_PORT`: local Cloudflare Access proxy port.
31
+ - `CLOUDFLARE_ACCOUNT_ID`: Cloudflare account identifier from CI secrets.
32
+ - `CLOUDFLARE_API_TOKEN`: Cloudflare deployment token from CI secrets.
33
+ - `GH_TOKEN`: GitHub CLI token provided by GitHub Actions.
34
+ - `GITHUB_HEAD_REF`: GitHub Actions pull request source branch.
35
+ - `GITHUB_REPOSITORY`: GitHub Actions repository slug.
36
+ - `GITLEAKS_ASSET`: temporary gitleaks release asset name in CI.
37
+ - `GITLEAKS_VERSION`: pinned gitleaks release version in CI.
38
+ - `NPM_USER`: npm authenticated username captured during release validation.
39
+ - `NPM_VERSION`: npm package version captured during release validation.
40
+ - `PKG_VERSION`: package version used by release tag creation.
41
+ - `PRERELEASE_FLAG`: release tag prerelease marker.
@@ -18,6 +18,28 @@ Supported provider modes:
18
18
  regulated tenants, private codebases, or repositories where hosted LOC limits
19
19
  and external API permissions are a concern.
20
20
 
21
+ ### Community Branch and PR Decoration Option
22
+
23
+ SonarQube Community Edition does not include the same branch analysis and pull
24
+ request decoration capabilities as paid editions. If those capabilities are
25
+ needed in a private local or self-hosted Community setup, evaluate
26
+ [`mc1arke/sonarqube-community-branch-plugin`](https://github.com/mc1arke/sonarqube-community-branch-plugin)
27
+ as an optional infrastructure add-on.
28
+
29
+ Use this only as an explicitly accepted operational dependency:
30
+
31
+ - It is not maintained or supported by SonarSource.
32
+ - Plugin compatibility follows the SonarQube major/minor version; pin the image
33
+ or plugin release to the running SonarQube version.
34
+ - Migration from Community Edition plus this plugin to commercial SonarQube
35
+ editions has no guaranteed official upgrade path.
36
+ - SaaS or regulated-tenant usage requires a separate security, supportability,
37
+ upgrade, backup, and rollback review before adoption.
38
+
39
+ For low-cost local dogfooding, prefer the plugin's published Docker image or a
40
+ separate shared SonarQube infrastructure repository instead of coupling plugin
41
+ installation to this product repository.
42
+
21
43
  Required GitHub secret when the GitHub Actions workflow is enabled:
22
44
 
23
45
  - `SONAR_TOKEN`: token for SonarQube Cloud or SonarQube Server.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jterrats/open-orchestra",
3
- "version": "1.0.14",
3
+ "version": "1.0.15",
4
4
  "type": "module",
5
5
  "workspaces": [
6
6
  "extensions/vscode-open-orchestra",
@@ -17,13 +17,15 @@
17
17
  "test:coverage": "npm run build && c8 --reporter=lcov --reports-dir coverage --exclude \"test/**\" --exclude \"e2e/**\" --exclude \"extensions/**/test/**\" --exclude \"dist/assets/**\" --exclude \"dist/web-console/**\" node --test test/**/*.js extensions/**/*.test.cjs",
18
18
  "test:e2e": "npm run build && npm run site:build && playwright test",
19
19
  "test:e2e:init": "node --test e2e/init-onboarding.test.js e2e/runtime-instruction-flow.test.js",
20
- "test:e2e:runtime": "node --test e2e/runtime-manual-queue.test.js",
20
+ "test:e2e:runtime": "node --test e2e/runtime-manual-queue.test.js e2e/runtime-multi-squad.test.js",
21
+ "test:e2e:security": "npm run build && node --test e2e/security-boundaries.test.js",
21
22
  "test:e2e:runtime:ollama": "npm run build && node --test e2e/runtime-ollama-provider.test.js",
22
23
  "lint": "eslint . && prettier --check \"{bin,e2e,scripts,test,src}/**/*.js\" \"{site,web-console}/src/**/*.{css,js,jsx}\" \"{site,web-console}/*.{html,js,json}\" \"extensions/**/*.{cjs,json,md}\" \"src/**/*.ts\" \"*.{js,json}\"",
23
24
  "format": "prettier --write \"{bin,e2e,scripts,test,src}/**/*.js\" \"{site,web-console}/src/**/*.{css,js,jsx}\" \"{site,web-console}/*.{html,js,json}\" \"extensions/**/*.{cjs,json,md}\" \"src/**/*.ts\" \"*.{js,json}\"",
24
25
  "secret-scan": "node scripts/secret-scan.js",
25
26
  "security:audit": "node scripts/security-audit.js",
26
27
  "architecture:inventory": "npm run build && node scripts/architecture-debt-inventory.js",
28
+ "context:pack:bench": "npm run build && node scripts/context-pack-benchmark.js --report-dir docs/reports",
27
29
  "duplicates": "jscpd --config .jscpd.json",
28
30
  "validate:workflow": "node scripts/validate-workflow.js",
29
31
  "release:matrix": "node scripts/release-test-matrix.js",
@@ -25,6 +25,8 @@
25
25
  - Validate annotation target clarity; annotation arrows must visibly land on the element or line they describe, and annotation text must not obscure the target.
26
26
  - For diagrams without a source reference, create a diagram contract before drawing and validate the render against that contract before handoff.
27
27
  - Source-free diagrams still require a pixel-perfect pass against their own contract before delivery: no text overflow, no clipped containers, no floating or buried connector endpoints, no unintended overlaps, no hidden arrowheads, and no incoherent whitespace.
28
+ - For deterministic SVG pipeline work, treat the typed diagram model as the source of truth. Layout, SVG rendering, validation findings, icon references, and final deliverables must be reproducible from model input without LLM-selected absolute coordinates.
29
+ - Icon references in deterministic diagrams should use semantic purpose plus Iconify id, resolved from a cacheable source. Tests must not require network access to fetch icons.
28
30
  - For source-free diagrams, iterate after the first render until container sizes, line routing, connector anchors, label positions, and visual balance are correct. Do not deliver an unreviewed first render.
29
31
  - For every post-render correction, re-run the full diagram review rather than checking only the edited area. A diagram passes only when the whole canvas still satisfies container containment, label clearance, connector routing, z-order, and whitespace rules.
30
32
  - A regenerated diagram must materially change geometry for the finding it claims to fix. If two versions preserve the same collision, overflow, endpoint gap, or unnecessary route bend, change the layout strategy instead of only re-rendering.