@jterrats/open-orchestra 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +151 -0
- package/CLAUDE.md +157 -0
- package/README.md +60 -0
- package/bin/orchestra.js +8 -0
- package/dist/args.d.ts +3 -0
- package/dist/args.js +30 -0
- package/dist/args.js.map +1 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +190 -0
- package/dist/cli.js.map +1 -0
- package/dist/commands.d.ts +44 -0
- package/dist/commands.js +883 -0
- package/dist/commands.js.map +1 -0
- package/dist/constants.d.ts +15 -0
- package/dist/constants.js +69 -0
- package/dist/constants.js.map +1 -0
- package/dist/defaults.d.ts +72 -0
- package/dist/defaults.js +694 -0
- package/dist/defaults.js.map +1 -0
- package/dist/fs-utils.d.ts +8 -0
- package/dist/fs-utils.js +35 -0
- package/dist/fs-utils.js.map +1 -0
- package/dist/model-providers.d.ts +19 -0
- package/dist/model-providers.js +78 -0
- package/dist/model-providers.js.map +1 -0
- package/dist/types.d.ts +550 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/validation.d.ts +10 -0
- package/dist/validation.js +163 -0
- package/dist/validation.js.map +1 -0
- package/dist/web-api.d.ts +16 -0
- package/dist/web-api.js +220 -0
- package/dist/web-api.js.map +1 -0
- package/dist/web-chart-contracts.d.ts +13 -0
- package/dist/web-chart-contracts.js +13 -0
- package/dist/web-chart-contracts.js.map +1 -0
- package/dist/web-console.d.ts +1 -0
- package/dist/web-console.js +232 -0
- package/dist/web-console.js.map +1 -0
- package/dist/web-evidence.d.ts +25 -0
- package/dist/web-evidence.js +67 -0
- package/dist/web-evidence.js.map +1 -0
- package/dist/web-playwright.d.ts +3 -0
- package/dist/web-playwright.js +14 -0
- package/dist/web-playwright.js.map +1 -0
- package/dist/web-roles.d.ts +33 -0
- package/dist/web-roles.js +70 -0
- package/dist/web-roles.js.map +1 -0
- package/dist/workflow-gates.d.ts +7 -0
- package/dist/workflow-gates.js +291 -0
- package/dist/workflow-gates.js.map +1 -0
- package/dist/workflow-services.d.ts +56 -0
- package/dist/workflow-services.js +1240 -0
- package/dist/workflow-services.js.map +1 -0
- package/dist/workspace-validator.d.ts +6 -0
- package/dist/workspace-validator.js +189 -0
- package/dist/workspace-validator.js.map +1 -0
- package/dist/workspace.d.ts +10 -0
- package/dist/workspace.js +72 -0
- package/dist/workspace.js.map +1 -0
- package/docs/multi-agent-orchestrator-backlog.md +445 -0
- package/docs/multi-agent-orchestrator-sprint-1.md +433 -0
- package/docs/orchestra-mvp.md +176 -0
- package/package.json +63 -0
- package/rules/agent-collaboration.mdc +58 -0
- package/rules/agent-roles.mdc +105 -0
- package/rules/ai-assisted-development.mdc +31 -0
- package/rules/api-design.mdc +31 -0
- package/rules/architecture-decisions.mdc +27 -0
- package/rules/code-review-engineering.mdc +34 -0
- package/rules/concurrency-async.mdc +32 -0
- package/rules/configuration-management.mdc +31 -0
- package/rules/data-modeling-domain.mdc +31 -0
- package/rules/delivery-quality-gates.mdc +40 -0
- package/rules/dependency-management.mdc +31 -0
- package/rules/devops-tooling.mdc +55 -0
- package/rules/documentation-standards.mdc +26 -0
- package/rules/dry-clean-code.mdc +30 -0
- package/rules/error-handling.mdc +28 -0
- package/rules/frontend-engineering.mdc +32 -0
- package/rules/git-discipline.mdc +39 -0
- package/rules/infra-data-encryption.mdc +81 -0
- package/rules/performance-reliability.mdc +32 -0
- package/rules/readiness-done.mdc +32 -0
- package/rules/release-rollback.mdc +32 -0
- package/rules/rule-composition.mdc +28 -0
- package/rules/security-guardrails.mdc +37 -0
- package/rules/solid-architecture.mdc +32 -0
- package/rules/static-analysis-githooks.mdc +32 -0
- package/rules/testing-discipline.mdc +42 -0
- package/rules/ux-ui-product-experience.mdc +51 -0
- package/rules/work-intake-sequencing.mdc +39 -0
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
# Multi-Agent Orchestrator Sprint 1
|
|
2
|
+
|
|
3
|
+
## Sprint Goal
|
|
4
|
+
|
|
5
|
+
Deliver a file-based MVP that can initialize a workflow workspace, define roles and tasks, record append-only events, enforce basic readiness gates, create structured handoffs, and show status. This sprint proves collaboration through artifacts before adding real LLM provider execution.
|
|
6
|
+
|
|
7
|
+
## Sprint Assumptions
|
|
8
|
+
|
|
9
|
+
- The first implementation is local-first and file-based.
|
|
10
|
+
- No real LLM provider integration is required in Sprint 1.
|
|
11
|
+
- The CLI is the primary interface.
|
|
12
|
+
- TypeScript/Node is the primary runtime for the CLI, schemas, workflow engine, and Playwright integration.
|
|
13
|
+
- Python is allowed as an auxiliary worker runtime for tasks where it is clearly stronger, such as data processing, ML/AI utilities, report generation, or integrations with Python-first tooling.
|
|
14
|
+
- The orchestrator must be provider-agnostic at the data-model level, even if providers are not implemented yet.
|
|
15
|
+
- The MVP should work inside any git repository without requiring external services.
|
|
16
|
+
|
|
17
|
+
## Stack Decision
|
|
18
|
+
|
|
19
|
+
- **Primary language**: TypeScript.
|
|
20
|
+
- **Runtime**: Node.js.
|
|
21
|
+
- **CLI**: Node-based `orchestra`.
|
|
22
|
+
- **Schema validation**: TypeScript-first schema library.
|
|
23
|
+
- **Automated browser testing**: Playwright.
|
|
24
|
+
- **Unit tests**: TypeScript test runner, to be selected during implementation.
|
|
25
|
+
- **Auxiliary runtime**: Python workers invoked only through explicit command contracts.
|
|
26
|
+
|
|
27
|
+
Rationale:
|
|
28
|
+
- Web and Playwright are first-class priorities, so TypeScript keeps CLI, browser automation, schemas, and future UI work in one ecosystem.
|
|
29
|
+
- Python remains available for specialized workers without making the orchestrator depend on Python for its core control plane.
|
|
30
|
+
- Provider and tool integrations must stay behind interfaces so the runtime choice does not leak into workflow artifacts.
|
|
31
|
+
|
|
32
|
+
## In Scope
|
|
33
|
+
|
|
34
|
+
- `.agent-workflow/` structure.
|
|
35
|
+
- Configurable roles.
|
|
36
|
+
- Task graph persistence.
|
|
37
|
+
- Append-only event log.
|
|
38
|
+
- Basic status command.
|
|
39
|
+
- Basic handoff generation and validation.
|
|
40
|
+
- Basic review recording.
|
|
41
|
+
- Basic evidence registration.
|
|
42
|
+
- Definition of Ready gate.
|
|
43
|
+
- Provider configuration schema stub.
|
|
44
|
+
- Version-controlled pre-commit hook contract for static analysis.
|
|
45
|
+
|
|
46
|
+
## Out of Scope
|
|
47
|
+
|
|
48
|
+
- Real LLM API calls.
|
|
49
|
+
- Automatic code editing.
|
|
50
|
+
- Parallel process execution.
|
|
51
|
+
- Playwright test generation.
|
|
52
|
+
- Python worker implementation.
|
|
53
|
+
- Web UI.
|
|
54
|
+
- Cloud storage.
|
|
55
|
+
- Multi-user authentication.
|
|
56
|
+
|
|
57
|
+
## Sprint Backlog
|
|
58
|
+
|
|
59
|
+
### Story CLI-001: Initialize Workflow
|
|
60
|
+
Priority: P0
|
|
61
|
+
|
|
62
|
+
As a user, I want `orchestra init` to create workflow files so that a repo can opt into orchestrated agent work.
|
|
63
|
+
|
|
64
|
+
Acceptance criteria:
|
|
65
|
+
- Creates `.agent-workflow/`.
|
|
66
|
+
- Creates `tasks.json`, `roles.json`, `locks.json`, `events.jsonl`, and `config.json`.
|
|
67
|
+
- Creates `decisions/`, `handoffs/`, `evidence/`, and `reviews/`.
|
|
68
|
+
- Does not overwrite existing workflow files unless `--force` is passed.
|
|
69
|
+
- Prints next steps after initialization.
|
|
70
|
+
|
|
71
|
+
Developer tasks:
|
|
72
|
+
- Define default workspace layout.
|
|
73
|
+
- Implement init command.
|
|
74
|
+
- Add schema validation for generated files.
|
|
75
|
+
- Add unit tests for normal init, existing files, and force mode.
|
|
76
|
+
|
|
77
|
+
QA notes:
|
|
78
|
+
- Verify init in empty repo.
|
|
79
|
+
- Verify init in repo with existing `.agent-workflow/`.
|
|
80
|
+
- Verify generated JSON is valid.
|
|
81
|
+
|
|
82
|
+
Evidence required:
|
|
83
|
+
- Unit test command and result.
|
|
84
|
+
- Sample generated tree.
|
|
85
|
+
|
|
86
|
+
### Story ROLE-001: Define Role Catalog
|
|
87
|
+
Priority: P0
|
|
88
|
+
|
|
89
|
+
As an administrator, I want default roles generated during init so that tasks can be assigned to explicit responsibilities.
|
|
90
|
+
|
|
91
|
+
Acceptance criteria:
|
|
92
|
+
- `roles.json` includes core roles: parent, product_owner, architect, developer, qa, security, devops, sre, dba, ux_ui_designer, release_manager, compliance_privacy, technical_writer.
|
|
93
|
+
- Each role includes name, description, capabilities, required handoff fields, and blocking authority.
|
|
94
|
+
- Role schema rejects roles that are missing a role ID, description, or capabilities.
|
|
95
|
+
|
|
96
|
+
Developer tasks:
|
|
97
|
+
- Define role schema.
|
|
98
|
+
- Add default roles.
|
|
99
|
+
- Add validation tests.
|
|
100
|
+
|
|
101
|
+
QA notes:
|
|
102
|
+
- Verify all default roles are created.
|
|
103
|
+
- Verify invalid roles fail validation.
|
|
104
|
+
|
|
105
|
+
Evidence required:
|
|
106
|
+
- Unit test command and result.
|
|
107
|
+
- `roles.json` sample.
|
|
108
|
+
|
|
109
|
+
### Story TASK-001: Persist Task State
|
|
110
|
+
Priority: P0
|
|
111
|
+
|
|
112
|
+
As a parent agent, I want task state persisted to disk so that workflow status survives process restarts.
|
|
113
|
+
|
|
114
|
+
Acceptance criteria:
|
|
115
|
+
- `tasks.json` stores task ID, title, owner role, status, dependencies, inputs, outputs, gates, and timestamps.
|
|
116
|
+
- Supports statuses: pending, ready, in_progress, blocked, review, approved, rejected, done, canceled.
|
|
117
|
+
- Rejects dependencies that point to unknown task IDs.
|
|
118
|
+
- Rejects tasks assigned to unknown roles.
|
|
119
|
+
|
|
120
|
+
Developer tasks:
|
|
121
|
+
- Define task schema.
|
|
122
|
+
- Implement task read/write helpers.
|
|
123
|
+
- Add validation for dependencies and role references.
|
|
124
|
+
- Add unit tests.
|
|
125
|
+
|
|
126
|
+
QA notes:
|
|
127
|
+
- Verify valid task file passes.
|
|
128
|
+
- Verify unknown dependency fails.
|
|
129
|
+
- Verify unknown owner role fails.
|
|
130
|
+
|
|
131
|
+
Evidence required:
|
|
132
|
+
- Unit test command and result.
|
|
133
|
+
- Example task graph.
|
|
134
|
+
|
|
135
|
+
### Story MEM-001: Append Workflow Events
|
|
136
|
+
Priority: P0
|
|
137
|
+
|
|
138
|
+
As a system, I want all workflow events stored in an append-only log so that collaboration is auditable.
|
|
139
|
+
|
|
140
|
+
Acceptance criteria:
|
|
141
|
+
- Appends JSON lines to `events.jsonl`.
|
|
142
|
+
- Event includes ID, type, task ID, actor role, timestamp, summary, artifact links, and metadata.
|
|
143
|
+
- Supports event types: WORKFLOW_INITIALIZED, TASK_ASSIGNED, HANDOFF_READY, REVIEW_RECORDED, EVIDENCE_ADDED, GATE_BLOCKED, GATE_PASSED.
|
|
144
|
+
- Does not rewrite existing events.
|
|
145
|
+
|
|
146
|
+
Developer tasks:
|
|
147
|
+
- Define event schema.
|
|
148
|
+
- Implement append helper.
|
|
149
|
+
- Add event ID generation.
|
|
150
|
+
- Add unit tests for append-only behavior.
|
|
151
|
+
|
|
152
|
+
QA notes:
|
|
153
|
+
- Verify multiple events append in order.
|
|
154
|
+
- Verify invalid event is rejected.
|
|
155
|
+
|
|
156
|
+
Evidence required:
|
|
157
|
+
- Unit test command and result.
|
|
158
|
+
- Example `events.jsonl`.
|
|
159
|
+
|
|
160
|
+
### Story CLI-002: Show Status
|
|
161
|
+
Priority: P0
|
|
162
|
+
|
|
163
|
+
As a user, I want `orchestra status` to show workflow progress so that I can inspect current state quickly.
|
|
164
|
+
|
|
165
|
+
Acceptance criteria:
|
|
166
|
+
- Shows task counts by status.
|
|
167
|
+
- Shows blocked tasks and reasons.
|
|
168
|
+
- Shows pending reviews and missing evidence.
|
|
169
|
+
- Shows active locks.
|
|
170
|
+
- Supports `--json`.
|
|
171
|
+
|
|
172
|
+
Developer tasks:
|
|
173
|
+
- Implement status reader.
|
|
174
|
+
- Add human-readable formatter.
|
|
175
|
+
- Add JSON output.
|
|
176
|
+
- Add unit tests for empty, normal, and blocked states.
|
|
177
|
+
|
|
178
|
+
QA notes:
|
|
179
|
+
- Verify status before and after sample tasks.
|
|
180
|
+
- Verify JSON output is parseable.
|
|
181
|
+
|
|
182
|
+
Evidence required:
|
|
183
|
+
- Unit test command and result.
|
|
184
|
+
- Example status output.
|
|
185
|
+
|
|
186
|
+
### Story GATE-001: Enforce Definition of Ready
|
|
187
|
+
Priority: P0
|
|
188
|
+
|
|
189
|
+
As a parent agent, I want readiness validated before implementation so that work does not start with hidden ambiguity.
|
|
190
|
+
|
|
191
|
+
Acceptance criteria:
|
|
192
|
+
- Validates backlog item, goal, scope, acceptance criteria, assumptions, risks, selected roles, and test strategy.
|
|
193
|
+
- Produces a readiness report.
|
|
194
|
+
- Blocks tasks from moving to `ready` when required fields are missing.
|
|
195
|
+
- Allows mechanical-task override with rationale.
|
|
196
|
+
|
|
197
|
+
Developer tasks:
|
|
198
|
+
- Define readiness schema.
|
|
199
|
+
- Implement readiness validator.
|
|
200
|
+
- Add transition guard for pending to ready.
|
|
201
|
+
- Add unit tests.
|
|
202
|
+
|
|
203
|
+
QA notes:
|
|
204
|
+
- Verify complete task passes readiness.
|
|
205
|
+
- Verify missing acceptance criteria blocks readiness.
|
|
206
|
+
- Verify mechanical override records rationale.
|
|
207
|
+
|
|
208
|
+
Evidence required:
|
|
209
|
+
- Unit test command and result.
|
|
210
|
+
- Example readiness report.
|
|
211
|
+
|
|
212
|
+
### Story HAND-001: Generate Developer-to-QA Handoff
|
|
213
|
+
Priority: P1
|
|
214
|
+
|
|
215
|
+
As a Developer, I want a structured handoff to QA so that QA can test with full context.
|
|
216
|
+
|
|
217
|
+
Acceptance criteria:
|
|
218
|
+
- Generates handoff markdown under `handoffs/`.
|
|
219
|
+
- Includes task ID, changed components, behavior changed, unit tests, commands run, known gaps, risks, and recommended Playwright coverage.
|
|
220
|
+
- Validates required fields before recording HANDOFF_READY event.
|
|
221
|
+
|
|
222
|
+
Developer tasks:
|
|
223
|
+
- Create handoff template.
|
|
224
|
+
- Implement `orchestra handoff`.
|
|
225
|
+
- Validate required fields.
|
|
226
|
+
- Add unit tests.
|
|
227
|
+
|
|
228
|
+
QA notes:
|
|
229
|
+
- Verify handoff generated from valid input.
|
|
230
|
+
- Verify missing test evidence blocks handoff.
|
|
231
|
+
|
|
232
|
+
Evidence required:
|
|
233
|
+
- Unit test command and result.
|
|
234
|
+
- Example handoff markdown.
|
|
235
|
+
|
|
236
|
+
### Story CLI-004: Record Review
|
|
237
|
+
Priority: P1
|
|
238
|
+
|
|
239
|
+
As a reviewer, I want `orchestra review` to approve, block, or request changes so that gate status is auditable.
|
|
240
|
+
|
|
241
|
+
Acceptance criteria:
|
|
242
|
+
- Writes review markdown under `reviews/`.
|
|
243
|
+
- Captures reviewer role, result, severity, findings, recommendation, and artifact links.
|
|
244
|
+
- Appends REVIEW_RECORDED event.
|
|
245
|
+
- Blocks invalid reviewer role.
|
|
246
|
+
|
|
247
|
+
Developer tasks:
|
|
248
|
+
- Create review template.
|
|
249
|
+
- Implement review command.
|
|
250
|
+
- Add validation.
|
|
251
|
+
- Add unit tests.
|
|
252
|
+
|
|
253
|
+
QA notes:
|
|
254
|
+
- Verify approve, block, and request changes.
|
|
255
|
+
- Verify unknown reviewer role fails.
|
|
256
|
+
|
|
257
|
+
Evidence required:
|
|
258
|
+
- Unit test command and result.
|
|
259
|
+
- Example review artifact.
|
|
260
|
+
|
|
261
|
+
### Story CLI-005: Add Evidence
|
|
262
|
+
Priority: P1
|
|
263
|
+
|
|
264
|
+
As an agent, I want `orchestra evidence add` to register evidence artifacts so that tests and reviews are traceable.
|
|
265
|
+
|
|
266
|
+
Acceptance criteria:
|
|
267
|
+
- Supports evidence types: command, file, screenshot, trace, video, log, report.
|
|
268
|
+
- Links evidence to task and role.
|
|
269
|
+
- Appends EVIDENCE_ADDED event.
|
|
270
|
+
- Validates referenced file paths when evidence points to a file.
|
|
271
|
+
|
|
272
|
+
Developer tasks:
|
|
273
|
+
- Define evidence schema.
|
|
274
|
+
- Implement evidence command.
|
|
275
|
+
- Add validation and tests.
|
|
276
|
+
|
|
277
|
+
QA notes:
|
|
278
|
+
- Verify command evidence.
|
|
279
|
+
- Verify file evidence with existing path.
|
|
280
|
+
- Verify missing file fails validation.
|
|
281
|
+
|
|
282
|
+
Evidence required:
|
|
283
|
+
- Unit test command and result.
|
|
284
|
+
- Example evidence artifact.
|
|
285
|
+
|
|
286
|
+
### Story MODEL-002: Configure Models by Role
|
|
287
|
+
Priority: P2
|
|
288
|
+
|
|
289
|
+
As an administrator, I want provider and model preferences in config so that multi-model routing can be added without schema redesign.
|
|
290
|
+
|
|
291
|
+
Acceptance criteria:
|
|
292
|
+
- `config.json` supports provider defaults and per-role model settings.
|
|
293
|
+
- Settings include provider, model, fallbacks, max tokens, max cost, timeout, retries, and required capabilities.
|
|
294
|
+
- No real provider call is implemented.
|
|
295
|
+
|
|
296
|
+
Developer tasks:
|
|
297
|
+
- Define provider config schema.
|
|
298
|
+
- Add default config stub.
|
|
299
|
+
- Add validation tests.
|
|
300
|
+
|
|
301
|
+
QA notes:
|
|
302
|
+
- Verify valid multi-provider config passes.
|
|
303
|
+
- Verify that a role referencing an unknown provider fails validation.
|
|
304
|
+
|
|
305
|
+
Evidence required:
|
|
306
|
+
- Unit test command and result.
|
|
307
|
+
- Example provider config.
|
|
308
|
+
|
|
309
|
+
### Story CFG-004: Define Tool Runtime Contracts
|
|
310
|
+
Priority: P1
|
|
311
|
+
|
|
312
|
+
As a developer, I want tool runtime contracts for Node, Playwright, and optional Python workers so that future agents can invoke tools consistently.
|
|
313
|
+
|
|
314
|
+
Acceptance criteria:
|
|
315
|
+
- `config.json` can declare tool runtimes for node, playwright, and python.
|
|
316
|
+
- Tool entries include command, args, working directory, allowed roles, timeout, evidence behavior, and risk level.
|
|
317
|
+
- Python tools are disabled by default unless explicitly configured.
|
|
318
|
+
- Runtime config is validated without executing tools.
|
|
319
|
+
|
|
320
|
+
Developer tasks:
|
|
321
|
+
- Add tool runtime schema.
|
|
322
|
+
- Add default Node and Playwright tool entries.
|
|
323
|
+
- Add optional Python worker config example.
|
|
324
|
+
- Add validation tests.
|
|
325
|
+
|
|
326
|
+
QA notes:
|
|
327
|
+
- Verify default tool config passes validation.
|
|
328
|
+
- Verify invalid runtime command fails validation.
|
|
329
|
+
- Verify Python runtime remains opt-in.
|
|
330
|
+
|
|
331
|
+
Evidence required:
|
|
332
|
+
- Unit test command and result.
|
|
333
|
+
- Example tool runtime config.
|
|
334
|
+
|
|
335
|
+
### Story CFG-005: Enforce Static Analysis Git Hooks
|
|
336
|
+
Priority: P0
|
|
337
|
+
|
|
338
|
+
As a maintainer, I want a version-controlled pre-commit hook contract so that static analysis runs before any commit.
|
|
339
|
+
|
|
340
|
+
Acceptance criteria:
|
|
341
|
+
- Defines required pre-commit checks for TypeScript/Node: format check, lint, typecheck, secret scan, and staged-file validation.
|
|
342
|
+
- Defines optional Python checks when Python workers exist: Ruff, typecheck, Bandit, and dependency audit.
|
|
343
|
+
- Pre-commit hook fails closed and blocks commit when checks fail.
|
|
344
|
+
- `--no-verify` bypass requires explicit user approval and a recorded follow-up item.
|
|
345
|
+
- CI requirements include the same checks plus deeper SAST, dependency scan, Playwright, container, and IaC checks when applicable.
|
|
346
|
+
|
|
347
|
+
Developer tasks:
|
|
348
|
+
- Add static analysis hook requirements to config schema.
|
|
349
|
+
- Add default hook command definitions for Node/TypeScript.
|
|
350
|
+
- Add optional Python hook command examples.
|
|
351
|
+
- Add validation tests for required hook entries.
|
|
352
|
+
|
|
353
|
+
QA notes:
|
|
354
|
+
- Verify default hook config includes required checks.
|
|
355
|
+
- Verify missing lint or typecheck entry fails validation.
|
|
356
|
+
- Verify Python checks are only required when Python workers are enabled.
|
|
357
|
+
|
|
358
|
+
Evidence required:
|
|
359
|
+
- Unit test command and result.
|
|
360
|
+
- Example pre-commit hook config.
|
|
361
|
+
|
|
362
|
+
## Sprint 1 Definition of Ready
|
|
363
|
+
|
|
364
|
+
- Product goal and MVP scope are documented.
|
|
365
|
+
- Stories have acceptance criteria.
|
|
366
|
+
- Out-of-scope work is explicit.
|
|
367
|
+
- File-based architecture is accepted for MVP.
|
|
368
|
+
- No external provider dependency is required.
|
|
369
|
+
|
|
370
|
+
## Sprint 1 Definition of Done
|
|
371
|
+
|
|
372
|
+
- CLI can initialize workflow state.
|
|
373
|
+
- Roles, tasks, events, handoffs, reviews, and evidence have schemas.
|
|
374
|
+
- Status command reports current workflow health.
|
|
375
|
+
- Readiness gate blocks incomplete work.
|
|
376
|
+
- Tool runtime contracts exist for Node, Playwright, and optional Python workers.
|
|
377
|
+
- Static analysis pre-commit hook contract exists and is validated.
|
|
378
|
+
- Unit tests cover core schema validation and CLI behavior.
|
|
379
|
+
- Documentation includes setup, commands, examples, and known limitations.
|
|
380
|
+
|
|
381
|
+
## Proposed Technical Architecture
|
|
382
|
+
|
|
383
|
+
```text
|
|
384
|
+
orchestra
|
|
385
|
+
commands/
|
|
386
|
+
init
|
|
387
|
+
status
|
|
388
|
+
handoff
|
|
389
|
+
review
|
|
390
|
+
evidence
|
|
391
|
+
core/
|
|
392
|
+
workspace
|
|
393
|
+
schemas
|
|
394
|
+
events
|
|
395
|
+
tasks
|
|
396
|
+
roles
|
|
397
|
+
gates
|
|
398
|
+
artifacts
|
|
399
|
+
tools
|
|
400
|
+
tools/
|
|
401
|
+
playwright
|
|
402
|
+
python
|
|
403
|
+
.agent-workflow/
|
|
404
|
+
config.json
|
|
405
|
+
roles.json
|
|
406
|
+
tasks.json
|
|
407
|
+
locks.json
|
|
408
|
+
events.jsonl
|
|
409
|
+
decisions/
|
|
410
|
+
handoffs/
|
|
411
|
+
evidence/
|
|
412
|
+
reviews/
|
|
413
|
+
```
|
|
414
|
+
|
|
415
|
+
## Sprint Risks
|
|
416
|
+
|
|
417
|
+
- Scope creep into real LLM orchestration before the file-based workflow is proven.
|
|
418
|
+
- Too much process overhead for small tasks.
|
|
419
|
+
- Schema too rigid for different repos.
|
|
420
|
+
- Evidence capture can become noisy if summaries are not concise.
|
|
421
|
+
- Hybrid runtime can add complexity if Python workers are allowed without strict contracts.
|
|
422
|
+
|
|
423
|
+
## Recommended First Implementation Order
|
|
424
|
+
|
|
425
|
+
1. Workspace layout and init command.
|
|
426
|
+
2. Schemas for roles, tasks, events, and config.
|
|
427
|
+
3. Event append helper.
|
|
428
|
+
4. Status command.
|
|
429
|
+
5. Readiness gate.
|
|
430
|
+
6. Tool runtime contract schema.
|
|
431
|
+
7. Static analysis pre-commit hook contract.
|
|
432
|
+
8. Handoff, review, and evidence commands.
|
|
433
|
+
9. Documentation and examples.
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# Open Orchestra MVP
|
|
2
|
+
|
|
3
|
+
Open Orchestra is a local-first, provider-agnostic framework for governed multi-agent software delivery. The public CLI is `orchestra`.
|
|
4
|
+
|
|
5
|
+
It stores workflow state in `.agent-workflow/` and coordinates agents through files, events, handoffs, reviews, evidence, gates, locks, and model provenance.
|
|
6
|
+
|
|
7
|
+
## Compatibility
|
|
8
|
+
|
|
9
|
+
- `orchestra` is the only public CLI name.
|
|
10
|
+
- Existing `.agent-workflow/` data remains valid.
|
|
11
|
+
- Existing `AGENTS.md`, `CLAUDE.md`, Cursor rules, and generated instruction files remain supported entry points.
|
|
12
|
+
- `ORCHESTRA.md` is the intended future primary guide name; it is not required for existing projects.
|
|
13
|
+
|
|
14
|
+
## Commands
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
npm install
|
|
18
|
+
npm run build
|
|
19
|
+
npm run lint
|
|
20
|
+
npm run typecheck
|
|
21
|
+
npm run secret-scan
|
|
22
|
+
npm test
|
|
23
|
+
npm run hooks:install
|
|
24
|
+
node bin/orchestra.js init
|
|
25
|
+
node bin/orchestra.js status
|
|
26
|
+
node bin/orchestra.js status --json
|
|
27
|
+
node bin/orchestra.js validate
|
|
28
|
+
node bin/orchestra.js validate --json
|
|
29
|
+
node bin/orchestra.js task add --id TASK-1 --title "First task" --owner developer --paths src/commands.ts
|
|
30
|
+
node bin/orchestra.js task list
|
|
31
|
+
node bin/orchestra.js task update --id TASK-1 --status in_progress
|
|
32
|
+
node bin/orchestra.js task deps --id TASK-1
|
|
33
|
+
node bin/orchestra.js graph plan
|
|
34
|
+
node bin/orchestra.js graph plan --json
|
|
35
|
+
node bin/orchestra.js graph run-next
|
|
36
|
+
node bin/orchestra.js graph run-next --json
|
|
37
|
+
node bin/orchestra.js graph run-ready
|
|
38
|
+
node bin/orchestra.js graph run-ready --json
|
|
39
|
+
node bin/orchestra.js lock claim --task TASK-1 --role developer --path src/commands.ts --reason "editing commands"
|
|
40
|
+
node bin/orchestra.js lock list
|
|
41
|
+
node bin/orchestra.js lock release --id lock-123
|
|
42
|
+
node bin/orchestra.js roles list
|
|
43
|
+
node bin/orchestra.js roles list --json
|
|
44
|
+
node bin/orchestra.js readiness --task TASK-1
|
|
45
|
+
node bin/orchestra.js gate --gate architecture --task TASK-1
|
|
46
|
+
node bin/orchestra.js gate --gate qa-release --task TASK-1
|
|
47
|
+
node bin/orchestra.js gate --gate risk-review --task TASK-1
|
|
48
|
+
node bin/orchestra.js gate --gate release-readiness --task TASK-1
|
|
49
|
+
node bin/orchestra.js review list
|
|
50
|
+
node bin/orchestra.js decision add --task TASK-1 --owner architect --title "Use service layer" --context "CLI should stay thin" --decision "Move behavior to services" --consequences "Reusable core"
|
|
51
|
+
node bin/orchestra.js decision list --task TASK-1
|
|
52
|
+
node bin/orchestra.js evidence list
|
|
53
|
+
node bin/orchestra.js usage --task TASK-1
|
|
54
|
+
node bin/orchestra.js usage --task TASK-1 --json
|
|
55
|
+
node bin/orchestra.js budget check --task TASK-1
|
|
56
|
+
node bin/orchestra.js budget check --task TASK-1 --json
|
|
57
|
+
node bin/orchestra.js config show
|
|
58
|
+
node bin/orchestra.js config show --json
|
|
59
|
+
node bin/orchestra.js approvals list --task TASK-1
|
|
60
|
+
node bin/orchestra.js approvals show --id TASK-1-budget-fallback
|
|
61
|
+
node bin/orchestra.js approvals approve --id TASK-1-budget-fallback --approver "user" --rationale "approved cheaper fallback"
|
|
62
|
+
node bin/orchestra.js approvals reject --id TASK-1-budget-fallback --approver "user" --rationale "not approved"
|
|
63
|
+
node bin/orchestra.js summary
|
|
64
|
+
node bin/orchestra.js summary --json
|
|
65
|
+
node bin/orchestra.js pr-summary --task TASK-1
|
|
66
|
+
node bin/orchestra.js pr-summary --task TASK-1 --json
|
|
67
|
+
node bin/orchestra.js context --task TASK-1
|
|
68
|
+
node bin/orchestra.js context --task TASK-1 --json
|
|
69
|
+
node bin/orchestra.js plan --task TASK-1
|
|
70
|
+
node bin/orchestra.js plan --task TASK-1 --json
|
|
71
|
+
node bin/orchestra.js run --task TASK-1
|
|
72
|
+
node bin/orchestra.js run --task TASK-1 --json
|
|
73
|
+
node bin/orchestra.js run --task TASK-1 --approve-budget-fallback --approver "user" --rationale "approved cheaper fallback"
|
|
74
|
+
node bin/orchestra.js run --task TASK-1 --json # consumes stored approved budget fallback when present
|
|
75
|
+
node bin/orchestra.js playwright plan --task TASK-1
|
|
76
|
+
node bin/orchestra.js playwright plan --task TASK-1 --json
|
|
77
|
+
node bin/orchestra.js playwright evidence --task TASK-1 --kind trace --path trace.zip --summary "trace captured"
|
|
78
|
+
node bin/orchestra.js model providers
|
|
79
|
+
node bin/orchestra.js model providers --json
|
|
80
|
+
node bin/orchestra.js model set-role --role developer --provider openai --model gpt-example
|
|
81
|
+
node bin/orchestra.js model complete-fake --provider primary --model fake-model --prompt "hello" --fallbacks backup --fail-provider primary
|
|
82
|
+
node bin/orchestra.js model provenance add --task TASK-1 --role developer --provider openai --model gpt-example --prompt-id prompt-1 --response-id response-1 --finish-reason stop
|
|
83
|
+
node bin/orchestra.js model provenance list --task TASK-1 --json
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Workflow Files
|
|
87
|
+
|
|
88
|
+
```text
|
|
89
|
+
.agent-workflow/
|
|
90
|
+
config.json
|
|
91
|
+
roles.json
|
|
92
|
+
tasks.json
|
|
93
|
+
locks.json
|
|
94
|
+
events.jsonl
|
|
95
|
+
approvals/
|
|
96
|
+
decisions/
|
|
97
|
+
handoffs/
|
|
98
|
+
evidence/
|
|
99
|
+
reviews/
|
|
100
|
+
runs/
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
## Stable JSON Contracts
|
|
105
|
+
|
|
106
|
+
The VS Code Control Center should consume stable JSON outputs instead of parsing human-readable text or duplicating file reads. Current UI-facing commands include:
|
|
107
|
+
|
|
108
|
+
- `orchestra status --json`
|
|
109
|
+
- `orchestra validate --json`
|
|
110
|
+
- `orchestra graph plan --json`
|
|
111
|
+
- `orchestra summary --json`
|
|
112
|
+
- `orchestra context --task <id> --json`
|
|
113
|
+
- `orchestra approvals list --json`
|
|
114
|
+
- `orchestra evidence list --json`
|
|
115
|
+
- `orchestra roles list --json`
|
|
116
|
+
- `orchestra config show --json`
|
|
117
|
+
|
|
118
|
+
The role catalog JSON includes capabilities, required handoff fields, blocking authority, activation criteria, expected evidence, and gate participation.
|
|
119
|
+
|
|
120
|
+
## Role Activation
|
|
121
|
+
|
|
122
|
+
Open Orchestra initializes a broad role catalog but does not require every role to participate in every task. The parent/orchestrator should activate roles based on task type, risk, touched paths, impact areas, and gate requirements.
|
|
123
|
+
|
|
124
|
+
Default roles include delivery roles such as Product Manager, Product Owner, Business Analyst, Architect, Developer, QA, Security, DevOps, SRE, DBA, UX/UI Designer, Release Manager, Compliance/Privacy, and Technical Writer. They also include orchestration roles for modern multi-agent systems: Planner, Reviewer/Critic, Toolsmith, Context Curator, Policy/Governance, Observability/Incident Response, Data/Privacy Officer, Domain Expert, UX Researcher/Accessibility Reviewer, Performance Engineer, and Game Designer.
|
|
125
|
+
|
|
126
|
+
Each default role declares:
|
|
127
|
+
|
|
128
|
+
- activation criteria for when the role should be used;
|
|
129
|
+
- expected evidence that should be produced or reviewed;
|
|
130
|
+
- gate participation for readiness, architecture, QA, risk, or release decisions.
|
|
131
|
+
|
|
132
|
+
This keeps collaboration explicit even when the underlying LLM provider does not support native subagent-to-subagent communication.
|
|
133
|
+
|
|
134
|
+
## VS Code Control Center
|
|
135
|
+
|
|
136
|
+
The VS Code Control Center scaffold is under `extensions/vscode-open-orchestra`. It follows the same boundary as the CLI: extension UI calls stable JSON commands and renders results, while workflow state remains in Open Orchestra services and files. The first supported commands are status, validate, graph plan, summary, roles, approvals, evidence, config inspection, and Playwright evidence attachment.
|
|
137
|
+
|
|
138
|
+
## Reusable Core
|
|
139
|
+
|
|
140
|
+
- `src/types.ts` owns workflow domain contracts shared by CLI and future adapters.
|
|
141
|
+
- `src/workflow-services.ts` owns reusable workflow behavior for tasks, locks, readiness, handoffs, reviews, evidence, status, and summary.
|
|
142
|
+
- `orchestra task deps` checks whether task dependencies are approved or done before downstream work starts.
|
|
143
|
+
- `orchestra graph plan` classifies current tasks as dependency-ready, blocked, or complete before execution.
|
|
144
|
+
- `orchestra graph run-next` executes the first dependency-ready task through the existing run flow.
|
|
145
|
+
- `orchestra graph run-ready` executes the current dependency-ready task snapshot sequentially through the existing run flow.
|
|
146
|
+
- Graph scheduling omits tasks with active task locks or path conflicts and reports them as locked.
|
|
147
|
+
- Graph batch runs write summary artifacts under `.agent-workflow/runs/batches/`.
|
|
148
|
+
- `orchestra decision` records ADR-like decision artifacts and append-only events for task context.
|
|
149
|
+
- `orchestra context` aggregates task state, dependencies, locks, decisions, handoffs, reviews, evidence, gates, model provenance, and risks before an agent starts work.
|
|
150
|
+
- `orchestra plan` generates an ordered parent-agent execution plan by role without invoking model providers.
|
|
151
|
+
- `orchestra run` checks task dependencies and usage budgets before execution. It can then execute an approved budget fallback with the deterministic fake provider, recording step events and model provenance without network calls.
|
|
152
|
+
- Budget fallback requests write reviewable approval proposals under `.agent-workflow/approvals/`.
|
|
153
|
+
- Stored approved budget fallback proposals can be consumed by a later `orchestra run` without repeating approval flags.
|
|
154
|
+
- Run steps write artifacts under `.agent-workflow/runs/<task>/` so downstream agents can consume prior role output.
|
|
155
|
+
- `orchestra pr-summary` generates PR or release-review context from task state, evidence, reviews, gates, handoffs, locks, rollout notes, and rollback notes.
|
|
156
|
+
- `orchestra playwright plan` generates deterministic Playwright test plans from task acceptance criteria and QA context.
|
|
157
|
+
- `orchestra playwright evidence` attaches Playwright screenshots, traces, videos, or reports through the existing evidence workflow.
|
|
158
|
+
- `orchestra usage` aggregates request counts, token totals, and estimated model cost from recorded model provenance events.
|
|
159
|
+
- `orchestra budget check` enforces configured usage budgets from local provenance events and fails when limits are exceeded.
|
|
160
|
+
- `orchestra approvals` lists, shows, approves, and rejects approval proposal artifacts through append-only decision events.
|
|
161
|
+
- `src/workflow-gates.ts` owns reusable gate evaluators. Current gates include `architecture`, which blocks non-trivial work until proposal, user approval, and architect approval are present; `qa-release`, which requires QA plan, execution status, evidence, and no unresolved critical QA block; `risk-review`, which maps impact areas to required risk-owner reviews; and `release-readiness`, which composes readiness, architecture, QA, risk, evidence, handoff, review, and lock checks.
|
|
162
|
+
- `src/model-providers.ts` owns provider-agnostic model contracts, an in-memory provider registry, and a fake provider for deterministic tests.
|
|
163
|
+
- `orchestra model providers` lists configured provider routing without invoking real model APIs.
|
|
164
|
+
- `orchestra model set-role` configures provider/model routing per role without invoking real model APIs.
|
|
165
|
+
- `orchestra model complete-fake` simulates provider fallback behavior without invoking real model APIs.
|
|
166
|
+
- `orchestra model provenance` records model usage metadata in the append-only event log without storing raw prompts, raw responses, or secrets.
|
|
167
|
+
- `src/commands.ts` is the CLI adapter: it parses command options, delegates to services, and renders terminal output.
|
|
168
|
+
- Services accept an explicit repo root, so future web, GitHub Actions, Playwright, or multi-model orchestration layers can reuse the same core without depending on `process.cwd()`.
|
|
169
|
+
|
|
170
|
+
## Current Scope
|
|
171
|
+
|
|
172
|
+
- No real LLM calls.
|
|
173
|
+
- No automatic code editing.
|
|
174
|
+
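- No Playwright test code generation yet; `orchestra playwright plan` produces test plans only, and `orchestra playwright evidence` attaches existing artifacts.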
|
|
175
|
+
- Python workers are represented in config only and disabled by default.
|
|
176
|
+
- Static analysis is enforced locally through `.githooks/pre-commit` after running `npm run hooks:install`.
|
package/package.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@jterrats/open-orchestra",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"bin": {
|
|
6
|
+
"orchestra": "bin/orchestra.js"
|
|
7
|
+
},
|
|
8
|
+
"scripts": {
|
|
9
|
+
"build": "tsc",
|
|
10
|
+
"typecheck": "tsc --noEmit",
|
|
11
|
+
"test": "npm run build && node --test test/**/*.js extensions/**/*.test.cjs",
|
|
12
|
+
"lint": "eslint . && prettier --check \"{bin,scripts,test}/**/*.js\" \"extensions/**/*.{cjs,json,md}\" \"src/**/*.ts\" \"*.json\"",
|
|
13
|
+
"format": "prettier --write \"{bin,scripts,test}/**/*.js\" \"extensions/**/*.{cjs,json,md}\" \"src/**/*.ts\" \"*.json\"",
|
|
14
|
+
"secret-scan": "node scripts/secret-scan.js",
|
|
15
|
+
"validate:workflow": "sh -c 'test ! -d .agent-workflow || (npm run build && node bin/orchestra.js validate)'",
|
|
16
|
+
"precommit": "npm run lint && npm run typecheck && npm run secret-scan && npm test && npm run validate:workflow",
|
|
17
|
+
"hooks:install": "git config core.hooksPath .githooks"
|
|
18
|
+
},
|
|
19
|
+
"engines": {
|
|
20
|
+
"node": ">=22"
|
|
21
|
+
},
|
|
22
|
+
"devDependencies": {
|
|
23
|
+
"@eslint/js": "^10.0.1",
|
|
24
|
+
"@types/node": "^25.6.0",
|
|
25
|
+
"eslint": "^10.2.1",
|
|
26
|
+
"prettier": "^3.8.3",
|
|
27
|
+
"typescript": "^6.0.3",
|
|
28
|
+
"typescript-eslint": "^8.59.0"
|
|
29
|
+
},
|
|
30
|
+
"dependencies": {
|
|
31
|
+
"chart.js": "^4.5.1"
|
|
32
|
+
},
|
|
33
|
+
"description": "Local control plane for AI-assisted development orchestration, evidence gates, and agent workflows.",
|
|
34
|
+
"repository": {
|
|
35
|
+
"type": "git",
|
|
36
|
+
"url": "git+https://github.com/jterrats/open-orchestra.git"
|
|
37
|
+
},
|
|
38
|
+
"homepage": "https://open-orchestra.jterrats.dev",
|
|
39
|
+
"bugs": {
|
|
40
|
+
"url": "https://github.com/jterrats/open-orchestra/issues"
|
|
41
|
+
},
|
|
42
|
+
"license": "MIT",
|
|
43
|
+
"files": [
|
|
44
|
+
"bin/",
|
|
45
|
+
"dist/",
|
|
46
|
+
"rules/",
|
|
47
|
+
"docs/",
|
|
48
|
+
"AGENTS.md",
|
|
49
|
+
"CLAUDE.md",
|
|
50
|
+
"README.md",
|
|
51
|
+
"package.json"
|
|
52
|
+
],
|
|
53
|
+
"publishConfig": {
|
|
54
|
+
"access": "public"
|
|
55
|
+
},
|
|
56
|
+
"keywords": [
|
|
57
|
+
"ai-agents",
|
|
58
|
+
"agent-orchestration",
|
|
59
|
+
"workflow-gates",
|
|
60
|
+
"playwright",
|
|
61
|
+
"developer-tools"
|
|
62
|
+
]
|
|
63
|
+
}
|