@kbediako/codex-orchestrator 0.1.33 → 0.1.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -232,6 +232,19 @@ codex-orchestrator doctor --usage
232
232
  ```
233
233
  `doctor --usage` prints adoption KPIs (advanced/cloud/rlm/collab/delegation coverage), and per-run `run-summary.json` now includes a `usageKpi` section plus cloud fallback metadata when preflight downgrades to MCP.
234
234
 
235
+ Issue bundle logging (downstream dogfooding / repro handoff):
236
+ ```bash
237
+ codex-orchestrator doctor --issue-log --issue-title "Observed failure" --issue-notes "what happened"
238
+ ```
239
+ `doctor --issue-log` appends an entry to `docs/codex-orchestrator-issues.md` (override the path via `--issue-log-path`) and writes a JSON bundle under `out/<resolved-task>/doctor/issue-bundles/` containing doctor/cloud context. The latest run context is included when available.
240
+
241
+ Auto-capture issue bundles when runs fail:
242
+ ```bash
243
+ codex-orchestrator start <pipeline> --auto-issue-log
244
+ codex-orchestrator flow --task <task-id> --auto-issue-log
245
+ ```
246
+ This captures both post-manifest run failures and setup failures that occur before a run manifest is created (for example, a strict repo-config enforcement failure).
247
+
235
248
  Cloud preflight check (without starting a pipeline):
236
249
  ```bash
237
250
  codex-orchestrator doctor --cloud-preflight
@@ -243,10 +256,13 @@ codex-orchestrator doctor --cloud-preflight
243
256
  - Enable required MCP servers with least privilege: `codex-orchestrator mcp enable --servers delegation --yes` (plan with `--format json`; omit `--servers` only when you intentionally want all disabled servers enabled; env/secret values are redacted in displayed command lines)
244
257
  - Low-friction docs->implementation guardrails: `codex-orchestrator flow --task <task-id>`
245
258
  - Validate + measure adoption locally: `codex-orchestrator doctor --usage --format json`
259
+ - Capture reproducible downstream failures: `codex-orchestrator doctor --issue-log --issue-title "<title>" --issue-notes "<notes>"`
260
+ - Auto-capture issue bundles for failed runs: `codex-orchestrator start <pipeline> --auto-issue-log` or `codex-orchestrator flow --task <task-id> --auto-issue-log`
246
261
  - Delegation: `codex-orchestrator doctor --apply --yes`, then enable for a Codex run with: `codex -c 'mcp_servers.delegation.enabled=true' ...`
247
262
  - Collab (symbolic RLM subagents): `codex-orchestrator rlm --multi-agent auto "<goal>"` (legacy alias: `--collab auto`; requires Codex `features.multi_agent=true`)
248
263
  - Cloud: set `CODEX_CLOUD_ENV_ID` (and optional `CODEX_CLOUD_BRANCH`), then run: `codex-orchestrator start <pipeline> --cloud --target <stage-id>`
249
264
  - Cloud fail-fast (avoid fallback reliance): set `CODEX_ORCHESTRATOR_CLOUD_FALLBACK=deny`
265
+ - Repo-config fail-fast (deny packaged config fallback): set `CODEX_ORCHESTRATOR_REPO_CONFIG_REQUIRED=1` or pass `--repo-config-required`
250
266
  - Cloud status retry tuning (optional): `CODEX_CLOUD_STATUS_RETRY_LIMIT`, `CODEX_CLOUD_STATUS_RETRY_BACKOFF_MS`
251
267
 
252
268
  Print DevTools MCP setup guidance:
@@ -256,11 +272,11 @@ codex-orchestrator devtools setup
256
272
 
257
273
  ## Common commands
258
274
 
259
- - `codex-orchestrator start <pipeline>` — run a pipeline.
260
- - `codex-orchestrator flow --task <task-id>` — run `docs-review` then `implementation-gate` in sequence.
275
+ - `codex-orchestrator start <pipeline>` — run a pipeline (add `--auto-issue-log` for automatic failure bundle capture; add `--repo-config-required` for strict repo-local config mode).
276
+ - `codex-orchestrator flow --task <task-id>` — run `docs-review` then `implementation-gate` in sequence (supports `--auto-issue-log` and `--repo-config-required`).
261
277
  - `codex-orchestrator plan <pipeline>` — preview pipeline stages.
262
278
  - `codex-orchestrator exec <cmd>` — run a one-off command with the exec runtime.
263
- - `codex-orchestrator init codex` — install starter templates (`mcp-client.json`, `AGENTS.md`) into a repo.
279
+ - `codex-orchestrator init codex` — install starter templates (`mcp-client.json`, `AGENTS.md`, `codex.orchestrator.json`) into a repo.
264
280
  - `codex-orchestrator setup --yes` — install bundled skills and configure delegation + DevTools wiring (add `--refresh-skills` to overwrite existing skills in `$CODEX_HOME/skills`).
265
281
  - `codex-orchestrator init codex --codex-cli --yes --codex-source <path>` — optionally provision a CO-managed Codex CLI binary (build-from-source default; set `CODEX_CLI_SOURCE` to avoid passing `--codex-source` every time, and `CODEX_CLI_USE_MANAGED=1` to route runs to it).
266
282
  - `codex-orchestrator init codex --codex-cli --yes --codex-download-url <url> --codex-download-sha256 <sha>` — opt-in to a prebuilt Codex CLI download.
@@ -0,0 +1,448 @@
1
+ {
2
+ "stageSets": {
3
+ "build-lint-test": [
4
+ {
5
+ "kind": "command",
6
+ "id": "build",
7
+ "title": "npm run build",
8
+ "command": "npm run build"
9
+ },
10
+ {
11
+ "kind": "command",
12
+ "id": "lint",
13
+ "title": "npm run lint",
14
+ "command": "npm run lint"
15
+ },
16
+ {
17
+ "kind": "command",
18
+ "id": "test",
19
+ "title": "npm run test",
20
+ "command": "npm run test"
21
+ }
22
+ ],
23
+ "delegation-guard-stage": [
24
+ {
25
+ "kind": "command",
26
+ "id": "delegation-guard",
27
+ "title": "Run delegation guard",
28
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/orchestrator/src/cli/utils/delegationGuardRunner.js\""
29
+ }
30
+ ],
31
+ "diagnostics-spec-guard": [
32
+ {
33
+ "kind": "command",
34
+ "id": "spec-guard",
35
+ "title": "node scripts/spec-guard.mjs --dry-run",
36
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/orchestrator/src/cli/utils/specGuardRunner.js\" --dry-run"
37
+ }
38
+ ],
39
+ "docs-review-checks": [
40
+ {
41
+ "kind": "command",
42
+ "id": "docs-check",
43
+ "title": "npm run docs:check",
44
+ "command": "npm run docs:check"
45
+ },
46
+ {
47
+ "kind": "command",
48
+ "id": "docs-freshness",
49
+ "title": "npm run docs:freshness",
50
+ "command": "npm run docs:freshness"
51
+ }
52
+ ],
53
+ "design-artifacts": [
54
+ {
55
+ "kind": "command",
56
+ "id": "design-spec-guard",
57
+ "title": "Validate specs via spec-guard",
58
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/orchestrator/src/cli/utils/specGuardRunner.js\" --dry-run",
59
+ "env": {
60
+ "DESIGN_PIPELINE": "1"
61
+ },
62
+ "summaryHint": "Ensures design specs are fresh before artifact write"
63
+ },
64
+ {
65
+ "kind": "command",
66
+ "id": "design-artifact-writer",
67
+ "title": "Persist design artifact manifests",
68
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/write-artifacts.js\"",
69
+ "env": {
70
+ "DESIGN_PIPELINE": "1"
71
+ }
72
+ }
73
+ ]
74
+ },
75
+ "pipelines": [
76
+ {
77
+ "id": "diagnostics",
78
+ "title": "Diagnostics Pipeline",
79
+ "description": "Build, lint, test, and spec-guard the repository with grouped runner support.",
80
+ "tags": [
81
+ "diagnostics-primary",
82
+ "diagnostics-secondary"
83
+ ],
84
+ "stages": [
85
+ {
86
+ "kind": "stage-set",
87
+ "ref": "delegation-guard-stage"
88
+ },
89
+ {
90
+ "kind": "stage-set",
91
+ "ref": "build-lint-test"
92
+ },
93
+ {
94
+ "kind": "stage-set",
95
+ "ref": "diagnostics-spec-guard"
96
+ }
97
+ ]
98
+ },
99
+ {
100
+ "id": "rlm",
101
+ "title": "RLM Runner",
102
+ "description": "Runs the recursive language model loop with validator gating.",
103
+ "tags": [
104
+ "rlm"
105
+ ],
106
+ "guardrailsRequired": false,
107
+ "stages": [
108
+ {
109
+ "kind": "command",
110
+ "id": "rlm-runner",
111
+ "title": "Run RLM loop",
112
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/orchestrator/src/cli/rlmRunner.js\"",
113
+ "summaryHint": "RLM loop completed"
114
+ }
115
+ ]
116
+ },
117
+ {
118
+ "id": "implementation-gate",
119
+ "title": "Implementation Complete Gate",
120
+ "description": "Runs the required implementation validations (spec-guard, build, lint, test, docs:check, docs:freshness, diff-budget) and launches the Codex review handoff, writing a single manifest for evidence.",
121
+ "tags": [
122
+ "implementation-gate"
123
+ ],
124
+ "stages": [
125
+ {
126
+ "kind": "stage-set",
127
+ "ref": "delegation-guard-stage"
128
+ },
129
+ {
130
+ "kind": "stage-set",
131
+ "ref": "diagnostics-spec-guard"
132
+ },
133
+ {
134
+ "kind": "stage-set",
135
+ "ref": "build-lint-test"
136
+ },
137
+ {
138
+ "kind": "stage-set",
139
+ "ref": "docs-review-checks"
140
+ },
141
+ {
142
+ "kind": "command",
143
+ "id": "diff-budget",
144
+ "title": "node scripts/diff-budget.mjs",
145
+ "command": "node scripts/diff-budget.mjs"
146
+ },
147
+ {
148
+ "kind": "command",
149
+ "id": "review",
150
+ "title": "npm run review",
151
+ "command": "npm run review",
152
+ "env": {
153
+ "DIFF_BUDGET_STAGE": "1",
154
+ "CODEX_REVIEW_NON_INTERACTIVE": "1",
155
+ "NOTES": "Goal: implementation gate review handoff | Summary: automated prompt with manifest evidence + scope hints | Risks: review output depends on local Codex CLI capabilities"
156
+ }
157
+ }
158
+ ]
159
+ },
160
+ {
161
+ "id": "docs-review",
162
+ "title": "Docs Review Gate",
163
+ "description": "Pre-implementation docs review: spec-guard, docs:check, docs:freshness, and review (diff budget skipped).",
164
+ "tags": [
165
+ "docs-review"
166
+ ],
167
+ "stages": [
168
+ {
169
+ "kind": "stage-set",
170
+ "ref": "delegation-guard-stage"
171
+ },
172
+ {
173
+ "kind": "stage-set",
174
+ "ref": "diagnostics-spec-guard"
175
+ },
176
+ {
177
+ "kind": "stage-set",
178
+ "ref": "docs-review-checks"
179
+ },
180
+ {
181
+ "kind": "command",
182
+ "id": "review",
183
+ "title": "npm run review",
184
+ "command": "npm run review",
185
+ "env": {
186
+ "SKIP_DIFF_BUDGET": "1",
187
+ "CODEX_REVIEW_NON_INTERACTIVE": "1",
188
+ "NOTES": "Goal: docs review gate review handoff | Summary: automated prompt with manifest evidence + scope hints | Risks: review output depends on local Codex CLI capabilities"
189
+ }
190
+ }
191
+ ]
192
+ },
193
+ {
194
+ "id": "frontend-testing",
195
+ "title": "Frontend Testing",
196
+ "description": "Runs the frontend testing runner (DevTools off by default).",
197
+ "tags": [
198
+ "frontend-testing"
199
+ ],
200
+ "guardrailsRequired": false,
201
+ "stages": [
202
+ {
203
+ "kind": "command",
204
+ "id": "frontend-testing",
205
+ "title": "Run frontend testing",
206
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/orchestrator/src/cli/frontendTestingRunner.js\"",
207
+ "env": {
208
+ "CODEX_NON_INTERACTIVE": "1"
209
+ }
210
+ }
211
+ ]
212
+ },
213
+ {
214
+ "id": "diagnostics-with-eval",
215
+ "title": "Diagnostics Pipeline (with Eval Harness)",
216
+ "description": "Build, lint, test, run evaluation harness, and verify specs for guardrail evidence capture.",
217
+ "stages": [
218
+ {
219
+ "kind": "stage-set",
220
+ "ref": "delegation-guard-stage"
221
+ },
222
+ {
223
+ "kind": "stage-set",
224
+ "ref": "build-lint-test"
225
+ },
226
+ {
227
+ "kind": "command",
228
+ "id": "eval-test",
229
+ "title": "npm run eval:test",
230
+ "command": "npm run eval:test"
231
+ },
232
+ {
233
+ "kind": "stage-set",
234
+ "ref": "diagnostics-spec-guard"
235
+ }
236
+ ]
237
+ },
238
+ {
239
+ "id": "design-reference",
240
+ "title": "Design Reference Pipeline",
241
+ "description": "Extracts design reference assets, stages Storybook-ready components, and records manifest evidence.",
242
+ "tags": [
243
+ "design",
244
+ "reference"
245
+ ],
246
+ "stages": [
247
+ {
248
+ "kind": "stage-set",
249
+ "ref": "delegation-guard-stage"
250
+ },
251
+ {
252
+ "kind": "command",
253
+ "id": "design-config",
254
+ "title": "Resolve design configuration",
255
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/prepare.js\"",
256
+ "env": {
257
+ "DESIGN_PIPELINE": "1"
258
+ }
259
+ },
260
+ {
261
+ "kind": "command",
262
+ "id": "design-extract",
263
+ "title": "Run Playwright design extractor",
264
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/extract.js\"",
265
+ "env": {
266
+ "DESIGN_PIPELINE": "1"
267
+ }
268
+ },
269
+ {
270
+ "kind": "command",
271
+ "id": "design-reference",
272
+ "title": "Build motherduck reference page",
273
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/reference.js\"",
274
+ "env": {
275
+ "DESIGN_PIPELINE": "1"
276
+ }
277
+ },
278
+ {
279
+ "kind": "command",
280
+ "id": "design-componentize",
281
+ "title": "Componentize artifacts via packages/design-system",
282
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/componentize.js\"",
283
+ "env": {
284
+ "DESIGN_PIPELINE": "1"
285
+ }
286
+ },
287
+ {
288
+ "kind": "command",
289
+ "id": "design-advanced-assets",
290
+ "title": "Generate advanced design assets",
291
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/advanced-assets.js\"",
292
+ "env": {
293
+ "DESIGN_PIPELINE": "1"
294
+ },
295
+ "allowFailure": true,
296
+ "summaryHint": "Optional Framer Motion and FFmpeg assets"
297
+ },
298
+ {
299
+ "kind": "command",
300
+ "id": "design-visual-regression",
301
+ "title": "Run visual regression tests",
302
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/visual-regression.js\"",
303
+ "env": {
304
+ "DESIGN_PIPELINE": "1"
305
+ },
306
+ "allowFailure": true,
307
+ "summaryHint": "Visual regression diffs stored under design/visual-regression/"
308
+ },
309
+ {
310
+ "kind": "stage-set",
311
+ "ref": "design-artifacts"
312
+ }
313
+ ]
314
+ },
315
+ {
316
+ "id": "hi-fi-design-toolkit",
317
+ "title": "Hi-Fi Design Toolkit",
318
+ "description": "Runs the hi-fi design toolkit pipeline to extract, tokenize, self-correct, and publish design artifacts.",
319
+ "tags": [
320
+ "design",
321
+ "hi-fi"
322
+ ],
323
+ "stages": [
324
+ {
325
+ "kind": "stage-set",
326
+ "ref": "delegation-guard-stage"
327
+ },
328
+ {
329
+ "kind": "command",
330
+ "id": "design-config",
331
+ "title": "Resolve design configuration",
332
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/prepare.js\"",
333
+ "env": {
334
+ "DESIGN_PIPELINE": "1",
335
+ "DESIGN_TOOLKIT": "1"
336
+ }
337
+ },
338
+ {
339
+ "kind": "command",
340
+ "id": "design-toolkit-extract",
341
+ "title": "Wrap external toolkit extractor",
342
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/toolkit/extract.js\"",
343
+ "env": {
344
+ "DESIGN_PIPELINE": "1",
345
+ "DESIGN_TOOLKIT": "1"
346
+ }
347
+ },
348
+ {
349
+ "kind": "command",
350
+ "id": "design-toolkit-tokens",
351
+ "title": "Generate tokens and style guides",
352
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/toolkit/tokens.js\"",
353
+ "env": {
354
+ "DESIGN_PIPELINE": "1",
355
+ "DESIGN_TOOLKIT": "1"
356
+ }
357
+ },
358
+ {
359
+ "kind": "command",
360
+ "id": "design-toolkit-reference",
361
+ "title": "Build reference pages + self-correction",
362
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/toolkit/reference.js\"",
363
+ "env": {
364
+ "DESIGN_PIPELINE": "1",
365
+ "DESIGN_TOOLKIT": "1"
366
+ }
367
+ },
368
+ {
369
+ "kind": "command",
370
+ "id": "design-advanced-assets",
371
+ "title": "Generate advanced design assets",
372
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/advanced-assets.js\"",
373
+ "env": {
374
+ "DESIGN_PIPELINE": "1",
375
+ "DESIGN_TOOLKIT": "1"
376
+ },
377
+ "allowFailure": true,
378
+ "summaryHint": "Optional motion capture via Framer Motion + FFmpeg"
379
+ },
380
+ {
381
+ "kind": "command",
382
+ "id": "design-toolkit-publish",
383
+ "title": "Publish toolkit outputs to packages/design-system",
384
+ "command": "node \"$CODEX_ORCHESTRATOR_PACKAGE_ROOT/dist/scripts/design/pipeline/toolkit/publish.js\"",
385
+ "env": {
386
+ "DESIGN_PIPELINE": "1",
387
+ "DESIGN_TOOLKIT": "1"
388
+ }
389
+ },
390
+ {
391
+ "kind": "stage-set",
392
+ "ref": "design-artifacts"
393
+ }
394
+ ]
395
+ },
396
+ {
397
+ "id": "tfgrpo-learning",
398
+ "title": "TF-GRPO Learning Loop",
399
+ "description": "Run three TF-GRPO epochs (~100 samples, train temp 0.7, eval temp 0.3, G>=2) and capture guardrail evidence.",
400
+ "tags": [
401
+ "tfgrpo-learning",
402
+ "tfgrpo-learning-secondary"
403
+ ],
404
+ "stages": [
405
+ {
406
+ "kind": "stage-set",
407
+ "ref": "delegation-guard-stage"
408
+ },
409
+ {
410
+ "kind": "command",
411
+ "id": "tfgrpo-loop",
412
+ "title": "tfgrpo learning schedule (3 epochs, 100 samples)",
413
+ "command": "TFGRPO_GROUP_SIZE=2 TFGRPO_REWARDERS=gt,relative TFGRPO_EPOCHS=3 TFGRPO_SAMPLE_SIZE=100 TFGRPO_TRAIN_TEMP=0.7 TFGRPO_EVAL_TEMP=0.3 node --loader ts-node/esm evaluation/harness/scripts/tfgrpo-runner.ts"
414
+ },
415
+ {
416
+ "kind": "stage-set",
417
+ "ref": "diagnostics-spec-guard"
418
+ }
419
+ ]
420
+ },
421
+ {
422
+ "id": "pause-eval",
423
+ "title": "Pause/Resume Eval",
424
+ "description": "Utility pipeline for long pause/resume evaluations (sleep + resume marker).",
425
+ "tags": [
426
+ "eval",
427
+ "pause-resume"
428
+ ],
429
+ "guardrailsRequired": false,
430
+ "stages": [
431
+ {
432
+ "kind": "command",
433
+ "id": "pause-window",
434
+ "title": "Sleep to allow pause",
435
+ "command": "sleep 120",
436
+ "summaryHint": "Pause window elapsed"
437
+ },
438
+ {
439
+ "kind": "command",
440
+ "id": "resume-marker",
441
+ "title": "Resume marker",
442
+ "command": "node -e \"console.log('resume-ok')\"",
443
+ "summaryHint": "Resume marker written"
444
+ }
445
+ ]
446
+ }
447
+ ]
448
+ }