agent-conveyor 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/README.md +1123 -0
  2. package/dist/cli/main.d.ts +2 -0
  3. package/dist/cli/main.js +19 -0
  4. package/dist/cli/main.js.map +1 -0
  5. package/dist/cli/program-name.d.ts +2 -0
  6. package/dist/cli/program-name.js +12 -0
  7. package/dist/cli/program-name.js.map +1 -0
  8. package/dist/cli/typescript-runtime.d.ts +52 -0
  9. package/dist/cli/typescript-runtime.js +18009 -0
  10. package/dist/cli/typescript-runtime.js.map +1 -0
  11. package/dist/index.d.ts +37 -0
  12. package/dist/index.js +20 -0
  13. package/dist/index.js.map +1 -0
  14. package/dist/runtime/audit.d.ts +96 -0
  15. package/dist/runtime/audit.js +298 -0
  16. package/dist/runtime/audit.js.map +1 -0
  17. package/dist/runtime/classify.d.ts +8 -0
  18. package/dist/runtime/classify.js +128 -0
  19. package/dist/runtime/classify.js.map +1 -0
  20. package/dist/runtime/codex-session.d.ts +103 -0
  21. package/dist/runtime/codex-session.js +408 -0
  22. package/dist/runtime/codex-session.js.map +1 -0
  23. package/dist/runtime/commands.d.ts +92 -0
  24. package/dist/runtime/commands.js +408 -0
  25. package/dist/runtime/commands.js.map +1 -0
  26. package/dist/runtime/dispatch.d.ts +74 -0
  27. package/dist/runtime/dispatch.js +669 -0
  28. package/dist/runtime/dispatch.js.map +1 -0
  29. package/dist/runtime/export.d.ts +22 -0
  30. package/dist/runtime/export.js +77 -0
  31. package/dist/runtime/export.js.map +1 -0
  32. package/dist/runtime/ingest.d.ts +28 -0
  33. package/dist/runtime/ingest.js +177 -0
  34. package/dist/runtime/ingest.js.map +1 -0
  35. package/dist/runtime/loop-evidence.d.ts +87 -0
  36. package/dist/runtime/loop-evidence.js +448 -0
  37. package/dist/runtime/loop-evidence.js.map +1 -0
  38. package/dist/runtime/manager-config.d.ts +20 -0
  39. package/dist/runtime/manager-config.js +34 -0
  40. package/dist/runtime/manager-config.js.map +1 -0
  41. package/dist/runtime/manager-permissions.d.ts +7 -0
  42. package/dist/runtime/manager-permissions.js +85 -0
  43. package/dist/runtime/manager-permissions.js.map +1 -0
  44. package/dist/runtime/notifications.d.ts +89 -0
  45. package/dist/runtime/notifications.js +208 -0
  46. package/dist/runtime/notifications.js.map +1 -0
  47. package/dist/runtime/replay.d.ts +29 -0
  48. package/dist/runtime/replay.js +331 -0
  49. package/dist/runtime/replay.js.map +1 -0
  50. package/dist/runtime/tasks.d.ts +54 -0
  51. package/dist/runtime/tasks.js +195 -0
  52. package/dist/runtime/tasks.js.map +1 -0
  53. package/dist/runtime/tmux.d.ts +61 -0
  54. package/dist/runtime/tmux.js +189 -0
  55. package/dist/runtime/tmux.js.map +1 -0
  56. package/dist/runtime/visual-diff.d.ts +23 -0
  57. package/dist/runtime/visual-diff.js +234 -0
  58. package/dist/runtime/visual-diff.js.map +1 -0
  59. package/dist/state/database.d.ts +21 -0
  60. package/dist/state/database.js +142 -0
  61. package/dist/state/database.js.map +1 -0
  62. package/dist/state/files.d.ts +38 -0
  63. package/dist/state/files.js +73 -0
  64. package/dist/state/files.js.map +1 -0
  65. package/dist/state/schema-v22.d.ts +1 -0
  66. package/dist/state/schema-v22.js +566 -0
  67. package/dist/state/schema-v22.js.map +1 -0
  68. package/dist/state/sqlite-contract.d.ts +4 -0
  69. package/dist/state/sqlite-contract.js +78 -0
  70. package/dist/state/sqlite-contract.js.map +1 -0
  71. package/dist/state/status.d.ts +12 -0
  72. package/dist/state/status.js +40 -0
  73. package/dist/state/status.js.map +1 -0
  74. package/docs/typescript-migration/cli-contract.md +147 -0
  75. package/docs/typescript-migration/dashboard-contract.md +76 -0
  76. package/docs/typescript-migration/package-install-contract.md +98 -0
  77. package/docs/typescript-migration/qa-gate-matrix.md +103 -0
  78. package/docs/typescript-migration/sqlite-state-contract.md +92 -0
  79. package/docs/typescript-migration/t005-runtime-parity.md +47 -0
  80. package/package.json +88 -0
  81. package/scripts/capture-static-html-screenshot.mjs +88 -0
  82. package/skills/codex-review/SKILL.md +116 -0
  83. package/skills/codex-review/scripts/codex-review +344 -0
  84. package/skills/manage-codex-workers/SKILL.md +696 -0
  85. package/skills/manage-codex-workers/agents/openai.yaml +5 -0
@@ -0,0 +1,696 @@
1
+ ---
2
+ name: manage-codex-workers
3
+ description: Use when the user asks to set up an Agent Conveyor Ralph loop, register an existing Codex session as a worker or manager, create a supervised task, bind a pair, run observation cycles, send nudges, interrupt busy-waits, finish a task, or audit/replay supervision history.
4
+ ---
5
+
6
+ # Manage Codex Workers
7
+
8
+ Use `conveyor ...` as the primary CLI. It is installed by the `agent-conveyor`
9
+ npm package. The legacy `workerctl` command remains a compatibility alias,
10
+ but skill-driven flows should prefer `conveyor`.
11
+
12
+ ## One-Prompt Codex App Ralph Loop
13
+
14
+ This is the preferred entry point when the user has installed Agent Conveyor
15
+ and wants to use another Codex app session without learning the low-level
16
+ command sequence.
17
+
18
+ User prompt:
19
+
20
+ ```text
21
+ Use the manage-codex-workers skill.
22
+
23
+ Set up a Codex app Ralph loop for issue CTL.
24
+ Worker session: <worker-name or choose one>
25
+ Manager session: <manager-name or choose one>
26
+ Template: <template name, or choose the best one>
27
+ Max iterations: <number, default 3>
28
+ Require adversarial proof before another worker iteration.
29
+ ```
30
+
31
+ Skill behavior:
32
+
33
+ 1. Work from the control repo checkout (for example `/Users/neonwatty/Desktop/codex-terminal-manager`; substitute your own path).
34
+ 2. Run `conveyor doctor` and `conveyor db-doctor`; fix or
35
+ report blockers.
36
+ 3. Choose concise task, worker, manager, and run names when the user does not
37
+ provide them. Do not ask the user to invent generated names.
38
+ 4. Create the no-tmux binding with `conveyor create-disposable-binding`
39
+ using `--template` when a template is known, `--adversarial`, a bounded
40
+ `--max-iterations`, and `--json`.
41
+ 5. Ensure Dispatch is running or tell the user the single command to start it:
42
+ `conveyor dispatch --watch --dispatcher-id dispatch-local`.
43
+ 6. Read the returned `communication` blocks. A worker or manager with
44
+ `session_kind=tmux` and `receive_style=push` can receive direct tmux pushes;
45
+ one with `session_kind=codex_app` and `receive_style=pull` must poll the
46
+ printed inbox command.
47
+ 7. Give the worker Codex app session the generated `worker_handoff` prompt.
48
+ It should keep polling `conveyor worker-inbox <task> --consume-next --wait
49
+ --timeout 60 --json` through the bounded loop until no inbox item remains
50
+ or `max_iterations` is reached.
51
+ 8. After each worker pass, require concrete evidence and structured
52
+ `loop-evidence adversarial-check` proof before queueing another
53
+ `enqueue-continue-iteration`.
54
+ 9. Use `conveyor loop-status <task> --run <run> --json` and telemetry/audit
55
+ receipts before declaring the loop ready for manager review.
56
+
57
+ Reference docs:
58
+
59
+ - `README.md` command reference
60
+ - `docs/qa/ralph-loop-operator-guide.md`
61
+ - `docs/agent-evidence-playbook.md`
62
+
63
+ ## Supervision Model
64
+
65
+ Supervision is built on three primitives: **sessions**, **tasks**, and
66
+ **bindings**.
67
+
68
+ - A **worker session** is a Codex session registered with Agent Conveyor. It may be a
69
+ tmux-backed session or a Codex app/no-tmux session. Its rollout JSONL on disk
70
+ (`~/.codex/sessions/.../rollout-*.jsonl`, or a disposable rollout file) is
71
+ the source of truth for ingest.
72
+ - A **manager session** is a Codex session that can run anywhere — Ghostty,
73
+ iTerm2, Terminal.app, a web terminal. The manager does not need tmux. Its
74
+ job is to call `conveyor` commands, read their JSON output, and decide what
75
+ to do next.
76
+ - Registration, `sessions`, `discover`, and disposable binding JSON include a
77
+ `communication` block. Use it to decide the receive style for both worker and
78
+ manager: tmux sessions are push-capable, while Codex app/no-tmux sessions
79
+ receive through `manager-inbox` or `worker-inbox` polling.
80
+ - A **task** is a unit of supervised work with a goal.
81
+ - A **binding** ties one worker session and one manager session to one task.
82
+
83
+ The manager Codex drives the supervision loop by calling
84
+ `conveyor cycle <task>` repeatedly. Each cycle ingests new rollout events,
85
+ captures the worker's tmux pane as a shadow signal, persists a `manager_cycles`
86
+ row, and returns structured JSON. The manager reads that JSON and decides.
87
+
88
+ Dispatch is core infrastructure for supervised pairs. The `pair` workflow starts
89
+ a detached Dispatch watch process by default after worker/manager setup and
90
+ bind. For manually bound pairs, keep Dispatch running in another shell with:
91
+
92
+ ```bash
93
+ conveyor dispatch --watch --dispatcher-id dispatch-local
94
+ ```
95
+
96
+ Dispatch wakes the bound manager on worker completion and executes queued
97
+ `notify_manager` / `nudge_worker` commands. It does not decide whether the task
98
+ is correct or finished.
99
+
100
+ ## Preflight
101
+
102
+ 1. Work from the control repo:
103
+ ```bash
104
+ cd /Users/neonwatty/Desktop/codex-terminal-manager
105
+ ```
106
+ 2. Verify dependencies:
107
+ ```bash
108
+ conveyor doctor
109
+ ```
110
+ 3. Verify the SQLite control plane is healthy:
111
+ ```bash
112
+ conveyor db-doctor
113
+ ```
114
+ 4. From the current Codex session, check whether it can register itself:
115
+ ```bash
116
+ conveyor doctor-self
117
+ ```
118
+ `supported: true` means the session is inside a live tmux session and can
119
+ be registered as a worker. A non-tmux session can still be registered as a
120
+ manager.
121
+
122
+ ## Discovery For Q&A
123
+
124
+ When the user asks which worker, manager, task, or binding to connect, search
125
+ the control plane first and present likely choices instead of asking for
126
+ generated names:
127
+
128
+ ```bash
129
+ conveyor discover <query>
130
+ conveyor search <query>
131
+ ```
132
+
133
+ Use an empty query to list active candidates. Add `--all` only when the user is
134
+ looking for completed tasks or gone sessions. The JSON output includes
135
+ `tasks`, `sessions`, `bindings`, `telemetry`, and `suggestions`; use
136
+ `suggestions` to offer concise next steps such as a `conveyor bind` command or
137
+ the prompt to register a missing worker or manager.
138
+
139
+ ## Preferred Manual Handoff Workflow
140
+
141
+ When the user wants to hand off an already-open Codex session, do not start
142
+ with a long `pair` command. Use the skill in each session:
143
+
144
+ 1. In the intended worker session, ask Codex:
145
+ ```text
146
+ Use the manage-codex-workers skill.
147
+
148
+ Register this current Codex session as the worker for this dashboard setup.
149
+
150
+ Dashboard setup code: <setup-code>
151
+ Working directory: <target-repo>
152
+
153
+ Let the skill derive the task and session names from the setup code. Do not
154
+ ask me to type generated worker, manager, or task names.
155
+
156
+ After registration, wait for the manager. Do not start work until the
157
+ manager has created or bound the task and provided acceptance criteria.
158
+ ```
159
+ 2. In a separate manager session, ask Codex:
160
+ ```text
161
+ Use the manage-codex-workers skill.
162
+
163
+ Register this current Codex session as the manager for this dashboard setup.
164
+
165
+ Dashboard setup code: <setup-code>
166
+ Working directory: <target-repo>
167
+ Goal: <goal>
168
+
169
+ Let the skill derive the task and session names from the setup code, find
170
+ the matching worker, create/configure the task if needed, and bind the
171
+ worker and manager.
172
+
173
+ Run cycles, inspect criteria and telemetry, nudge only when useful, require
174
+ evidence, and finish/export the task when done.
175
+ ```
176
+ 3. The manager session should then drive the loop with:
177
+ ```bash
178
+ conveyor cycle <task-name>
179
+ conveyor criteria <task-name> --list
180
+ conveyor telemetry --summary --task <task-name>
181
+ conveyor telemetry --task <task-name>
182
+ conveyor replay <task-name>
183
+ ```
184
+ For `pair`-started workflows, Dispatch is started automatically unless
185
+ `--no-dispatch` is passed. For manually bound pairs, keep
186
+ `conveyor dispatch --watch --dispatcher-id dispatch-local` running
187
+ in a separate shell while the pair is active, or run a bounded verification
188
+ pass with `conveyor dispatch --watch --watch-iterations 2 --dry-run
189
+ --json`.
190
+
191
+ The skill should translate those prompts into explicit `conveyor` commands.
192
+ For the worker, run `doctor-self`; if supported, register the current session
193
+ with `register-worker`. For the manager, register the current session with
194
+ `register-manager`, create/configure the task if needed, then `bind`.
195
+
196
+ When the prompt includes a dashboard setup code, derive names without asking the
197
+ user:
198
+
199
+ ```text
200
+ task: dashboard-<setup-code>
201
+ worker: dashboard-<setup-code>-worker
202
+ manager: dashboard-<setup-code>-manager
203
+ ```
204
+
205
+ If there is already a registered worker for the derived worker name, reuse it
206
+ when binding the manager. If the derived name collides with an active unrelated
207
+ session, append a short suffix yourself and continue; do not ask the user to
208
+ invent names.
209
+
210
+ If the prompt has no setup code and no explicit names, choose concise names from
211
+ the task goal or current date yourself. Ask the user only when the target repo or
212
+ goal is missing or ambiguous.
213
+
214
+ This is the ergonomic manual workflow. Use `pair` only when the user wants
215
+ conveyor to spawn both sessions in one automated command.
216
+
217
+ ## Register Sessions
218
+
219
+ Register an already-running Codex worker (rollout JSONL is auto-discovered
220
+ from the pid via `lsof`):
221
+
222
+ ```bash
223
+ conveyor register-worker --name foo --pid <WORKER_PID> \
224
+ --cwd "$PWD" --tmux-session codex-foo
225
+ ```
226
+
227
+ If `lsof` discovery fails, pass the rollout path explicitly:
228
+
229
+ ```bash
230
+ conveyor register-worker --name foo --pid <WORKER_PID> \
231
+ --cwd "$PWD" --tmux-session codex-foo \
232
+ --codex-session ~/.codex/sessions/.../rollout-...-<uuid>.jsonl
233
+ ```
234
+
235
+ Register a manager (tmux not required):
236
+
237
+ ```bash
238
+ conveyor register-manager --name foo-mgr --pid <MGR_PID> --cwd "$PWD"
239
+ ```
240
+
241
+ For new manager sessions started by Agent Conveyor, prefer `start-manager` or
242
+ `pair`. These send a manager bootstrap prompt to Codex so the rollout JSONL is
243
+ opened during startup and the manager has setup context. In `pair`, the manager
244
+ prompt includes the task name, goal, worker session, `manager-config
245
+ <task> --questions`, and `cycle <task>`.
246
+
247
+ For late attach, pass the known task context directly:
248
+
249
+ ```bash
250
+ conveyor start-manager --name foo-mgr --cwd "$PWD" \
251
+ --task foo-task --task-goal "..." --worker foo-worker
252
+ ```
253
+
254
+ That bootstrap starts with concrete `manager-config`, `cycle`, `manager-ack`,
255
+ and `worker-ack` commands instead of `<task>` placeholders. If manager config
256
+ has already been recorded for the task, the bootstrap tells the manager to start
257
+ with `cycle`.
258
+
259
+ List registered sessions:
260
+
261
+ ```bash
262
+ conveyor sessions
263
+ conveyor sessions --role worker
264
+ conveyor sessions --role manager
265
+ ```
266
+
267
+ ## Create A Task And Bind
268
+
269
+ For automated bootstrap of a fresh supervised worker/manager pair, use `pair`
270
+ instead of manually starting and binding sessions:
271
+
272
+ ```bash
273
+ conveyor pair \
274
+ --task <task-slug> \
275
+ --worker-name <worker-name> \
276
+ --manager-name <manager-name> \
277
+ --dispatcher-id dispatch-pair \
278
+ --cwd <target-repo> \
279
+ --codex-profile yolo \
280
+ --manager-mode strict \
281
+ --task-goal "<one-line goal>" \
282
+ --task-prompt "<worker prompt>" \
283
+ --manager-objective "<manager objective>" \
284
+ --manager-acceptance "<finish criterion>"
285
+ ```
286
+
287
+ Use `--no-dispatch` only for isolated tests or manual Dispatch supervision.
288
+
289
+ Use `pair` for external dogfood runs. Keep the control repo as the command cwd,
290
+ but set `--cwd` to the downstream project:
291
+
292
+ ```bash
293
+ cd /Users/neonwatty/Desktop/codex-terminal-manager
294
+ export DOGFOOD_CWD="/path/to/external/project"
295
+ export TASK="external-dogfood-$(date +%Y%m%d)"
296
+ export WORKER="dogfood-worker-$(date +%Y%m%d)"
297
+ export MANAGER="dogfood-manager-$(date +%Y%m%d)"
298
+
299
+ conveyor pair \
300
+ --task "$TASK" \
301
+ --worker-name "$WORKER" \
302
+ --manager-name "$MANAGER" \
303
+ --cwd "$DOGFOOD_CWD" \
304
+ --codex-profile yolo \
305
+ --manager-mode strict \
306
+ --task-goal "Complete one small real task in the external project." \
307
+ --task-prompt "Pick one small, concrete improvement. Keep changes scoped. Run verification. Report files changed and commands run." \
308
+ --manager-objective "Supervise the worker, request acceptance criteria and evidence, and finish only when verified." \
309
+ --manager-acceptance "The task is complete, verified, and summarized with files changed and commands run."
310
+ ```
311
+
312
+ During external dogfood, review telemetry every few cycles:
313
+
314
+ ```bash
315
+ conveyor cycle "$TASK"
316
+ conveyor criteria "$TASK" --list
317
+ conveyor telemetry --summary --task "$TASK"
318
+ conveyor telemetry --task "$TASK"
319
+ conveyor telemetry --search manager --task "$TASK"
320
+ conveyor replay "$TASK"
321
+ ```
322
+
323
+ Finish and export evidence:
324
+
325
+ ```bash
326
+ conveyor finish-task "$TASK" \
327
+ --capture-transcript-before-stop \
328
+ --require-transcript-segment \
329
+ --require-criteria-audit \
330
+ --stop-manager \
331
+ --stop-worker
332
+
333
+ conveyor export-task "$TASK" \
334
+ --output "/tmp/$TASK-export" \
335
+ --zip \
336
+ --include-transcripts
337
+
338
+ conveyor sessions --state active
339
+ conveyor reconcile --stale-cycles-seconds 1
340
+ ```
341
+
342
+ ```bash
343
+ conveyor tasks --create my-task --goal "Refactor auth"
344
+ conveyor handoff my-task \
345
+ --summary "Worker explored the current auth flow and found middleware drift." \
346
+ --next-step "Implement the middleware cleanup from docs/auth-plan.md"
347
+ conveyor manager-config my-task \
348
+ --mode guided \
349
+ --objective "Keep the worker aligned to docs/auth-plan.md" \
350
+ --reference docs/auth-plan.md \
351
+ --acceptance "Tests pass" \
352
+ --guideline "Nudge only when the worker is idle, stale, or blocked"
353
+ conveyor bind --task my-task --worker foo --manager foo-mgr
354
+ ```
355
+
356
+ In the manual sequence above, `tasks` lists or creates task rows and `bind` ties the worker and manager sessions to
357
+ the task. The task is now active.
358
+
359
+ Use `handoff` before or during management promotion to save the worker's
360
+ compact progress summary and likely next steps in SQLite. Use `manager-config`
361
+ to save what the manager should check against, how structured supervision
362
+ should be, acceptance criteria, planning/PRD/mockup references, and permissions
363
+ such as `--allow-pr`, `--allow-merge-green`, and
364
+ `--allow-worker-compact-clear`.
365
+
366
+ When setting up a manager from inside a manager Codex session, prefer:
367
+
368
+ ```bash
369
+ conveyor manager-config my-task --questions
370
+ ```
371
+
372
+ Read the JSON question schema, ask the user those questions in the manager
373
+ conversation, then persist the answers with `manager-config` flags. This keeps
374
+ the human interaction in the Codex chat where the user is already working and
375
+ keeps SQLite writes explicit. Use `manager-config --interactive` only as a
376
+ terminal fallback for a human running `conveyor` directly.
377
+
378
+ Before instructing high-level actions such as PR creation, green PR merge, or
379
+ worker compact/clear, check the saved policy:
380
+
381
+ ```bash
382
+ conveyor manager-permission my-task worker_compact_clear \
383
+ --require-handoff --require
384
+ ```
385
+
386
+ Use `--require` for fail-closed behavior. Use `--require-handoff` before
387
+ compact/clear so the worker's visible progress is saved first.
388
+
389
+ To request worker compaction/clear through the audited path, prefer the
390
+ one-command wrapper:
391
+
392
+ ```bash
393
+ conveyor compact-worker my-task \
394
+ --reason "Worker context should be compacted after handoff"
395
+ ```
396
+
397
+ Use `--clear` for `/clear`. For lower-level control, first record a `nudge`
398
+ manager decision, then run:
399
+
400
+ ```bash
401
+ decision_id=$(conveyor record-decision my-task nudge \
402
+ --reason "Worker context should be compacted after handoff" \
403
+ | python3 -c 'import json,sys; print(json.load(sys.stdin)["id"])')
404
+ conveyor request-worker-compact my-task \
405
+ --decision-id "$decision_id" --strict-decisions
406
+ ```
407
+
408
+ This command checks `worker_compact_clear`, requires a saved handoff, records a
409
+ durable command, and sends Codex `/compact` to the worker's tmux pane. Use
410
+ `--clear` for `/clear`, or `--prompt-only` to send the conservative
411
+ verify/update-handoff prompt instead of a slash command.
412
+
413
+ ## Manager Loop Pattern
414
+
415
+ The manager Codex drives supervision by calling `conveyor cycle <task>` in a
416
+ loop. Each cycle is idempotent: it ingests only new bytes from the rollout
417
+ JSONL, computes worker state from the JSON event stream, captures the worker
418
+ tmux pane, and returns a JSON dict.
419
+
420
+ Before declaring work complete, try to disprove the change. Identify the
421
+ strongest realistic failure mode, verify it with a command, test, trace,
422
+ screenshot, audit record, diff, or direct inspection, and include that evidence
423
+ in the handoff. Do not accept worker claims, passing happy-path tests, generated
424
+ summaries, or optimistic UI as proof by themselves. Treat unverified assumptions
425
+ as blockers or explicit follow-ups.
426
+
427
+ When the repository being managed is CTM (`codex-terminal-manager`), see
428
+ `docs/agent-evidence-playbook.md` for CTM-specific evidence choices and final
429
+ handoff format.
430
+
431
+ Natural-language requests such as "run this as an adversarially gated loop",
432
+ "require adversarial proof before another worker iteration", or "do not finish
433
+ until you have tried to disprove it" should be treated as operational gate
434
+ requests only after `conveyor loop-triggers --classify "<prompt>"
435
+ --json` matches a controlled trigger. For Ralph-loop work, create or use a loop
436
+ policy whose `required_before_continue` includes `adversarial_check`, then
437
+ record each proof receipt with `conveyor loop-evidence
438
+ adversarial-check <task> --loop-run <run-id> --iteration <n> --failure-mode ...
439
+ --check ... --result ...`. For final completion, use `conveyor
440
+ finish-task <task> --require-adversarial-proof` so the task cannot be marked
441
+ done until structured proof exists. Use `conveyor qa-run
442
+ adversarial-triggers --receipt-output /tmp/adversarial-triggers-receipt.json
443
+ --json` to verify the controlled trigger path.
444
+
445
+ Natural-language requests such as "create an autonomous GoalBuddy conveyor" or
446
+ "split this into vertical-slice child GoalBuddy boards and continue until all
447
+ are merged or proven satisfied" should be treated as conveyor requests, not as a
448
+ flat task list. Use `conveyor qa-plan goalbuddy-conveyor` to retrieve
449
+ the reusable starter prompt, authority boundaries, acceptance criteria,
450
+ correlation markers, and negative QA checks. The manager should keep exactly one
451
+ child board active, require PR/CI/merge or `satisfied_on_main` proof before
452
+ marking a child done, and update the parent receipt before activating the next
453
+ child.
454
+
455
+ ```bash
456
+ conveyor cycle my-task
457
+ # {
458
+ # "kind": "session_cycle",
459
+ # "task": "my-task",
460
+ # "state": "busy" | "idle" | "unknown",
461
+ # "staleness_seconds": 4.2,
462
+ # "notable_pane_pattern": "trust_prompt" | null,
463
+ # "pane_signal": { "captured": true, "classifier": {...} },
464
+ # "manager_context": {
465
+ # "manager_config": {...},
466
+ # "worker_handoff": {...},
467
+ # "acceptance_criteria": {
468
+ # "summary": {"proposed": 1, "accepted": 2, "satisfied": 0, "deferred": 1, "rejected": 0},
469
+ # "open": [...],
470
+ # "proposed": [...],
471
+ # "satisfied": [...],
472
+ # "deferred": [...],
473
+ # "rejected": [...]
474
+ # },
475
+ # "criteria_negotiation": {
476
+ # "needed": true,
477
+ # "reason": "no_criteria",
478
+ # "prompt": "Please propose 2-4 acceptance criteria for the current slice...",
479
+ # "suggested_actions": [...]
480
+ # }
481
+ # },
482
+ # "ingest": { "new_events": 3, "new_offset": 12345 },
483
+ # "cycle_id": 17,
484
+ # ...
485
+ # }
486
+ ```
487
+
488
+ Loop pseudo-pattern:
489
+
490
+ ```text
491
+ while task is active:
492
+ result = conveyor cycle <task> # observe
493
+ interpret result.state, result.staleness_seconds, result.notable_pane_pattern
494
+ decide:
495
+ - "wait" -> sleep, then loop
496
+ - "nudge" -> conveyor session-nudge <worker> "<text>"
497
+ - "interrupt" -> conveyor session-interrupt <worker>
498
+ - "escalate" -> conveyor finish-task <task> --reason "<why>"
499
+ ```
500
+
501
+ Interpretation guidance:
502
+
503
+ - `state: "busy"` and recent activity: wait.
504
+ - `state: "idle"` and the worker is at a prompt: send a `session-nudge` with
505
+ the next instruction.
506
+ - `notable_pane_pattern` is non-null: branch on it directly. For example, a
507
+ `trust_prompt` or `enter_to_confirm` may need a single Enter, sent via
508
+ `conveyor session-nudge <worker> ""` (Enter is always appended).
509
+ - Long `staleness_seconds` with no notable pattern: send a status nudge before
510
+ interrupting.
511
+ - Clear busy-wait pattern or explicit user request: `session-interrupt`.
512
+
513
+ Acceptance criteria are living supervision state, not just setup text. Inspect
514
+ `manager_context.acceptance_criteria` every cycle:
515
+
516
+ - Treat `open` as accepted criteria that still need worker proof before the
517
+ task can finish.
518
+ - Inspect `manager_context.criteria_negotiation` every cycle. When `needed` is
519
+ true, use its `prompt` as the worker nudge or adapt it to the situation before
520
+ recording criteria.
521
+ - When worker progress reveals new edge cases, missing tests, polish needs, or
522
+ scope boundaries, ask the worker to propose which criteria are must-have now
523
+ versus follow-up.
524
+ - Record current-task criteria as proposed or accepted, and record follow-up
525
+ criteria as deferred.
526
+ - Use `conveyor criteria` to accept, satisfy, defer, or reject
527
+ criteria as evidence accumulates.
528
+ - Before finishing, compare the worker's receipts and verification against all
529
+ accepted open criteria.
530
+
531
+ Criteria command examples:
532
+
533
+ ```bash
534
+ conveyor criteria my-task --list
535
+ conveyor criteria my-task --add --criterion "..." --source worker_proposed --status proposed
536
+ conveyor criteria my-task --accept 12 --rationale "Must-have for this task"
537
+ conveyor criteria my-task --satisfy 12 --evidence-json '{"command":"python3 -m unittest tests.test_workerctl.ManagerBootstrapPromptTests -v","status":"pass"}'
538
+ conveyor criteria my-task --defer 13 --rationale "Follow-up after this task"
539
+ conveyor criteria my-task --reject 14 --rationale "Duplicate or out of scope"
540
+ ```
541
+
542
+ Replace placeholder `...` values with the actual criterion and verification
543
+ command. To add a criterion and satisfy that same row after verification:
544
+
545
+ ```bash
546
+ criterion_id=$(conveyor criteria my-task --add --criterion "Targeted prompt tests pass" --source worker_proposed --status proposed | python3 -c 'import json,sys; print(json.load(sys.stdin)["affected_criterion"]["id"])')
547
+ conveyor criteria my-task --satisfy "$criterion_id" --evidence-json '{"command":"python3 -m unittest tests.test_workerctl.ManagerBootstrapPromptTests -v","status":"pass"}'
548
+ ```
549
+
550
+ When making multiple criteria changes, use each mutation response's
551
+ `affected_criterion` as the row receipt, then run `conveyor criteria
552
+ <task> --list` before finishing or making an audit decision.
553
+
554
+ Sample nudge:
555
+
556
+ ```bash
557
+ conveyor session-nudge foo \
558
+ "Your latest progress exposed extra edge cases. Please propose acceptance criteria split into must-have for this task versus follow-up, and include the verification you expect for each."
559
+ ```
560
+
561
+ ## Actuation
562
+
563
+ Nudge the worker (sends text plus Enter to the worker's tmux pane). Only
564
+ worker sessions can be nudged this way; managers running outside tmux cannot:
565
+
566
+ ```bash
567
+ conveyor session-nudge foo "Please update status and continue."
568
+ conveyor session-nudge foo "Status?" --dry-run
569
+ ```
570
+
571
+ Send an interrupt key (default `C-c`):
572
+
573
+ ```bash
574
+ conveyor session-interrupt foo
575
+ conveyor session-interrupt foo --key C-c --followup "continue with the smaller refactor"
576
+ ```
577
+
578
+ ## Inspect, Replay, Audit
579
+
580
+ ```bash
581
+ conveyor tail foo --limit 30
582
+ conveyor tail foo --subtype agent_message
583
+ conveyor divergences my-task --limit 20
584
+ conveyor audit my-task
585
+ conveyor replay my-task
586
+ conveyor replay my-task --format transcript --limit 40
587
+ conveyor replay my-task --format full-transcript --include-content --limit 40 > /tmp/my-task-full-transcript.txt
588
+ ```
589
+
590
+ - `tail` prints recent ingested rollout events for a session.
591
+ - `divergences` lists cycles where the shadow pane signal flagged a notable
592
+ pattern (trust prompt, rate-limit prompt, approval prompt, ...).
593
+ - Raw transcript/log content should not be printed inside an active Codex
594
+ terminal. Prefer compact/timeline/transcript summaries; redirect any command
595
+ using `--include-content` to a file.
596
+ - `audit` lists `events` rows for the task; cycle observations show up via
597
+ `replay` and the `manager_cycles` table.
598
+ - `replay` reconstructs the task chronologically. Use `--format compact` for
599
+ decisions and side effects, `--format transcript` for deduplicated terminal
600
+ excerpts, `--format full-transcript` only for debugging.
601
+
602
+ ## Finish, Unbind, Deregister
603
+
604
+ When the task is complete:
605
+
606
+ ```bash
607
+ conveyor finish-task my-task --reason "auth refactor merged"
608
+ conveyor finish-task my-task --reason "..." --require-criteria-audit
609
+ conveyor finish-task my-task --reason "..." --stop-manager
610
+ conveyor finish-task my-task --reason "..." --stop-worker
611
+ ```
612
+
613
+ `finish-task` marks the task done and leaves both sessions running by default.
614
+ Use `--require-criteria-audit` when final acceptance criteria should be enforced:
615
+ it fails before finishing if any task criteria remain `accepted`; `proposed`,
616
+ `satisfied`, `deferred`, and `rejected` criteria do not block.
617
+ Add `--stop-manager` / `--stop-worker` only when the user explicitly wants the
618
+ tmux session torn down.
619
+
620
+ Clean up the binding and session registrations:
621
+
622
+ ```bash
623
+ conveyor unbind --task my-task
624
+ conveyor deregister foo
625
+ conveyor deregister foo-mgr
626
+ ```
627
+
628
+ `deregister` refuses if a session is still bound to an active task; run
629
+ `unbind` first.
630
+
631
+ ## Reconcile Runtime Drift
632
+
633
+ If something looks wrong — a worker process exited, a manager left a session
634
+ behind, a task has stopped getting cycle rows — run reconcile:
635
+
636
+ ```bash
637
+ conveyor reconcile
638
+ conveyor reconcile --apply
639
+ ```
640
+
641
+ Without `--apply` it prints a JSON report of dead-pid sessions, dangling
642
+ bindings, and stuck tasks. With `--apply` it marks dead-pid sessions
643
+ `state='gone'` and dangling bindings `state='invalid'`, writing audit events
644
+ for each mutation. Stuck tasks are reported but never auto-closed.
645
+
646
+ For schema-level checks (legacy `workers`/`managers` tables, missing tables,
647
+ etc.) run `conveyor db-doctor --live`.
648
+
649
+ ## Natural-Language Command Mapping
650
+
651
+ - "register this Codex session as the worker for dashboard setup <CODE>":
652
+ derive `dashboard-<CODE>-worker`, run `conveyor doctor-self`, then
653
+ `conveyor register-worker --name dashboard-<CODE>-worker --pid <PID> --cwd <CWD> --tmux-session <SESSION>`.
654
+ - "register this session as the manager for dashboard setup <CODE>":
655
+ derive `dashboard-<CODE>-manager`, run
656
+ `conveyor register-manager --name dashboard-<CODE>-manager --pid <PID> --cwd <CWD>`.
657
+ - "register this Codex session as a worker": choose a concise worker name if
658
+ none was provided, then run `conveyor doctor-self` and `register-worker`.
659
+ - "register a manager": choose a concise manager name if none was provided,
660
+ then run `conveyor register-manager`.
661
+ - "create a task and bind these sessions":
662
+ `conveyor tasks --create <TASK> --goal "<goal>"` then
663
+ `conveyor bind --task <TASK> --worker <W> --manager <M>`.
664
+ - "watch the worker", "supervise this task", "run a cycle":
665
+ `conveyor cycle <TASK>` (in a loop).
666
+ - "send a nudge", "ask the worker something":
667
+ `conveyor session-nudge <WORKER> "<text>"`.
668
+ - "interrupt the worker": `conveyor session-interrupt <WORKER>`.
669
+ - "what happened in this task", "show the replay":
670
+ `conveyor replay <TASK>` (optionally with `--format`).
671
+ - "finish this task": `conveyor finish-task <TASK> --reason "<why>"`.
672
+ - "unbind", "deregister this session": `conveyor unbind --task <TASK>`
673
+ followed by `conveyor deregister <NAME>` per session.
674
+ - "reconcile drift", "something looks stale":
675
+ `conveyor reconcile` (add `--apply` if the dry-run report looks correct).
676
+
677
+ ## QA Plan
678
+
679
+ For a repeatable end-to-end checklist:
680
+
681
+ ```bash
682
+ conveyor qa-plan self-management
683
+ conveyor qa-plan self-management --json
684
+ conveyor qa-plan emergent-criteria
685
+ conveyor qa-plan emergent-criteria --json
686
+ conveyor qa-plan tmux-errors
687
+ conveyor qa-plan tmux-errors --json
688
+ ```
689
+
690
+ Use `emergent-criteria` when validating a real worker/manager pair through
691
+ criteria negotiation, audited finish gating, replay/export, and
692
+ `--stop-manager --stop-worker` cleanup.
693
+
694
+ Use `tmux-errors` when validating read-only JSON degradation, mutating command
695
+ failures, pane capture degradation, stop failures, and reconcile recovery for
696
+ disposable tmux failure scenarios.
@@ -0,0 +1,5 @@
1
+ interface:
2
+ display_name: "Manage Codex Workers"
3
+ short_description: "Manage tmux-backed Codex workers"
4
+ default_prompt: "Use $manage-codex-workers to start, supervise, or stop a Codex worker with conveyor."
5
+ brand_color: "#2563EB"