@kbediako/codex-orchestrator 0.1.30 → 0.1.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -176,6 +176,7 @@ codex-orchestrator doctor --usage
176
176
  ## Downstream usage cheatsheet (agent-first)
177
177
 
178
178
  - Bootstrap + wire everything: `codex-orchestrator setup --yes`
179
+ - Low-friction docs->implementation guardrails: `codex-orchestrator flow --task <task-id>`
179
180
  - Validate + measure adoption locally: `codex-orchestrator doctor --usage --format json`
180
181
  - Delegation: `codex-orchestrator doctor --apply --yes`, then enable for a Codex run with: `codex -c 'mcp_servers.delegation.enabled=true' ...`
181
182
  - Collab (symbolic RLM subagents): `codex-orchestrator rlm --collab auto "<goal>"` (requires collab feature enabled in Codex)
@@ -189,6 +190,7 @@ codex-orchestrator devtools setup
189
190
  ## Common commands
190
191
 
191
192
  - `codex-orchestrator start <pipeline>` — run a pipeline.
193
+ - `codex-orchestrator flow --task <task-id>` — run `docs-review` then `implementation-gate` in sequence.
192
194
  - `codex-orchestrator plan <pipeline>` — preview pipeline stages.
193
195
  - `codex-orchestrator exec <cmd>` — run a one-off command with the exec runtime.
194
196
  - `codex-orchestrator init codex` — install starter templates (`mcp-client.json`, `AGENTS.md`) into a repo.
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env node
2
2
  import { existsSync } from 'node:fs';
3
- import { readFile } from 'node:fs/promises';
3
+ import { opendir, readFile } from 'node:fs/promises';
4
4
  import { basename, join } from 'node:path';
5
5
  import process from 'node:process';
6
6
  import { CodexOrchestrator } from '../orchestrator/src/cli/orchestrator.js';
@@ -44,6 +44,9 @@ async function main() {
44
44
  case 'frontend-test':
45
45
  await handleFrontendTest(orchestrator, args);
46
46
  break;
47
+ case 'flow':
48
+ await handleFlow(orchestrator, args);
49
+ break;
47
50
  case 'plan':
48
51
  await handlePlan(orchestrator, args);
49
52
  break;
@@ -145,6 +148,132 @@ function resolveTargetStageId(flags) {
145
148
  }
146
149
  return undefined;
147
150
  }
151
/** Pipeline ids that are recognised as a scope prefix on a flow target value. */
const FLOW_TARGET_PIPELINE_SCOPES = new Set([
    'docs-review',
    'implementation-gate'
]);
/**
 * Reports whether `scope` is one of the recognised flow pipeline scopes.
 *
 * @param {string} scope - Candidate scope; compared as-is (callers normalise case).
 * @returns {boolean} `true` when the scope is a member of FLOW_TARGET_PIPELINE_SCOPES.
 */
function isFlowTargetPipelineScope(scope) {
    const recognised = FLOW_TARGET_PIPELINE_SCOPES.has(scope);
    return recognised;
}
155
/**
 * Breaks a raw target value into the lower-cased pieces used for stage matching.
 *
 * A value is treated as scoped only when it contains a `:` and its first
 * segment is a recognised flow pipeline scope; the stage token is then the
 * last colon-separated segment. Otherwise the whole trimmed value is the
 * stage token.
 *
 * @param {string} candidate - Raw target value.
 * @returns {{literal: string, literalLower: string, stageTokenLower: string, scopeLower: (string|null), scoped: boolean}|null}
 *   `null` when the value is blank, has an empty segment before its first
 *   colon, or ends up with an empty stage token.
 */
function normalizeFlowTargetToken(candidate) {
    const literal = candidate.trim();
    if (literal === '') {
        return null;
    }
    const segments = literal.split(':');
    const hasDelimiter = segments.length > 1;
    const leadingSegment = segments[0].trim();
    if (hasDelimiter && leadingSegment === '') {
        return null;
    }
    const leadingLower = leadingSegment.toLowerCase();
    // The scope lookup only happens for values that actually contain a colon.
    const scoped = hasDelimiter && isFlowTargetPipelineScope(leadingLower);
    const stageToken = scoped ? segments[segments.length - 1].trim() : literal;
    if (stageToken === '') {
        return null;
    }
    return {
        literal,
        literalLower: literal.toLowerCase(),
        stageTokenLower: stageToken.toLowerCase(),
        scopeLower: scoped ? leadingLower : null,
        scoped
    };
}
186
/**
 * Determines which pipeline a plan item belongs to.
 *
 * A non-blank string `metadata.pipelineId` wins; otherwise the text before the
 * first `:` of `item.id` is used. Either way the value is trimmed and
 * lower-cased.
 *
 * @param {{id: string, metadata?: Object}} item - Plan item to inspect.
 * @returns {string|null} The pipeline id, or `null` when neither source yields one.
 */
function flowPlanItemPipelineId(item) {
    const rawPipelineId = item.metadata ? item.metadata['pipelineId'] : undefined;
    if (typeof rawPipelineId === 'string') {
        const fromMetadata = rawPipelineId.trim().toLowerCase();
        if (fromMetadata) {
            return fromMetadata;
        }
    }
    const colonAt = item.id.indexOf(':');
    if (colonAt < 1) {
        // No colon at all, or the id starts with one: there is no prefix to use.
        return null;
    }
    const prefix = item.id.slice(0, colonAt).trim().toLowerCase();
    return prefix === '' ? null : prefix;
}
199
/**
 * Decides whether a plan item is addressed by a requested target value.
 *
 * Matching is case-insensitive and proceeds in this order:
 *  1. the item id equals the whole target;
 *  2. for a scoped target, an item whose pipeline id is known and differs from
 *     the requested scope is rejected;
 *  3. scoped targets then require `metadata.stageId` or an alias to equal the
 *     whole target or its stage token;
 *  4. unscoped targets also accept an item id, stage id or alias that ends
 *     with `:<stage token>`.
 *
 * @param {{id: string, metadata?: Object}} item - Plan item to test.
 * @param {string} candidate - Raw target value.
 * @returns {boolean} `true` when the item matches the target.
 */
function flowPlanItemMatchesTarget(item, candidate) {
    const target = normalizeFlowTargetToken(candidate);
    if (!target) {
        return false;
    }
    const itemIdLower = item.id.toLowerCase();
    if (itemIdLower === target.literalLower) {
        return true;
    }
    if (target.scoped && target.scopeLower) {
        const owningPipeline = flowPlanItemPipelineId(item);
        if (owningPipeline && owningPipeline !== target.scopeLower) {
            return false;
        }
    }
    const rawStageId = item.metadata ? item.metadata['stageId'] : undefined;
    const stageIdLower = typeof rawStageId === 'string' ? rawStageId.toLowerCase() : null;
    const rawAliases = item.metadata?.['aliases'];
    const aliasesLower = (Array.isArray(rawAliases) ? rawAliases : [])
        .filter((alias) => typeof alias === 'string')
        .map((alias) => alias.toLowerCase());
    const stageToken = target.stageTokenLower;
    if (target.scoped) {
        // Scoped targets only match exactly; no suffix matching is attempted.
        const matchesExactly = (value) => value === target.literalLower || value === stageToken;
        if (stageIdLower && matchesExactly(stageIdLower)) {
            return true;
        }
        return aliasesLower.some((alias) => matchesExactly(alias));
    }
    const stageSuffix = `:${stageToken}`;
    const matchesLoosely = (value) => value === stageToken || value.endsWith(stageSuffix);
    if (itemIdLower.endsWith(stageSuffix)) {
        return true;
    }
    if (stageIdLower && matchesLoosely(stageIdLower)) {
        return true;
    }
    return aliasesLower.some((alias) => matchesLoosely(alias));
}
238
/**
 * Checks whether any item of a pipeline plan matches the given stage id.
 *
 * @param {{plan: {items: Array<Object>}}} plan - Plan result whose `plan.items` are searched.
 * @param {string} stageId - Requested stage id; a blank value never matches.
 * @returns {boolean} `true` when at least one plan item matches.
 */
function planIncludesStageId(plan, stageId) {
    const matchesRequestedStage = (item) => flowPlanItemMatchesTarget(item, stageId);
    return stageId.trim() !== '' && plan.plan.items.some(matchesRequestedStage);
}
244
/**
 * Extracts the pipeline scope prefix from a target value, if it has one.
 *
 * @param {string} stageId - Raw target value such as `<scope>:<stage>`.
 * @returns {string|null} The lower-cased scope when the text before the first
 *   `:` is a recognised flow pipeline scope; otherwise `null`.
 */
function resolveFlowTargetScope(stageId) {
    const [head, ...rest] = stageId.split(':');
    // No colon at all, or nothing before the first colon: the value is unscoped.
    if (rest.length === 0 || head === '') {
        return null;
    }
    const scope = head.trim().toLowerCase();
    return isFlowTargetPipelineScope(scope) ? scope : null;
}
255
/**
 * Works out which of the two flow pipelines a requested target stage applies to.
 *
 * Both pipelines are planned; the target is forwarded to a pipeline only when
 * the target's scope (if any) names that pipeline and the pipeline's plan
 * contains a matching stage.
 *
 * @param {{plan: Function}} orchestrator - Object whose `plan({ pipelineId, taskId })` resolves to a plan result.
 * @param {string|undefined} taskId - Task id passed through to `plan`.
 * @param {string|undefined} requestedTargetStageId - Target requested by the caller.
 * @returns {Promise<{docsReviewTargetStageId?: string, implementationGateTargetStageId?: string}>}
 *   An empty object when no target was requested.
 * @throws {Error} When a target was requested but matches neither pipeline.
 */
async function resolveFlowTargetStageSelection(orchestrator, taskId, requestedTargetStageId) {
    if (!requestedTargetStageId) {
        return {};
    }
    const pipelineIds = ['docs-review', 'implementation-gate'];
    const [docsPlan, implementationPlan] = await Promise.all(pipelineIds.map((pipelineId) => orchestrator.plan({ pipelineId, taskId })));
    const requestedScope = resolveFlowTargetScope(requestedTargetStageId);
    const selectFor = (pipelineId, plan) => {
        const inScope = !requestedScope || requestedScope === pipelineId;
        return inScope && planIncludesStageId(plan, requestedTargetStageId)
            ? requestedTargetStageId
            : undefined;
    };
    const docsReviewTargetStageId = selectFor('docs-review', docsPlan);
    const implementationGateTargetStageId = selectFor('implementation-gate', implementationPlan);
    if (docsReviewTargetStageId === undefined && implementationGateTargetStageId === undefined) {
        throw new Error(`Target stage "${requestedTargetStageId}" is not defined in docs-review or implementation-gate.`);
    }
    return { docsReviewTargetStageId, implementationGateTargetStageId };
}
148
277
  function readStringFlag(flags, key) {
149
278
  const value = flags[key];
150
279
  if (typeof value !== 'string') {
@@ -313,6 +442,84 @@ async function handleFrontendTest(orchestrator, rawArgs) {
313
442
  }
314
443
  }
315
444
  }
445
/**
 * Implements the `flow` command: runs the `docs-review` pipeline and, only if
 * it succeeds, the `implementation-gate` pipeline.
 *
 * The implementation-gate run is started with the docs-review run id as its
 * parent. `process.exitCode` is set to 1 when either run does not finish with
 * status `succeeded`. With `--format json` a single combined payload is
 * printed; otherwise each run is reported through `emitRunOutput`, followed by
 * a one-line outcome message.
 *
 * @param {Object} orchestrator - Orchestrator exposing `plan` and `start`.
 * @param {string[]} rawArgs - Unparsed command-line arguments for the subcommand.
 * @returns {Promise<void>}
 * @throws {Error} When positional arguments are supplied or the requested
 *   target stage matches neither pipeline.
 */
async function handleFlow(orchestrator, rawArgs) {
    const { positionals, flags } = parseArgs(rawArgs);
    if (isHelpRequest(positionals, flags)) {
        printFlowHelp();
        return;
    }
    if (positionals.length > 0) {
        throw new Error(`flow does not accept positional arguments: ${positionals.join(' ')}`);
    }
    const stringFlag = (key) => (typeof flags[key] === 'string' ? flags[key] : undefined);
    const format = flags['format'] === 'json' ? 'json' : 'text';
    const wantsJson = format === 'json';
    const executionMode = resolveExecutionModeFlag(flags);
    const taskId = stringFlag('task');
    const parentRunId = stringFlag('parent-run');
    const approvalPolicy = stringFlag('approval-policy');
    const targetStageId = resolveTargetStageId(flags);
    const stageTargets = await resolveFlowTargetStageSelection(orchestrator, taskId, targetStageId);
    const printJson = (payload) => console.log(JSON.stringify(payload, null, 2));
    await withRunUi(flags, format, async (runEvents) => {
        const docsReviewResult = await orchestrator.start({
            pipelineId: 'docs-review',
            taskId,
            parentRunId,
            approvalPolicy,
            targetStageId: stageTargets.docsReviewTargetStageId,
            executionMode,
            runEvents
        });
        const docsPayload = toRunOutputPayload(docsReviewResult);
        if (!wantsJson) {
            emitRunOutput(docsReviewResult, format, 'Docs-review run');
        }
        if (docsReviewResult.manifest.status !== 'succeeded') {
            // Stop here: implementation-gate is never started after a failed docs-review.
            process.exitCode = 1;
            if (wantsJson) {
                printJson({
                    status: docsReviewResult.manifest.status,
                    failed_stage: 'docs-review',
                    docs_review: docsPayload,
                    implementation_gate: null
                });
            }
            else {
                console.log('Flow halted: docs-review failed.');
            }
            return;
        }
        const implementationGateResult = await orchestrator.start({
            pipelineId: 'implementation-gate',
            taskId,
            parentRunId: docsReviewResult.manifest.run_id,
            approvalPolicy,
            targetStageId: stageTargets.implementationGateTargetStageId,
            executionMode,
            runEvents
        });
        const implementationPayload = toRunOutputPayload(implementationGateResult);
        const gateFailed = () => implementationGateResult.manifest.status !== 'succeeded';
        if (wantsJson) {
            printJson({
                status: implementationGateResult.manifest.status,
                failed_stage: gateFailed() ? 'implementation-gate' : null,
                docs_review: docsPayload,
                implementation_gate: implementationPayload
            });
            if (gateFailed()) {
                process.exitCode = 1;
            }
            return;
        }
        emitRunOutput(implementationGateResult, format, 'Implementation-gate run');
        if (gateFailed()) {
            process.exitCode = 1;
            console.log('Flow halted: implementation-gate failed.');
            return;
        }
        console.log('Flow complete: docs-review -> implementation-gate.');
    });
}
316
523
  async function handlePlan(orchestrator, rawArgs) {
317
524
  const { positionals, flags } = parseArgs(rawArgs);
318
525
  const pipelineId = positionals[0];
@@ -464,14 +671,7 @@ async function withRunUi(flags, format, action) {
464
671
  }
465
672
  }
466
673
  function emitRunOutput(result, format, label) {
467
- const payload = {
468
- run_id: result.manifest.run_id,
469
- status: result.manifest.status,
470
- artifact_root: result.manifest.artifact_root,
471
- manifest: `${result.manifest.artifact_root}/manifest.json`,
472
- log_path: result.manifest.log_path,
473
- summary: result.manifest.summary ?? null
474
- };
674
+ const payload = toRunOutputPayload(result);
475
675
  if (format === 'json') {
476
676
  console.log(JSON.stringify(payload, null, 2));
477
677
  return;
@@ -487,6 +687,16 @@ function emitRunOutput(result, format, label) {
487
687
  }
488
688
  }
489
689
  }
690
/**
 * Builds the summary object reported for a finished run.
 *
 * @param {{manifest: Object}} result - Run result carrying the run manifest.
 * @returns {{run_id: *, status: *, artifact_root: *, manifest: string, log_path: *, summary: *}}
 *   Selected manifest fields; `manifest` is the path `<artifact_root>/manifest.json`
 *   and `summary` is `null` when the manifest has none.
 */
function toRunOutputPayload(result) {
    const { run_id, status, artifact_root, log_path, summary } = result.manifest;
    return {
        run_id,
        status,
        artifact_root,
        manifest: `${artifact_root}/manifest.json`,
        log_path,
        summary: summary ?? null
    };
}
490
700
  async function handleExec(rawArgs) {
491
701
  const parsed = parseExecArgs(rawArgs);
492
702
  if (parsed.commandTokens.length === 0) {
@@ -519,6 +729,55 @@ async function handleExec(rawArgs) {
519
729
  else if (result.status !== 'succeeded') {
520
730
  process.exitCode = 1;
521
731
  }
732
+ if (outputMode === 'interactive') {
733
+ await maybeEmitExecAdoptionHint(env.taskId);
734
+ }
735
+ }
736
/**
 * Decides whether it is worth computing an adoption hint for a task.
 *
 * Counts the sub-directories of `<runsRoot>/<taskFilter>/cli` and gives up as
 * soon as more than 150 have been seen. Any filesystem error (including a
 * missing directory) yields `false`.
 *
 * @param {string|undefined} taskFilter - Task id whose CLI run directory is inspected.
 * @returns {Promise<boolean>} `true` when the directory could be read and holds
 *   at most 150 sub-directories.
 */
async function shouldScanExecAdoptionHint(taskFilter) {
    if (!taskFilter) {
        return false;
    }
    const { runsRoot } = resolveEnvironmentPaths();
    const cliRunsDir = join(runsRoot, taskFilter, 'cli');
    const maxRunDirectories = 150;
    let dir = null;
    try {
        dir = await opendir(cliRunsDir);
        let directoriesSeen = 0;
        for await (const dirent of dir) {
            if (dirent.isDirectory()) {
                directoriesSeen += 1;
                if (directoriesSeen > maxRunDirectories) {
                    return false;
                }
            }
        }
        return true;
    }
    catch {
        return false;
    }
    finally {
        if (dir) {
            // A failure while closing is ignored so it never surfaces to the caller.
            await dir.close().catch(() => undefined);
        }
    }
}
766
/**
 * Prints the first adoption recommendation for a task, when there is one.
 *
 * Nothing is printed when the pre-check declines the scan or when the usage
 * report has no recommendations. Every error is swallowed on purpose.
 *
 * @param {string|undefined} taskFilter - Task id used to filter the usage report.
 * @returns {Promise<void>}
 */
async function maybeEmitExecAdoptionHint(taskFilter) {
    try {
        const worthScanning = await shouldScanExecAdoptionHint(taskFilter);
        if (worthScanning) {
            const usage = await runDoctorUsage({ windowDays: 7, taskFilter });
            const topRecommendation = usage.adoption.recommendations[0];
            if (topRecommendation) {
                console.log(`Adoption hint: ${topRecommendation}`);
            }
        }
    }
    catch {
        // Exec command behavior should not fail when usage telemetry cannot be read.
    }
}
523
782
  async function handleSelfCheck(rawArgs) {
524
783
  const { flags } = parseArgs(rawArgs);
@@ -599,6 +858,7 @@ Options:
599
858
  throw new Error('No bundled skills detected; cannot run setup.');
600
859
  }
601
860
  const forceSkills = bundledSkills.filter((skill) => skill !== 'chrome-devtools');
861
+ const guidance = buildSetupGuidance();
602
862
  if (!apply) {
603
863
  const forceOnly = forceSkills.join(',');
604
864
  const forceCommand = forceOnly ? `codex-orchestrator skills install --force --only ${forceOnly}` : null;
@@ -615,7 +875,8 @@ Options:
615
875
  note: 'Installs bundled skills into $CODEX_HOME/skills (setup avoids overwriting chrome-devtools when already present).'
616
876
  },
617
877
  delegation,
618
- devtools
878
+ devtools,
879
+ guidance
619
880
  }
620
881
  };
621
882
  if (format === 'json') {
@@ -629,6 +890,9 @@ Options:
629
890
  }
630
891
  console.log(`- Delegation: codex-orchestrator delegation setup --yes${delegationRepoArg}`);
631
892
  console.log('- DevTools: codex-orchestrator devtools setup --yes');
893
+ for (const line of formatSetupGuidanceSummary(guidance)) {
894
+ console.log(line);
895
+ }
632
896
  console.log('Run with --yes to apply this setup.');
633
897
  return;
634
898
  }
@@ -659,8 +923,41 @@ Options:
659
923
  for (const line of formatDevtoolsSetupSummary(devtools)) {
660
924
  console.log(line);
661
925
  }
926
+ for (const line of formatSetupGuidanceSummary(guidance)) {
927
+ console.log(line);
928
+ }
662
929
  console.log('Next: codex-orchestrator doctor --usage');
663
930
  }
931
/**
 * Produces the static guidance block attached to setup output.
 *
 * @returns {{note: string, references: string[], recommended_commands: string[]}}
 *   A freshly created object on every call.
 */
function buildSetupGuidance() {
    const note = 'Agent-first default: run docs-review before implementation and implementation-gate before handoff.';
    const references = [
        'https://github.com/Kbediako/CO#downstream-usage-cheatsheet-agent-first',
        'https://github.com/Kbediako/CO/blob/main/docs/AGENTS.md',
        'https://github.com/Kbediako/CO/blob/main/docs/guides/collab-vs-mcp.md'
    ];
    const recommendedCommands = [
        'codex-orchestrator flow --task <task-id>',
        'codex-orchestrator doctor --usage'
    ];
    return { note, references, recommended_commands: recommendedCommands };
}
945
/**
 * Renders setup guidance as text lines: a heading, the note, then an optional
 * "Recommended commands" section and an optional "References" section. A
 * section is omitted when its list is empty.
 *
 * @param {{note: string, recommended_commands: string[], references: string[]}} guidance
 * @returns {string[]} Lines ready to be printed one per line.
 */
function formatSetupGuidanceSummary(guidance) {
    const section = (heading, entries) => (entries.length > 0
        ? [heading, ...Array.from(entries, (entry) => ` - ${entry}`)]
        : []);
    return [
        'Setup guidance:',
        `- ${guidance.note}`,
        ...section('- Recommended commands:', guidance.recommended_commands),
        ...section('- References:', guidance.references)
    ];
}
664
961
  async function handleDoctor(rawArgs) {
665
962
  const { flags } = parseArgs(rawArgs);
666
963
  const format = flags['format'] === 'json' ? 'json' : 'text';
@@ -1159,6 +1456,17 @@ Commands:
1159
1456
  --interactive | --ui Enable read-only HUD when running in a TTY.
1160
1457
  --no-interactive Force disable HUD (default is off unless requested).
1161
1458
 
1459
+ flow Run docs-review then implementation-gate sequentially.
1460
+ --task <id> Override task identifier (defaults to MCP_RUNNER_TASK_ID).
1461
+ --parent-run <id> Link docs-review run to parent run id.
1462
+ --approval-policy <p> Record approval policy metadata.
1463
+ --format json Emit machine-readable output summary for both runs.
1464
+ --execution-mode <mcp|cloud> Force execution mode for both runs.
1465
+ --cloud Shortcut for --execution-mode cloud.
1466
+ --target <stage-id> Focus plan/build metadata on a specific stage (alias: --target-stage).
1467
+ --interactive | --ui Enable read-only HUD when running in a TTY.
1468
+ --no-interactive Force disable HUD (default is off unless requested).
1469
+
1162
1470
  plan [pipeline] Preview pipeline stages without executing.
1163
1471
  --task <id> Override task identifier.
1164
1472
  --format json Emit machine-readable output.
@@ -1324,3 +1632,20 @@ Options:
1324
1632
  --help Show this message.
1325
1633
  `);
1326
1634
  }
1635
/**
 * Prints the usage text of the `flow` command to standard output.
 *
 * @returns {void}
 */
function printFlowHelp() {
    const helpLines = [
        'Usage: codex-orchestrator flow [options]',
        '',
        'Runs docs-review first, then implementation-gate. Stops on the first failure.',
        '',
        'Options:',
        '--task <id> Override task identifier.',
        '--parent-run <id> Link docs-review run to parent run id.',
        '--approval-policy <p> Record approval policy metadata.',
        '--format json Emit machine-readable output for both runs.',
        '--execution-mode <mcp|cloud> Force execution mode for both runs.',
        '--cloud Shortcut for --execution-mode cloud.',
        '--target <stage-id> Focus plan/build metadata (applies where the stage exists).',
        '--interactive | --ui Enable read-only HUD when running in a TTY.',
        '--no-interactive Force disable HUD.',
        ''
    ];
    console.log(helpLines.join('\n'));
}
@@ -132,6 +132,18 @@ export async function runDoctorUsage(options = {}) {
132
132
  .sort((a, b) => b[1] - a[1])
133
133
  .slice(0, 10)
134
134
  .map(([id, runs]) => ({ id, runs }));
135
+ const execRuns = pipelines.get('exec') ?? 0;
136
+ const gateRuns = (pipelines.get('docs-review') ?? 0) + (pipelines.get('implementation-gate') ?? 0);
137
+ const execSharePct = statusCounts.total > 0 ? Math.round((execRuns / statusCounts.total) * 1000) / 10 : 0;
138
+ const gateSharePct = statusCounts.total > 0 ? Math.round((gateRuns / statusCounts.total) * 1000) / 10 : 0;
139
+ const adoptionRecommendations = buildAdoptionRecommendations({
140
+ totalRuns: statusCounts.total,
141
+ execRuns,
142
+ gateRuns,
143
+ rlmRuns,
144
+ cloudRuns,
145
+ collabRunsWithToolCalls
146
+ });
135
147
  const delegationErrors = [];
136
148
  let activeWithSubagents = 0;
137
149
  let totalSubagentManifests = 0;
@@ -193,6 +205,13 @@ export async function runDoctorUsage(options = {}) {
193
205
  pipelines: {
194
206
  total: pipelines.size,
195
207
  top: pipelineTop
208
+ },
209
+ adoption: {
210
+ exec_runs: execRuns,
211
+ exec_share_pct: execSharePct,
212
+ gate_runs: gateRuns,
213
+ gate_share_pct: gateSharePct,
214
+ recommendations: adoptionRecommendations
196
215
  }
197
216
  };
198
217
  }
@@ -235,6 +254,14 @@ export function formatDoctorUsageSummary(result) {
235
254
  lines.push(` - ${entry.id}: ${entry.runs}`);
236
255
  }
237
256
  }
257
+ lines.push(`Pipeline adoption: exec=${result.adoption.exec_runs} (${result.adoption.exec_share_pct}%), ` +
258
+ `docs-review+implementation-gate=${result.adoption.gate_runs} (${result.adoption.gate_share_pct}%)`);
259
+ if (result.adoption.recommendations.length > 0) {
260
+ lines.push('Adoption hints:');
261
+ for (const recommendation of result.adoption.recommendations) {
262
+ lines.push(` - ${recommendation}`);
263
+ }
264
+ }
238
265
  if (result.delegation.errors.length > 0) {
239
266
  lines.push('Delegation scan warnings:');
240
267
  for (const warning of result.delegation.errors.slice(0, 3)) {
@@ -243,6 +270,29 @@ export function formatDoctorUsageSummary(result) {
243
270
  }
244
271
  return lines;
245
272
  }
273
/**
 * Derives up to three adoption hints from run counters.
 *
 * Rules are evaluated in a fixed order and the first three that apply are
 * returned. No hints are produced when there are no runs at all.
 *
 * @param {{totalRuns: number, execRuns: number, gateRuns: number, rlmRuns: number, cloudRuns: number, collabRunsWithToolCalls: number}} params
 * @returns {string[]} Hint messages, at most three.
 */
function buildAdoptionRecommendations(params) {
    const { totalRuns, execRuns, gateRuns, rlmRuns, cloudRuns, collabRunsWithToolCalls } = params;
    if (totalRuns <= 0) {
        return [];
    }
    // Each entry pairs a condition with the hint emitted when it holds.
    const rules = [
        [
            execRuns / totalRuns >= 0.6,
            'Most runs are plain exec; prefer `codex-orchestrator start docs-review` or `start implementation-gate` for manifest-backed guardrails.'
        ],
        [
            gateRuns === 0,
            'No gate pipelines detected; use docs-review before implementation and implementation-gate before handoff.'
        ],
        [
            rlmRuns === 0,
            'No RLM runs detected; try `codex-orchestrator rlm --collab auto "<goal>"` for long-horizon or ambiguous tasks.'
        ],
        [
            cloudRuns === 0,
            'No cloud runs detected; configure CODEX_CLOUD_ENV_ID and run `codex-orchestrator start <pipeline> --cloud --target <stage-id>` for long-running stages.'
        ],
        [
            rlmRuns > 0 && collabRunsWithToolCalls === 0,
            'RLM is used without collab activity; ensure collab is enabled (`codex features enable collab`).'
        ]
    ];
    return rules
        .filter(([applies]) => applies)
        .map(([, hint]) => hint)
        .slice(0, 3);
}
246
296
  function extractRunIdFromManifestPath(manifestPath) {
247
297
  if (!manifestPath) {
248
298
  return null;
@@ -9,6 +9,7 @@ import { resolveEnvironmentPaths } from '../../../scripts/lib/run-manifests.js';
9
9
  import { normalizeEnvironmentPaths } from './run/environment.js';
10
10
  import { bootstrapManifest, loadManifest, updateHeartbeat, finalizeStatus, appendSummary, ensureGuardrailStatus, resetForResume, recordResumeEvent } from './run/manifest.js';
11
11
  import { ManifestPersister, persistManifest } from './run/manifestPersister.js';
12
+ import { resolveRuntimeActivitySnapshot } from './run/runtimeActivity.js';
12
13
  import { generateRunId } from './utils/runId.js';
13
14
  import { runCommandStage } from './services/commandRunner.js';
14
15
  import { appendMetricsEntry } from './metrics/metricsRecorder.js';
@@ -437,12 +438,13 @@ export class CodexOrchestrator {
437
438
  async status(options) {
438
439
  const env = this.baseEnv;
439
440
  const { manifest, paths } = await loadManifest(env, options.runId);
441
+ const activity = await resolveRuntimeActivitySnapshot(manifest, paths);
440
442
  if (options.format === 'json') {
441
- const payload = this.buildStatusPayload(env, manifest, paths);
443
+ const payload = this.buildStatusPayload(env, manifest, paths, activity);
442
444
  process.stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
443
445
  return manifest;
444
446
  }
445
- this.renderStatus(manifest);
447
+ this.renderStatus(manifest, activity);
446
448
  return manifest;
447
449
  }
448
450
  async plan(options = {}) {
@@ -1121,7 +1123,7 @@ export class CodexOrchestrator {
1121
1123
  throw new Error('Resume token mismatch.');
1122
1124
  }
1123
1125
  }
1124
- buildStatusPayload(env, manifest, paths) {
1126
+ buildStatusPayload(env, manifest, paths, activity) {
1125
1127
  return {
1126
1128
  run_id: manifest.run_id,
1127
1129
  status: manifest.status,
@@ -1132,17 +1134,24 @@ export class CodexOrchestrator {
1132
1134
  artifact_root: manifest.artifact_root,
1133
1135
  log_path: manifest.log_path,
1134
1136
  heartbeat_at: manifest.heartbeat_at,
1137
+ activity,
1135
1138
  commands: manifest.commands,
1136
1139
  child_runs: manifest.child_runs,
1137
1140
  cloud_execution: manifest.cloud_execution ?? null
1138
1141
  };
1139
1142
  }
1140
- renderStatus(manifest) {
1143
+ renderStatus(manifest, activity) {
1141
1144
  logger.info(`Run: ${manifest.run_id}`);
1142
1145
  logger.info(`Status: ${manifest.status}${manifest.status_detail ? ` (${manifest.status_detail})` : ''}`);
1143
1146
  logger.info(`Started: ${manifest.started_at}`);
1144
1147
  logger.info(`Completed: ${manifest.completed_at ?? 'in-progress'}`);
1145
1148
  logger.info(`Manifest: ${manifest.artifact_root}/manifest.json`);
1149
+ if (activity.observed_at) {
1150
+ const staleSuffix = activity.stale === null ? '' : activity.stale ? ' [stale]' : ' [active]';
1151
+ const sourceLabel = activity.observed_source ? ` via ${activity.observed_source}` : '';
1152
+ const ageLabel = activity.age_seconds === null ? '' : ` age=${activity.age_seconds}s`;
1153
+ logger.info(`Activity: ${activity.observed_at}${sourceLabel}${ageLabel}${staleSuffix}`);
1154
+ }
1146
1155
  if (manifest.cloud_execution?.task_id) {
1147
1156
  logger.info(`Cloud: ${manifest.cloud_execution.task_id} [${manifest.cloud_execution.status}]` +
1148
1157
  (manifest.cloud_execution.status_url ? ` ${manifest.cloud_execution.status_url}` : ''));
@@ -0,0 +1,79 @@
1
+ import { readFile, stat } from 'node:fs/promises';
2
/**
 * Builds a snapshot of the most recent sign of activity for a run.
 *
 * Three timestamps are considered: the manifest's `heartbeat_at`, the contents
 * of the heartbeat file, and the modification time of the runner log. The
 * newest valid one becomes `observed_at` / `observed_source`.
 *
 * Staleness is evaluated only when the manifest status is `in_progress`, at
 * least one timestamp is available, and the manifest declares a positive
 * `heartbeat_stale_after_seconds`; otherwise `stale` and `age_seconds` are
 * `null`.
 *
 * @param {Object} manifest - Run manifest (`heartbeat_at`, `status`, `heartbeat_stale_after_seconds` are read).
 * @param {{heartbeatPath: string, logPath: string}} paths - Locations of the heartbeat file and runner log.
 * @param {{nowMs?: number}} [options] - `nowMs` overrides the current time in milliseconds when finite.
 * @returns {Promise<{manifest_heartbeat_at: (string|null), heartbeat_file_at: (string|null), runner_log_mtime_at: (string|null), observed_at: (string|null), observed_source: (string|null), stale: (boolean|null), stale_threshold_seconds: (number|null), age_seconds: (number|null)}>}
 */
export async function resolveRuntimeActivitySnapshot(manifest, paths, options = {}) {
    const manifestHeartbeat = normalizeTimestamp(manifest.heartbeat_at);
    // The two filesystem reads are independent, so they run concurrently. Both
    // helpers resolve to null on failure instead of rejecting.
    const [heartbeatFileAt, runnerLogMtime] = await Promise.all([
        readHeartbeatTimestamp(paths.heartbeatPath),
        readMtimeIso(paths.logPath)
    ]);
    const heartbeatCandidate = normalizeTimestamp(heartbeatFileAt);
    const logCandidate = normalizeTimestamp(runnerLogMtime);
    const candidates = [];
    if (manifestHeartbeat) {
        candidates.push({ source: 'manifest', ...manifestHeartbeat });
    }
    if (heartbeatCandidate) {
        candidates.push({ source: 'heartbeat_file', ...heartbeatCandidate });
    }
    if (logCandidate) {
        candidates.push({ source: 'runner_log', ...logCandidate });
    }
    const latest = pickLatest(candidates);
    const nowMs = Number.isFinite(options.nowMs) ? Number(options.nowMs) : Date.now();
    const staleThresholdSeconds = Number.isFinite(manifest.heartbeat_stale_after_seconds) && manifest.heartbeat_stale_after_seconds > 0
        ? Math.floor(manifest.heartbeat_stale_after_seconds)
        : null;
    let stale = null;
    let ageSeconds = null;
    if (manifest.status === 'in_progress' && latest && staleThresholdSeconds !== null) {
        // Clamp at zero so a timestamp slightly in the future never yields a negative age.
        ageSeconds = Math.max(0, Math.floor((nowMs - latest.ms) / 1000));
        stale = ageSeconds > staleThresholdSeconds;
    }
    return {
        manifest_heartbeat_at: manifestHeartbeat?.iso ?? null,
        heartbeat_file_at: heartbeatCandidate?.iso ?? null,
        runner_log_mtime_at: logCandidate?.iso ?? null,
        observed_at: latest?.iso ?? null,
        observed_source: latest?.source ?? null,
        stale,
        stale_threshold_seconds: staleThresholdSeconds,
        age_seconds: ageSeconds
    };
}
40
/**
 * Reads the heartbeat file and returns its trimmed text.
 *
 * @param {string} heartbeatPath - Path of the heartbeat file.
 * @returns {Promise<string|null>} The trimmed contents, or `null` when the
 *   file is empty, contains only whitespace, or cannot be read.
 */
async function readHeartbeatTimestamp(heartbeatPath) {
    try {
        const contents = await readFile(heartbeatPath, 'utf8');
        // An all-whitespace file trims to '', which is reported as null.
        return contents.trim() || null;
    }
    catch {
        return null;
    }
}
50
/**
 * Returns a file's modification time as an ISO-8601 string.
 *
 * @param {string} filePath - Path to stat.
 * @returns {Promise<string|null>} The mtime, or `null` when the path cannot be stat-ed.
 */
async function readMtimeIso(filePath) {
    try {
        const { mtime } = await stat(filePath);
        return mtime.toISOString();
    }
    catch {
        return null;
    }
}
59
/**
 * Parses a timestamp string into a canonical ISO string and epoch milliseconds.
 *
 * @param {*} value - Candidate timestamp; anything other than a string is rejected.
 * @returns {{iso: string, ms: number}|null} `null` when the value is not a
 *   string, is blank, or cannot be parsed by `Date.parse`.
 */
function normalizeTimestamp(value) {
    const text = typeof value === 'string' ? value.trim() : '';
    if (text === '') {
        return null;
    }
    const ms = Date.parse(text);
    return Number.isFinite(ms)
        ? { iso: new Date(ms).toISOString(), ms }
        : null;
}
73
/**
 * Returns the candidate with the greatest `ms` value.
 *
 * The input array is left untouched. When several candidates share the
 * greatest `ms`, the one that appears first in the array is returned.
 *
 * @param {Array<{ms: number}>} candidates - Timestamp candidates.
 * @returns {{ms: number}|null} The newest candidate, or `null` for an empty list.
 */
function pickLatest(candidates) {
    let latest = null;
    for (const candidate of candidates) {
        // Strict comparison keeps the earliest entry on ties.
        if (latest === null || candidate.ms > latest.ms) {
            latest = candidate;
        }
    }
    return latest;
}
@@ -171,7 +171,6 @@ export async function runCommandStage(context, hooks = {}) {
171
171
  try {
172
172
  result = await runner.run({
173
173
  command: stage.command,
174
- args: [],
175
174
  cwd: stage.cwd ?? env.repoRoot,
176
175
  env: execEnv,
177
176
  sessionId: sessionId ?? undefined,
@@ -19,10 +19,13 @@ const sessionManager = new ExecSessionManager({
19
19
  const privacyGuard = new PrivacyGuard({ mode: resolvePrivacyGuardMode() });
20
20
  const handleService = new RemoteExecHandleService({ guard: privacyGuard, now: () => new Date() });
21
21
  const cliExecutor = async (request) => {
22
+ const hasExplicitArgs = Array.isArray(request.args);
22
23
  const child = spawn(request.command, request.args ?? [], {
23
24
  cwd: request.cwd,
24
25
  env: request.env,
25
- shell: true,
26
+ // Use shell mode only for string-style commands. When args are provided we
27
+ // want argv semantics (`cmd arg1 arg2`) rather than `sh -c cmd` behavior.
28
+ shell: !hasExplicitArgs,
26
29
  stdio: ['ignore', 'pipe', 'pipe']
27
30
  });
28
31
  if (!child.stdout || !child.stderr) {
@@ -27,7 +27,8 @@ export class UnifiedExecRunner {
27
27
  };
28
28
  }
29
29
  async run(options) {
30
- const args = options.args ?? [];
30
+ const args = options.args;
31
+ const resolvedArgs = args ?? [];
31
32
  const invocationId = options.invocationId ?? this.idGenerator();
32
33
  const correlationId = this.idGenerator();
33
34
  const issuedHandle = this.handleService ? this.handleService.issueHandle(correlationId) : undefined;
@@ -49,7 +50,7 @@ export class UnifiedExecRunner {
49
50
  const metadata = {
50
51
  ...options.metadata,
51
52
  command: options.command,
52
- args,
53
+ args: resolvedArgs,
53
54
  cwd: options.cwd,
54
55
  sessionId: lease.id,
55
56
  correlationId,
@@ -81,7 +82,7 @@ export class UnifiedExecRunner {
81
82
  attempt,
82
83
  correlationId,
83
84
  command: options.command,
84
- args,
85
+ args: resolvedArgs,
85
86
  cwd: options.cwd,
86
87
  sandboxState,
87
88
  sessionId: lease.id,
@@ -403,7 +404,7 @@ function getErrorMessage(error) {
403
404
  return String(error);
404
405
  }
405
406
  const defaultExecutor = async (request) => {
406
- const child = spawn(request.command, request.args, {
407
+ const child = spawn(request.command, request.args ?? [], {
407
408
  cwd: request.cwd,
408
409
  env: request.env,
409
410
  stdio: ['ignore', 'pipe', 'pipe']
@@ -15,6 +15,12 @@ const REQUIRED_BUCKET_FAILED = new Set(['fail', 'cancel', 'skipping']);
15
15
  const MERGEABLE_STATES = new Set(['CLEAN', 'HAS_HOOKS', 'UNSTABLE']);
16
16
  const BLOCKED_REVIEW_DECISIONS = new Set(['CHANGES_REQUESTED', 'REVIEW_REQUIRED']);
17
17
  const DO_NOT_MERGE_LABEL = /do[\s_-]*not[\s_-]*merge/i;
18
/**
 * Lower-case logins of bots that are singled out as "actionable"; each bot is
 * listed both with and without the `[bot]` suffix.
 */
const ACTIONABLE_BOT_LOGINS = new Set([
    'chatgpt-codex-connector', 'chatgpt-codex-connector[bot]',
    'coderabbitai', 'coderabbitai[bot]'
]);
18
24
  const PR_QUERY = `
19
25
  query($owner:String!, $repo:String!, $number:Int!) {
20
26
  repository(owner:$owner, name:$repo) {
@@ -73,6 +79,29 @@ function normalizeEnum(value) {
73
79
  function normalizeBucket(value) {
74
80
  return typeof value === 'string' ? value.trim().toLowerCase() : '';
75
81
  }
82
/**
 * Normalises a login for comparison by trimming and lower-casing it.
 *
 * @param {*} value - Candidate login.
 * @returns {string} The normalised login, or `''` when `value` is not a string.
 */
function normalizeLogin(value) {
    if (typeof value !== 'string') {
        return '';
    }
    return value.trim().toLowerCase();
}
85
/**
 * Reports whether a login belongs to one of the actionable bots.
 *
 * @param {*} login - Login to test; it is normalised before the lookup.
 * @returns {boolean} `true` when the normalised login is in ACTIONABLE_BOT_LOGINS.
 */
function isActionableBot(login) {
    const normalized = normalizeLogin(login);
    return ACTIONABLE_BOT_LOGINS.has(normalized);
}
88
/**
 * Decides whether a review actor should be treated as a human.
 *
 * An actor is not human when it is missing, is not an object, has no usable
 * login, or is one of the actionable bots. Otherwise a non-blank `type`
 * decides (only `USER`, compared case-insensitively, counts as human); without
 * a type, any login ending in `[bot]` is treated as non-human.
 *
 * @param {{login?: string, type?: string}|null|undefined} user - Actor to classify.
 * @returns {boolean} `true` when the actor is considered human.
 */
export function isHumanReviewActor(user) {
    if (!user || typeof user !== 'object') {
        return false;
    }
    const login = normalizeLogin(user.login);
    if (login === '' || isActionableBot(login)) {
        return false;
    }
    const accountType = typeof user.type === 'string' ? user.type.trim().toUpperCase() : '';
    return accountType === ''
        ? !login.endsWith('[bot]')
        : accountType === 'USER';
}
76
105
  function formatDuration(ms) {
77
106
  if (ms <= 0) {
78
107
  return '0s';
@@ -218,6 +247,15 @@ async function runGhJson(args) {
218
247
  throw new Error(`Failed to parse JSON from gh ${args.join(' ')}: ${error instanceof Error ? error.message : String(error)}`);
219
248
  }
220
249
  }
250
+ async function runGhJsonSlurped(args) {
251
+ const result = await runGh([...args, '--paginate', '--slurp']);
252
+ try {
253
+ return JSON.parse(result.stdout);
254
+ }
255
+ catch (error) {
256
+ throw new Error(`Failed to parse paginated JSON from gh ${args.join(' ')}: ${error instanceof Error ? error.message : String(error)}`);
257
+ }
258
+ }
221
259
  async function ensureGhAuth() {
222
260
  const result = await runGh(['auth', 'status', '-h', 'github.com'], { allowFailure: true });
223
261
  if (result.exitCode !== 0) {
@@ -378,7 +416,7 @@ export function resolveCachedRequiredChecksSummary(previousCache, currentHeadOid
378
416
  }
379
417
  return hasRequiredChecksSummary(previousCache.summary) ? previousCache.summary : null;
380
418
  }
381
- export function buildStatusSnapshot(response, requiredChecks = null) {
419
+ export function buildStatusSnapshot(response, requiredChecks = null, inlineBotFeedback = null) {
382
420
  const pr = response?.data?.repository?.pullRequest;
383
421
  if (!pr) {
384
422
  throw new Error('GraphQL response missing pullRequest payload.');
@@ -391,10 +429,15 @@ export function buildStatusSnapshot(response, requiredChecks = null) {
391
429
  const hasDoNotMergeLabel = labels.some((label) => DO_NOT_MERGE_LABEL.test(label));
392
430
  const threads = Array.isArray(pr.reviewThreads?.nodes) ? pr.reviewThreads.nodes : [];
393
431
  const unresolvedThreadCount = threads.filter((thread) => thread && !thread.isResolved && !thread.isOutdated).length;
432
+ const hasUnresolvedThread = unresolvedThreadCount > 0;
394
433
  const contexts = pr.commits?.nodes?.[0]?.commit?.statusCheckRollup?.contexts?.nodes;
395
434
  const checkNodes = Array.isArray(contexts) ? contexts : [];
396
435
  const checks = summarizeChecks(checkNodes);
397
436
  const requiredCheckSummary = requiredChecks && typeof requiredChecks === 'object' && requiredChecks.total > 0 ? requiredChecks : null;
437
+ const unacknowledgedBotFeedbackCount = inlineBotFeedback && typeof inlineBotFeedback.unacknowledgedCount === 'number'
438
+ ? inlineBotFeedback.unacknowledgedCount
439
+ : 0;
440
+ const botFeedbackFetchError = inlineBotFeedback?.fetchError === true;
398
441
  const gateChecks = requiredCheckSummary ?? checks;
399
442
  const gateChecksSource = requiredCheckSummary ? 'required' : 'rollup';
400
443
  const reviewDecision = normalizeEnum(pr.reviewDecision);
@@ -422,9 +465,15 @@ export function buildStatusSnapshot(response, requiredChecks = null) {
422
465
  if (BLOCKED_REVIEW_DECISIONS.has(reviewDecision)) {
423
466
  gateReasons.push(`review=${reviewDecision}`);
424
467
  }
425
- if (unresolvedThreadCount > 0) {
468
+ if (hasUnresolvedThread) {
426
469
  gateReasons.push(`unresolved_threads=${unresolvedThreadCount}`);
427
470
  }
471
+ if (botFeedbackFetchError) {
472
+ gateReasons.push('bot_feedback=unknown');
473
+ }
474
+ else if (unacknowledgedBotFeedbackCount > 0) {
475
+ gateReasons.push(`unacknowledged_bot_feedback=${unacknowledgedBotFeedbackCount}`);
476
+ }
428
477
  return {
429
478
  number: Number(pr.number),
430
479
  url: typeof pr.url === 'string' ? pr.url : null,
@@ -437,6 +486,8 @@ export function buildStatusSnapshot(response, requiredChecks = null) {
437
486
  labels,
438
487
  hasDoNotMergeLabel,
439
488
  unresolvedThreadCount,
489
+ unacknowledgedBotFeedbackCount,
490
+ botFeedbackFetchError,
440
491
  checks,
441
492
  requiredChecks: requiredCheckSummary,
442
493
  gateChecksSource,
@@ -467,6 +518,8 @@ function formatStatusLine(snapshot, quietRemainingMs) {
467
518
  `required_checks_pending=${requiredChecks ? requiredChecks.pending.length : 'n/a'}`,
468
519
  `required_checks_failed=${requiredChecks ? requiredChecks.failed.length : 'n/a'}`,
469
520
  `unresolved_threads=${snapshot.unresolvedThreadCount}`,
521
+ `unack_bot_feedback=${snapshot.unacknowledgedBotFeedbackCount}`,
522
+ `bot_feedback_fetch_error=${snapshot.botFeedbackFetchError ? 'yes' : 'no'}`,
470
523
  `quiet_remaining=${formatDuration(quietRemainingMs)}`,
471
524
  `blocked_by=${reasons}`,
472
525
  `pending=[${pendingNames}]`,
@@ -501,6 +554,77 @@ async function fetchRequiredChecks(owner, repo, prNumber) {
501
554
  };
502
555
  }
503
556
  }
557
+ function flattenReviewCommentPages(pagesPayload) {
558
+ if (!Array.isArray(pagesPayload)) {
559
+ return [];
560
+ }
561
+ const comments = [];
562
+ for (const page of pagesPayload) {
563
+ if (Array.isArray(page)) {
564
+ comments.push(...page);
565
+ continue;
566
+ }
567
+ if (page && typeof page === 'object') {
568
+ comments.push(page);
569
+ }
570
+ }
571
+ return comments;
572
+ }
573
+ async function fetchInlineBotFeedback(owner, repo, prNumber, headOid) {
574
+ if (!headOid) {
575
+ return { fetchError: false, unacknowledgedCount: 0 };
576
+ }
577
+ try {
578
+ const pagedPayload = await runGhJsonSlurped([
579
+ 'api',
580
+ `repos/${owner}/${repo}/pulls/${prNumber}/comments`
581
+ ]);
582
+ const comments = flattenReviewCommentPages(pagedPayload);
583
+ const repliesByParentId = new Map();
584
+ for (const comment of comments) {
585
+ if (!comment || typeof comment !== 'object') {
586
+ continue;
587
+ }
588
+ const parentId = Number(comment.in_reply_to_id);
589
+ if (!Number.isInteger(parentId) || parentId <= 0) {
590
+ continue;
591
+ }
592
+ const bucket = repliesByParentId.get(parentId) ?? [];
593
+ bucket.push(comment);
594
+ repliesByParentId.set(parentId, bucket);
595
+ }
596
+ let unacknowledgedCount = 0;
597
+ for (const comment of comments) {
598
+ if (!comment || typeof comment !== 'object') {
599
+ continue;
600
+ }
601
+ const commentId = Number(comment.id);
602
+ if (!Number.isInteger(commentId) || commentId <= 0) {
603
+ continue;
604
+ }
605
+ if (comment.in_reply_to_id !== null && comment.in_reply_to_id !== undefined) {
606
+ continue;
607
+ }
608
+ if (!isActionableBot(comment.user?.login)) {
609
+ continue;
610
+ }
611
+ const commitId = typeof comment.commit_id === 'string' ? comment.commit_id : null;
612
+ const originalCommitId = typeof comment.original_commit_id === 'string' ? comment.original_commit_id : null;
613
+ if (commitId !== headOid && originalCommitId !== headOid) {
614
+ continue;
615
+ }
616
+ const replies = repliesByParentId.get(commentId) ?? [];
617
+ const hasHumanReply = replies.some((reply) => isHumanReviewActor(reply?.user));
618
+ if (!hasHumanReply) {
619
+ unacknowledgedCount += 1;
620
+ }
621
+ }
622
+ return { fetchError: false, unacknowledgedCount };
623
+ }
624
+ catch {
625
+ return { fetchError: true, unacknowledgedCount: 0 };
626
+ }
627
+ }
504
628
  async function fetchSnapshot(owner, repo, prNumber, previousRequiredChecksCache = null) {
505
629
  const response = await runGhJson([
506
630
  'api',
@@ -518,8 +642,9 @@ async function fetchSnapshot(owner, repo, prNumber, previousRequiredChecksCache
518
642
  const previousRequiredChecks = resolveCachedRequiredChecksSummary(previousRequiredChecksCache, currentHeadOid);
519
643
  const requiredChecksResult = await fetchRequiredChecks(owner, repo, prNumber);
520
644
  const requiredChecks = resolveRequiredChecksSummary(requiredChecksResult.summary, previousRequiredChecks, requiredChecksResult.fetchError);
645
+ const inlineBotFeedback = await fetchInlineBotFeedback(owner, repo, prNumber, currentHeadOid);
521
646
  return {
522
- snapshot: buildStatusSnapshot(response, requiredChecks),
647
+ snapshot: buildStatusSnapshot(response, requiredChecks, inlineBotFeedback),
523
648
  requiredChecksForNextPoll: requiredChecks
524
649
  ? {
525
650
  headOid: currentHeadOid,
package/docs/README.md CHANGED
@@ -101,7 +101,8 @@ Use `npx @kbediako/codex-orchestrator resume --run <run-id>` to continue interru
101
101
  ## Companion Package Commands
102
102
  - `codex-orchestrator mcp serve [--repo <path>] [--dry-run] [-- <extra args>]`: launch the MCP stdio server (delegates to `codex mcp-server`; stdout guard keeps protocol-only output, logs to stderr).
103
103
  - `codex-orchestrator init codex [--cwd <path>] [--force]`: copy starter templates into a repo (includes `mcp-client.json` and `AGENTS.md`; no overwrite unless `--force`).
104
- - `codex-orchestrator setup [--yes]`: one-shot bootstrap for downstream users (installs bundled skills and configures delegation + DevTools wiring).
104
+ - `codex-orchestrator setup [--yes]`: one-shot bootstrap for downstream users (installs bundled skills, configures delegation + DevTools wiring, and prints policy/usage guidance).
105
+ - `codex-orchestrator flow [--task <task-id>]`: runs `docs-review` then `implementation-gate` in sequence; stops on the first failure.
105
106
  - `codex-orchestrator doctor [--format json] [--usage] [--apply]`: check optional tooling dependencies plus collab/cloud/delegation readiness and print enablement commands. `--usage` appends a local usage snapshot (scans `.runs/`). `--apply` plans/applies quick fixes (use with `--yes`).
106
107
  - `codex-orchestrator devtools setup [--yes]`: print DevTools MCP setup instructions (`--yes` applies `codex mcp add ...`).
107
108
  - `codex-orchestrator delegation setup [--yes]`: configure delegation MCP wiring (`--yes` applies `codex mcp add ...`).
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kbediako/codex-orchestrator",
3
- "version": "0.1.30",
3
+ "version": "0.1.32",
4
4
  "license": "MIT",
5
5
  "repository": {
6
6
  "type": "git",
@@ -52,6 +52,19 @@ Skip subagents when all conditions are true:
52
52
  - Include objective, scope, constraints, acceptance criteria, and expected output format.
53
53
  - Require concise summaries and evidence paths; avoid long logs in chat.
54
54
 
55
+ 4a) Declare write policy and track ownership against git status
56
+ - Capture a baseline before spawning: `git status --porcelain`.
57
+ - Declare each stream as either:
58
+ - `read-only` (research/scout/review), or
59
+ - `write-enabled` (implementation/tests).
60
+ - For `read-only` streams, include an explicit "no file edits" constraint.
61
+ - After each `wait`, compare status against baseline and map changed files to stream ownership.
62
+ - Treat in-scope edits from active write-enabled streams as expected delegated output.
63
+ - Escalate only for out-of-scope changes, overlapping ownership collisions, or edits appearing without an active stream owner.
64
+ - If the agent surfaces a generic "unexpected local edits" pause prompt, treat it as a classification step: keep and continue when edits are in-scope; escalate only violations.
65
+ - Prefer the built-in helper when available (`node scripts/subagent-edit-guard.mjs ...`); canonical command examples live in `docs/delegation-runner-workflow.md` (section `3a`). If the helper is not present in the current repo, use the same baseline/scope logic manually.
66
+ - If `finish` exits non-zero, escalate only the reported `out_of_scope_paths` / `violations`.
67
+
55
68
  5) Run streams in parallel when independent
56
69
  - Spawn multiple subagents for independent streams.
57
70
  - Wait for all subagents to finish before final synthesis.
@@ -159,6 +172,7 @@ Do not treat wrapper handoff-only output as a completed review.
159
172
  - Do not skip delegation solely because there is only one implementation stream; single-stream delegation is valid for context offload.
160
173
  - Do not rely on human-readable agent names in TUI labels for control flow; use stream ownership and evidence paths as source of truth.
161
174
  - Do not end the parent work with unclosed collab agent ids.
175
+ - Do not treat every delegated edit as "unexpected"; first verify whether the edit belongs to an active stream owner.
162
176
 
163
177
  ## Completion checklist
164
178
 
@@ -30,6 +30,7 @@ Out of scope:
30
30
  Ownership:
31
31
  - Files/paths you may edit: <paths>
32
32
  - Files/paths you must not edit: <paths>
33
+ - Write policy: read-only | write-enabled
33
34
 
34
35
  Acceptance criteria:
35
36
  - <bullet 1>
@@ -59,6 +60,7 @@ Keep the response concise. Put detailed notes in a file and return the path.
59
60
 
60
61
  - Include enough context so the subagent can act without back-and-forth.
61
62
  - Include explicit file ownership boundaries.
63
+ - Include explicit write policy (`read-only` or `write-enabled`).
62
64
  - Include a concrete output format and validation expectations.
63
65
  - Include at least one "do not do" constraint to prevent drift.
64
66
  - If the task is review-only, explicitly prohibit implementation edits.
@@ -87,4 +89,3 @@ Objective: validate <existing change>.
87
89
  Deliverable: failing/passing checks, defect list by severity, and minimal fix suggestions.
88
90
  No broad refactors.
89
91
  ```
90
-
@@ -23,6 +23,12 @@ Collab multi-agent mode is separate from delegation. For symbolic RLM subcalls t
23
23
  - **Lifecycle is mandatory:** for every successful `spawn_agent`, run `wait` and then `close_agent` for that same id before task completion.
24
24
  - Keep a local list of spawned ids and run a final cleanup pass so no agent id is left unclosed on timeout/error paths.
25
25
  - If spawn fails with `agent thread limit reached`, stop spawning, close any known ids first, then surface a concise recovery note.
26
+ - In a shared checkout, spawned subagents may produce file edits. Treat edits inside that stream's declared ownership as expected delegated output, not external interference.
27
+ - Before spawning, capture a baseline (`git status --porcelain`). After `wait`, diff against baseline and classify file changes by stream ownership.
28
+ - Escalate "unexpected local edits" only when changed files are outside all active stream scopes (or when no subagent was active).
29
+ - If a generic safety prompt appears after delegation (for example "unexpected local edits"), run scope classification first; when edits are in-scope, keep them and continue without user escalation.
30
+ - For scout/research streams, set an explicit no-write constraint and verify the post-run status matches baseline.
31
+ - Prefer `scripts/subagent-edit-guard.mjs` for low-friction enforcement when the helper exists in the repo (`start` before spawn, `finish` after `wait`); canonical command examples live in `docs/delegation-runner-workflow.md` (section `3a`). If the helper is absent, apply the same baseline/scope checks manually.
26
32
 
27
33
  ## Quick-start workflow (canned)
28
34
 
@@ -186,3 +192,4 @@ repeat:
186
192
  - **Collab payload mismatch:** `spawn_agent` rejects calls that include both `message` and `items`.
187
193
  - **Collab UI assumptions:** agent rows/records are id-based today; use explicit stream role text in prompts/artifacts for operator clarity.
188
194
  - **Collab lifecycle leaks:** missing `close_agent` calls accumulate open threads and can trigger `agent thread limit reached`; always finish `spawn -> wait -> close_agent` per id.
195
+ - **False "unexpected edits" stops:** when a live subagent owns the touched files, treat those edits as expected output and continue with scope-aware review.
@@ -0,0 +1,62 @@
1
+ ---
2
+ name: elegance-review
3
+ description: Run an explicit post-implementation elegance/minimality pass to keep the smallest correct solution and remove avoidable complexity before handoff.
4
+ ---
5
+
6
+ # Elegance Review
7
+
8
+ ## Overview
9
+
10
+ Use this skill after non-trivial edits to verify the implementation is minimal, coherent, and easy to maintain. This is a simplification pass, not a feature-expansion pass.
11
+
12
+ ## Auto-trigger policy (required)
13
+
14
+ Run this skill whenever any condition is true:
15
+ - You changed behavior across two or more files.
16
+ - You added a new helper/module/pathway that could possibly be collapsed.
17
+ - You finished addressing review feedback and are preparing to hand off.
18
+ - You are about to recommend merge/release.
19
+ - The user explicitly asks for elegance/minimality/overengineering checks.
20
+
21
+ ## Quick start
22
+
23
+ Focused uncommitted review:
24
+ ```bash
25
+ codex review --uncommitted "Find avoidable complexity, duplicate abstractions, and unnecessary indirection. Prioritize simplifications that preserve behavior."
26
+ ```
27
+
28
+ Diff-vs-base review:
29
+ ```bash
30
+ codex review --base <branch> "Focus on smallest viable design and maintenance cost."
31
+ ```
32
+
33
+ ## Workflow
34
+
35
+ 1) Lock invariants first
36
+ - State what behavior cannot change.
37
+ - Keep tests/acceptance criteria as the guardrail.
38
+
39
+ 2) Identify complexity hotspots
40
+ - Unused abstractions, wrappers, or config layers.
41
+ - Duplicate logic that can be consolidated safely.
42
+ - Over-generalized interfaces used in one place only.
43
+ - Extra branching/state that can be simplified.
44
+
45
+ 3) Simplify in smallest safe steps
46
+ - Prefer deleting code over adding knobs.
47
+ - Collapse one-off abstractions into local logic when clearer.
48
+ - Keep naming and control flow direct.
49
+
50
+ 4) Re-validate
51
+ - Run targeted tests/lint for touched areas.
52
+ - Confirm no behavior regressions.
53
+
54
+ 5) Record result
55
+ - Report what was simplified.
56
+ - Report residual complexity that is intentionally kept and why.
57
+
58
+ ## Guardrails
59
+
60
+ - Do not broaden scope into unrelated refactors.
61
+ - Do not trade readability for cleverness.
62
+ - If `codex review` is unavailable, run a manual checklist using the same criteria and note that fallback.