@kbediako/codex-orchestrator 0.1.30 → 0.1.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -132,6 +132,18 @@ export async function runDoctorUsage(options = {}) {
132
132
  .sort((a, b) => b[1] - a[1])
133
133
  .slice(0, 10)
134
134
  .map(([id, runs]) => ({ id, runs }));
135
+ const execRuns = pipelines.get('exec') ?? 0;
136
+ const gateRuns = (pipelines.get('docs-review') ?? 0) + (pipelines.get('implementation-gate') ?? 0);
137
+ const execSharePct = statusCounts.total > 0 ? Math.round((execRuns / statusCounts.total) * 1000) / 10 : 0;
138
+ const gateSharePct = statusCounts.total > 0 ? Math.round((gateRuns / statusCounts.total) * 1000) / 10 : 0;
139
+ const adoptionRecommendations = buildAdoptionRecommendations({
140
+ totalRuns: statusCounts.total,
141
+ execRuns,
142
+ gateRuns,
143
+ rlmRuns,
144
+ cloudRuns,
145
+ collabRunsWithToolCalls
146
+ });
135
147
  const delegationErrors = [];
136
148
  let activeWithSubagents = 0;
137
149
  let totalSubagentManifests = 0;
@@ -193,6 +205,13 @@ export async function runDoctorUsage(options = {}) {
193
205
  pipelines: {
194
206
  total: pipelines.size,
195
207
  top: pipelineTop
208
+ },
209
+ adoption: {
210
+ exec_runs: execRuns,
211
+ exec_share_pct: execSharePct,
212
+ gate_runs: gateRuns,
213
+ gate_share_pct: gateSharePct,
214
+ recommendations: adoptionRecommendations
196
215
  }
197
216
  };
198
217
  }
@@ -235,6 +254,14 @@ export function formatDoctorUsageSummary(result) {
235
254
  lines.push(` - ${entry.id}: ${entry.runs}`);
236
255
  }
237
256
  }
257
+ lines.push(`Pipeline adoption: exec=${result.adoption.exec_runs} (${result.adoption.exec_share_pct}%), ` +
258
+ `docs-review+implementation-gate=${result.adoption.gate_runs} (${result.adoption.gate_share_pct}%)`);
259
+ if (result.adoption.recommendations.length > 0) {
260
+ lines.push('Adoption hints:');
261
+ for (const recommendation of result.adoption.recommendations) {
262
+ lines.push(` - ${recommendation}`);
263
+ }
264
+ }
238
265
  if (result.delegation.errors.length > 0) {
239
266
  lines.push('Delegation scan warnings:');
240
267
  for (const warning of result.delegation.errors.slice(0, 3)) {
@@ -243,6 +270,29 @@ export function formatDoctorUsageSummary(result) {
243
270
  }
244
271
  return lines;
245
272
  }
273
/**
 * Produces short, human-readable adoption hints from aggregate run counts.
 *
 * Rules are evaluated in a fixed order and at most three hints are returned,
 * so earlier rules take priority when more than three apply.
 *
 * @param {object} params - Aggregate counters supplied by the caller.
 * @param {number} params.totalRuns - Total number of runs; no hints are produced when this is <= 0.
 * @param {number} params.execRuns - Runs counted for the `exec` pipeline.
 * @param {number} params.gateRuns - Runs counted for the gate pipelines.
 * @param {number} params.rlmRuns - Runs counted as RLM runs.
 * @param {number} params.cloudRuns - Runs counted as cloud runs.
 * @param {number} params.collabRunsWithToolCalls - Runs counted as having collab tool calls.
 * @returns {string[]} Up to three hint strings, in rule order.
 */
function buildAdoptionRecommendations(params) {
  const { totalRuns, execRuns, gateRuns, rlmRuns, cloudRuns, collabRunsWithToolCalls } = params;
  if (totalRuns <= 0) {
    return [];
  }

  const maxHints = 3;
  // Each entry pairs "does this rule apply?" with the hint it contributes.
  const rules = [
    [
      execRuns / totalRuns >= 0.6,
      'Most runs are plain exec; prefer `codex-orchestrator start docs-review` or `start implementation-gate` for manifest-backed guardrails.'
    ],
    [
      gateRuns === 0,
      'No gate pipelines detected; use docs-review before implementation and implementation-gate before handoff.'
    ],
    [
      rlmRuns === 0,
      'No RLM runs detected; try `codex-orchestrator rlm --collab auto "<goal>"` for long-horizon or ambiguous tasks.'
    ],
    [
      cloudRuns === 0,
      'No cloud runs detected; configure CODEX_CLOUD_ENV_ID and run `codex-orchestrator start <pipeline> --cloud --target <stage-id>` for long-running stages.'
    ],
    [
      rlmRuns > 0 && collabRunsWithToolCalls === 0,
      'RLM is used without collab activity; ensure collab is enabled (`codex features enable collab`).'
    ]
  ];

  return rules
    .filter(([applies]) => applies)
    .map(([, message]) => message)
    .slice(0, maxHints);
}
246
296
  function extractRunIdFromManifestPath(manifestPath) {
247
297
  if (!manifestPath) {
248
298
  return null;
@@ -9,6 +9,7 @@ import { resolveEnvironmentPaths } from '../../../scripts/lib/run-manifests.js';
9
9
  import { normalizeEnvironmentPaths } from './run/environment.js';
10
10
  import { bootstrapManifest, loadManifest, updateHeartbeat, finalizeStatus, appendSummary, ensureGuardrailStatus, resetForResume, recordResumeEvent } from './run/manifest.js';
11
11
  import { ManifestPersister, persistManifest } from './run/manifestPersister.js';
12
+ import { resolveRuntimeActivitySnapshot } from './run/runtimeActivity.js';
12
13
  import { generateRunId } from './utils/runId.js';
13
14
  import { runCommandStage } from './services/commandRunner.js';
14
15
  import { appendMetricsEntry } from './metrics/metricsRecorder.js';
@@ -437,12 +438,13 @@ export class CodexOrchestrator {
437
438
  async status(options) {
438
439
  const env = this.baseEnv;
439
440
  const { manifest, paths } = await loadManifest(env, options.runId);
441
+ const activity = await resolveRuntimeActivitySnapshot(manifest, paths);
440
442
  if (options.format === 'json') {
441
- const payload = this.buildStatusPayload(env, manifest, paths);
443
+ const payload = this.buildStatusPayload(env, manifest, paths, activity);
442
444
  process.stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
443
445
  return manifest;
444
446
  }
445
- this.renderStatus(manifest);
447
+ this.renderStatus(manifest, activity);
446
448
  return manifest;
447
449
  }
448
450
  async plan(options = {}) {
@@ -1121,7 +1123,7 @@ export class CodexOrchestrator {
1121
1123
  throw new Error('Resume token mismatch.');
1122
1124
  }
1123
1125
  }
1124
- buildStatusPayload(env, manifest, paths) {
1126
+ buildStatusPayload(env, manifest, paths, activity) {
1125
1127
  return {
1126
1128
  run_id: manifest.run_id,
1127
1129
  status: manifest.status,
@@ -1132,17 +1134,24 @@ export class CodexOrchestrator {
1132
1134
  artifact_root: manifest.artifact_root,
1133
1135
  log_path: manifest.log_path,
1134
1136
  heartbeat_at: manifest.heartbeat_at,
1137
+ activity,
1135
1138
  commands: manifest.commands,
1136
1139
  child_runs: manifest.child_runs,
1137
1140
  cloud_execution: manifest.cloud_execution ?? null
1138
1141
  };
1139
1142
  }
1140
- renderStatus(manifest) {
1143
+ renderStatus(manifest, activity) {
1141
1144
  logger.info(`Run: ${manifest.run_id}`);
1142
1145
  logger.info(`Status: ${manifest.status}${manifest.status_detail ? ` (${manifest.status_detail})` : ''}`);
1143
1146
  logger.info(`Started: ${manifest.started_at}`);
1144
1147
  logger.info(`Completed: ${manifest.completed_at ?? 'in-progress'}`);
1145
1148
  logger.info(`Manifest: ${manifest.artifact_root}/manifest.json`);
1149
+ if (activity.observed_at) {
1150
+ const staleSuffix = activity.stale === null ? '' : activity.stale ? ' [stale]' : ' [active]';
1151
+ const sourceLabel = activity.observed_source ? ` via ${activity.observed_source}` : '';
1152
+ const ageLabel = activity.age_seconds === null ? '' : ` age=${activity.age_seconds}s`;
1153
+ logger.info(`Activity: ${activity.observed_at}${sourceLabel}${ageLabel}${staleSuffix}`);
1154
+ }
1146
1155
  if (manifest.cloud_execution?.task_id) {
1147
1156
  logger.info(`Cloud: ${manifest.cloud_execution.task_id} [${manifest.cloud_execution.status}]` +
1148
1157
  (manifest.cloud_execution.status_url ? ` ${manifest.cloud_execution.status_url}` : ''));
@@ -0,0 +1,79 @@
1
+ import { readFile, stat } from 'node:fs/promises';
2
/**
 * Builds a snapshot describing the most recent activity signal for a run.
 *
 * Three timestamp sources are considered: the manifest's `heartbeat_at`, the
 * contents of the heartbeat file at `paths.heartbeatPath`, and the mtime of the
 * log file at `paths.logPath`. The newest parseable timestamp becomes
 * `observed_at` / `observed_source`.
 *
 * Staleness is only evaluated when the manifest status is `in_progress`, at
 * least one timestamp was found, and the manifest carries a positive, finite
 * `heartbeat_stale_after_seconds`; otherwise `stale` and `age_seconds` are null.
 *
 * @param {object} manifest - Run manifest; reads `heartbeat_at`, `status` and `heartbeat_stale_after_seconds`.
 * @param {object} paths - Run paths; reads `heartbeatPath` and `logPath`.
 * @param {object} [options]
 * @param {number} [options.nowMs] - Clock override in epoch milliseconds; defaults to `Date.now()`.
 * @returns {Promise<object>} Snapshot with `manifest_heartbeat_at`, `heartbeat_file_at`,
 *   `runner_log_mtime_at`, `observed_at`, `observed_source`, `stale`,
 *   `stale_threshold_seconds` and `age_seconds`.
 */
export async function resolveRuntimeActivitySnapshot(manifest, paths, options = {}) {
  const manifestHeartbeat = normalizeTimestamp(manifest.heartbeat_at);
  // The two filesystem probes are independent of each other and both helpers
  // map failures to null, so they can run concurrently instead of back to back.
  const [heartbeatFileAt, runnerLogMtime] = await Promise.all([
    readHeartbeatTimestamp(paths.heartbeatPath),
    readMtimeIso(paths.logPath)
  ]);
  const heartbeatCandidate = normalizeTimestamp(heartbeatFileAt);
  const logCandidate = normalizeTimestamp(runnerLogMtime);

  // Candidate order matters: on equal timestamps the earlier entry is reported.
  const candidates = [];
  if (manifestHeartbeat) {
    candidates.push({ source: 'manifest', ...manifestHeartbeat });
  }
  if (heartbeatCandidate) {
    candidates.push({ source: 'heartbeat_file', ...heartbeatCandidate });
  }
  if (logCandidate) {
    candidates.push({ source: 'runner_log', ...logCandidate });
  }
  const latest = pickLatest(candidates);

  const nowMs = Number.isFinite(options.nowMs) ? Number(options.nowMs) : Date.now();
  const configuredThreshold = manifest.heartbeat_stale_after_seconds;
  const staleThresholdSeconds = Number.isFinite(configuredThreshold) && configuredThreshold > 0
    ? Math.floor(configuredThreshold)
    : null;

  let stale = null;
  let ageSeconds = null;
  if (manifest.status === 'in_progress' && latest && staleThresholdSeconds !== null) {
    // Clamp at zero so a timestamp slightly in the future is not reported as a negative age.
    ageSeconds = Math.max(0, Math.floor((nowMs - latest.ms) / 1000));
    stale = ageSeconds > staleThresholdSeconds;
  }

  return {
    manifest_heartbeat_at: manifestHeartbeat?.iso ?? null,
    heartbeat_file_at: heartbeatCandidate?.iso ?? null,
    runner_log_mtime_at: logCandidate?.iso ?? null,
    observed_at: latest?.iso ?? null,
    observed_source: latest?.source ?? null,
    stale,
    stale_threshold_seconds: staleThresholdSeconds,
    age_seconds: ageSeconds
  };
}
40
/**
 * Reads the heartbeat file and returns its trimmed text content.
 *
 * This is a best-effort read: any failure to read the file yields null rather
 * than an error.
 *
 * @param {string} heartbeatPath - Path of the heartbeat file to read as UTF-8.
 * @returns {Promise<string|null>} Trimmed file content, or null when the file
 *   cannot be read or contains only whitespace.
 */
async function readHeartbeatTimestamp(heartbeatPath) {
  let contents;
  try {
    contents = await readFile(heartbeatPath, 'utf8');
  } catch {
    return null;
  }
  const stamp = contents.trim();
  return stamp === '' ? null : stamp;
}
50
/**
 * Returns a file's last-modified time as an ISO-8601 string.
 *
 * This is a best-effort lookup: if the file cannot be stat'ed, or its mtime
 * cannot be serialized, null is returned instead of an error.
 *
 * @param {string} filePath - Path of the file to inspect.
 * @returns {Promise<string|null>} ISO timestamp of the file's mtime, or null on failure.
 */
async function readMtimeIso(filePath) {
  try {
    const { mtime } = await stat(filePath);
    // Kept inside the try so an unserializable date also falls back to null.
    return mtime.toISOString();
  } catch {
    return null;
  }
}
59
/**
 * Parses a timestamp string into a canonical ISO string plus epoch milliseconds.
 *
 * @param {unknown} value - Candidate timestamp; anything other than a string is rejected.
 * @returns {{ iso: string, ms: number } | null} The normalized timestamp, or null
 *   when the value is not a string, is blank, or cannot be parsed by `Date.parse`.
 */
function normalizeTimestamp(value) {
  const text = typeof value === 'string' ? value.trim() : '';
  if (text === '') {
    return null;
  }
  const ms = Date.parse(text);
  return Number.isFinite(ms) ? { iso: new Date(ms).toISOString(), ms } : null;
}
73
/**
 * Selects the candidate with the greatest `ms` value.
 *
 * The input array is left untouched. When several candidates share the greatest
 * `ms`, the one that appears first in the array is returned.
 *
 * @param {Array<{ ms: number }>} candidates - Timestamp candidates to compare.
 * @returns {{ ms: number } | null} The newest candidate, or null for an empty list.
 */
function pickLatest(candidates) {
  if (candidates.length === 0) {
    return null;
  }
  // Strict `>` keeps the earliest entry on ties, and avoids sorting (and
  // therefore reordering) the caller's array just to read one element.
  return candidates.reduce((best, candidate) => (candidate.ms > best.ms ? candidate : best));
}
@@ -171,7 +171,6 @@ export async function runCommandStage(context, hooks = {}) {
171
171
  try {
172
172
  result = await runner.run({
173
173
  command: stage.command,
174
- args: [],
175
174
  cwd: stage.cwd ?? env.repoRoot,
176
175
  env: execEnv,
177
176
  sessionId: sessionId ?? undefined,
@@ -19,10 +19,13 @@ const sessionManager = new ExecSessionManager({
19
19
  const privacyGuard = new PrivacyGuard({ mode: resolvePrivacyGuardMode() });
20
20
  const handleService = new RemoteExecHandleService({ guard: privacyGuard, now: () => new Date() });
21
21
  const cliExecutor = async (request) => {
22
+ const hasExplicitArgs = Array.isArray(request.args);
22
23
  const child = spawn(request.command, request.args ?? [], {
23
24
  cwd: request.cwd,
24
25
  env: request.env,
25
- shell: true,
26
+ // Use shell mode only for string-style commands. When args are provided we
27
+ // want argv semantics (`cmd arg1 arg2`) rather than `sh -c cmd` behavior.
28
+ shell: !hasExplicitArgs,
26
29
  stdio: ['ignore', 'pipe', 'pipe']
27
30
  });
28
31
  if (!child.stdout || !child.stderr) {
@@ -27,7 +27,8 @@ export class UnifiedExecRunner {
27
27
  };
28
28
  }
29
29
  async run(options) {
30
- const args = options.args ?? [];
30
+ const args = options.args;
31
+ const resolvedArgs = args ?? [];
31
32
  const invocationId = options.invocationId ?? this.idGenerator();
32
33
  const correlationId = this.idGenerator();
33
34
  const issuedHandle = this.handleService ? this.handleService.issueHandle(correlationId) : undefined;
@@ -49,7 +50,7 @@ export class UnifiedExecRunner {
49
50
  const metadata = {
50
51
  ...options.metadata,
51
52
  command: options.command,
52
- args,
53
+ args: resolvedArgs,
53
54
  cwd: options.cwd,
54
55
  sessionId: lease.id,
55
56
  correlationId,
@@ -81,7 +82,7 @@ export class UnifiedExecRunner {
81
82
  attempt,
82
83
  correlationId,
83
84
  command: options.command,
84
- args,
85
+ args: resolvedArgs,
85
86
  cwd: options.cwd,
86
87
  sandboxState,
87
88
  sessionId: lease.id,
@@ -403,7 +404,7 @@ function getErrorMessage(error) {
403
404
  return String(error);
404
405
  }
405
406
  const defaultExecutor = async (request) => {
406
- const child = spawn(request.command, request.args, {
407
+ const child = spawn(request.command, request.args ?? [], {
407
408
  cwd: request.cwd,
408
409
  env: request.env,
409
410
  stdio: ['ignore', 'pipe', 'pipe']
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kbediako/codex-orchestrator",
3
- "version": "0.1.30",
3
+ "version": "0.1.31",
4
4
  "license": "MIT",
5
5
  "repository": {
6
6
  "type": "git",
@@ -52,6 +52,19 @@ Skip subagents when all conditions are true:
52
52
  - Include objective, scope, constraints, acceptance criteria, and expected output format.
53
53
  - Require concise summaries and evidence paths; avoid long logs in chat.
54
54
 
55
+ 4a) Declare write policy and track ownership against git status
56
+ - Capture a baseline before spawning: `git status --porcelain`.
57
+ - Declare each stream as either:
58
+ - `read-only` (research/scout/review), or
59
+ - `write-enabled` (implementation/tests).
60
+ - For `read-only` streams, include an explicit "no file edits" constraint.
61
+ - After each `wait`, compare status against baseline and map changed files to stream ownership.
62
+ - Treat in-scope edits from active write-enabled streams as expected delegated output.
63
+ - Escalate only for out-of-scope changes, overlapping ownership collisions, or edits appearing without an active stream owner.
64
+ - If the agent surfaces a generic "unexpected local edits" pause prompt, treat it as a classification step: keep and continue when edits are in-scope; escalate only violations.
65
+ - Prefer the built-in helper when available (`node scripts/subagent-edit-guard.mjs ...`); canonical command examples live in `docs/delegation-runner-workflow.md` (section `3a`). If the helper is not present in the current repo, use the same baseline/scope logic manually.
66
+ - If `finish` exits non-zero, escalate only the reported `out_of_scope_paths` / `violations`.
67
+
55
68
  5) Run streams in parallel when independent
56
69
  - Spawn multiple subagents for independent streams.
57
70
  - Wait for all subagents to finish before final synthesis.
@@ -159,6 +172,7 @@ Do not treat wrapper handoff-only output as a completed review.
159
172
  - Do not skip delegation solely because there is only one implementation stream; single-stream delegation is valid for context offload.
160
173
  - Do not rely on human-readable agent names in TUI labels for control flow; use stream ownership and evidence paths as source of truth.
161
174
  - Do not end the parent work with unclosed collab agent ids.
175
+ - Do not treat every delegated edit as "unexpected"; first verify whether the edit belongs to an active stream owner.
162
176
 
163
177
  ## Completion checklist
164
178
 
@@ -30,6 +30,7 @@ Out of scope:
30
30
  Ownership:
31
31
  - Files/paths you may edit: <paths>
32
32
  - Files/paths you must not edit: <paths>
33
+ - Write policy: read-only | write-enabled
33
34
 
34
35
  Acceptance criteria:
35
36
  - <bullet 1>
@@ -59,6 +60,7 @@ Keep the response concise. Put detailed notes in a file and return the path.
59
60
 
60
61
  - Include enough context so the subagent can act without back-and-forth.
61
62
  - Include explicit file ownership boundaries.
63
+ - Include explicit write policy (`read-only` or `write-enabled`).
62
64
  - Include a concrete output format and validation expectations.
63
65
  - Include at least one "do not do" constraint to prevent drift.
64
66
  - If task is review-only, explicitly prohibit implementation edits.
@@ -87,4 +89,3 @@ Objective: validate <existing change>.
87
89
  Deliverable: failing/passing checks, defect list by severity, and minimal fix suggestions.
88
90
  No broad refactors.
89
91
  ```
90
-
@@ -23,6 +23,12 @@ Collab multi-agent mode is separate from delegation. For symbolic RLM subcalls t
23
23
  - **Lifecycle is mandatory:** for every successful `spawn_agent`, run `wait` and then `close_agent` for that same id before task completion.
24
24
  - Keep a local list of spawned ids and run a final cleanup pass so no agent id is left unclosed on timeout/error paths.
25
25
  - If spawn fails with `agent thread limit reached`, stop spawning, close any known ids first, then surface a concise recovery note.
26
+ - In a shared checkout, spawned subagents may produce file edits. Treat edits inside that stream's declared ownership as expected delegated output, not external interference.
27
+ - Before spawning, capture a baseline (`git status --porcelain`). After `wait`, diff against baseline and classify file changes by stream ownership.
28
+ - Escalate "unexpected local edits" only when changed files are outside all active stream scopes (or when no subagent was active).
29
+ - If a generic safety prompt appears after delegation (for example "unexpected local edits"), run scope classification first; when edits are in-scope, keep them and continue without user escalation.
30
+ - For scout/research streams, set an explicit no-write constraint and verify the post-run status matches baseline.
31
+ - Prefer `scripts/subagent-edit-guard.mjs` for low-friction enforcement when the helper exists in the repo (`start` before spawn, `finish` after `wait`); canonical command examples live in `docs/delegation-runner-workflow.md` (section `3a`). If the helper is absent, apply the same baseline/scope checks manually.
26
32
 
27
33
  ## Quick-start workflow (canned)
28
34
 
@@ -186,3 +192,4 @@ repeat:
186
192
  - **Collab payload mismatch:** `spawn_agent` rejects calls that include both `message` and `items`.
187
193
  - **Collab UI assumptions:** agent rows/records are id-based today; use explicit stream role text in prompts/artifacts for operator clarity.
188
194
  - **Collab lifecycle leaks:** missing `close_agent` calls accumulate open threads and can trigger `agent thread limit reached`; always finish `spawn -> wait -> close_agent` per id.
195
+ - **False "unexpected edits" stops:** when a live subagent owns the touched files, treat those edits as expected output and continue with scope-aware review.
@@ -0,0 +1,62 @@
1
+ ---
2
+ name: elegance-review
3
+ description: Run an explicit post-implementation elegance/minimality pass to keep the smallest correct solution and remove avoidable complexity before handoff.
4
+ ---
5
+
6
+ # Elegance Review
7
+
8
+ ## Overview
9
+
10
+ Use this skill after non-trivial edits to verify the implementation is minimal, coherent, and easy to maintain. This is a simplification pass, not a feature-expansion pass.
11
+
12
+ ## Auto-trigger policy (required)
13
+
14
+ Run this skill whenever any condition is true:
15
+ - You changed behavior across two or more files.
16
+ - You added a new helper/module/pathway that could potentially be collapsed into existing code.
17
+ - You finished addressing review feedback and are preparing to hand off.
18
+ - You are about to recommend merge/release.
19
+ - The user explicitly asks for elegance/minimality/overengineering checks.
20
+
21
+ ## Quick start
22
+
23
+ Focused uncommitted review:
24
+ ```bash
25
+ codex review --uncommitted "Find avoidable complexity, duplicate abstractions, and unnecessary indirection. Prioritize simplifications that preserve behavior."
26
+ ```
27
+
28
+ Diff-vs-base review:
29
+ ```bash
30
+ codex review --base <branch> "Focus on smallest viable design and maintenance cost."
31
+ ```
32
+
33
+ ## Workflow
34
+
35
+ 1) Lock invariants first
36
+ - State what behavior cannot change.
37
+ - Keep tests/acceptance criteria as the guardrail.
38
+
39
+ 2) Identify complexity hotspots
40
+ - Unused abstractions, wrappers, or config layers.
41
+ - Duplicate logic that can be consolidated safely.
42
+ - Over-generalized interfaces used in one place only.
43
+ - Extra branching/state that can be simplified.
44
+
45
+ 3) Simplify in smallest safe steps
46
+ - Prefer deleting code over adding knobs.
47
+ - Collapse one-off abstractions into local logic when clearer.
48
+ - Keep naming and control flow direct.
49
+
50
+ 4) Re-validate
51
+ - Run targeted tests/lint for touched areas.
52
+ - Confirm no behavior regressions.
53
+
54
+ 5) Record result
55
+ - Report what was simplified.
56
+ - Report residual complexity that is intentionally kept and why.
57
+
58
+ ## Guardrails
59
+
60
+ - Do not broaden scope into unrelated refactors.
61
+ - Do not trade readability for cleverness.
62
+ - If `codex review` is unavailable, run a manual checklist using the same criteria and note that fallback.