npm - @kbediako/codex-orchestrator - Versions diffs - 0.1.29 → 0.1.31 - Mend

@kbediako/codex-orchestrator 0.1.29 → 0.1.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/dist/orchestrator/src/cli/doctorUsage.js CHANGED Viewed

@@ -132,6 +132,18 @@ export async function runDoctorUsage(options = {}) {
         .sort((a, b) => b[1] - a[1])
         .slice(0, 10)
         .map(([id, runs]) => ({ id, runs }));
+    const execRuns = pipelines.get('exec') ?? 0;
+    const gateRuns = (pipelines.get('docs-review') ?? 0) + (pipelines.get('implementation-gate') ?? 0);
+    const execSharePct = statusCounts.total > 0 ? Math.round((execRuns / statusCounts.total) * 1000) / 10 : 0;
+    const gateSharePct = statusCounts.total > 0 ? Math.round((gateRuns / statusCounts.total) * 1000) / 10 : 0;
+    const adoptionRecommendations = buildAdoptionRecommendations({
+        totalRuns: statusCounts.total,
+        execRuns,
+        gateRuns,
+        rlmRuns,
+        cloudRuns,
+        collabRunsWithToolCalls
+    });
     const delegationErrors = [];
     let activeWithSubagents = 0;
     let totalSubagentManifests = 0;
@@ -193,6 +205,13 @@ export async function runDoctorUsage(options = {}) {
         pipelines: {
             total: pipelines.size,
             top: pipelineTop
+        },
+        adoption: {
+            exec_runs: execRuns,
+            exec_share_pct: execSharePct,
+            gate_runs: gateRuns,
+            gate_share_pct: gateSharePct,
+            recommendations: adoptionRecommendations
         }
     };
 }
@@ -235,6 +254,14 @@ export function formatDoctorUsageSummary(result) {
             lines.push(`  - ${entry.id}: ${entry.runs}`);
         }
     }
+    lines.push(`Pipeline adoption: exec=${result.adoption.exec_runs} (${result.adoption.exec_share_pct}%), ` +
+        `docs-review+implementation-gate=${result.adoption.gate_runs} (${result.adoption.gate_share_pct}%)`);
+    if (result.adoption.recommendations.length > 0) {
+        lines.push('Adoption hints:');
+        for (const recommendation of result.adoption.recommendations) {
+            lines.push(`  - ${recommendation}`);
+        }
+    }
     if (result.delegation.errors.length > 0) {
         lines.push('Delegation scan warnings:');
         for (const warning of result.delegation.errors.slice(0, 3)) {
@@ -243,6 +270,29 @@ export function formatDoctorUsageSummary(result) {
     }
     return lines;
 }
+function buildAdoptionRecommendations(params) { // Builds at most three adoption hint strings from the run counts in `params`.
+    if (params.totalRuns <= 0) { // Without any runs there is nothing to base a hint on.
+        return [];
+    }
+    const hints = [];
+    const execShare = params.execRuns / params.totalRuns; // Fraction (0..1) of total runs counted as exec runs.
+    if (execShare >= 0.6) { // Triggers when 60% or more of the runs are exec runs.
+        hints.push('Most runs are plain exec; prefer `codex-orchestrator start docs-review` or `start implementation-gate` for manifest-backed guardrails.');
+    }
+    if (params.gateRuns === 0) {
+        hints.push('No gate pipelines detected; use docs-review before implementation and implementation-gate before handoff.');
+    }
+    if (params.rlmRuns === 0) {
+        hints.push('No RLM runs detected; try `codex-orchestrator rlm --collab auto "<goal>"` for long-horizon or ambiguous tasks.');
+    }
+    if (params.cloudRuns === 0) {
+        hints.push('No cloud runs detected; configure CODEX_CLOUD_ENV_ID and run `codex-orchestrator start <pipeline> --cloud --target <stage-id>` for long-running stages.');
+    }
+    if (params.rlmRuns > 0 && params.collabRunsWithToolCalls === 0) { // Mutually exclusive with the "No RLM runs" hint above.
+        hints.push('RLM is used without collab activity; ensure collab is enabled (`codex features enable collab`).');
+    }
+    return hints.slice(0, 3); // Keeps only the first three hints, in the order they were checked above.
+}
 function extractRunIdFromManifestPath(manifestPath) {
     if (!manifestPath) {
         return null;

package/dist/orchestrator/src/cli/orchestrator.js CHANGED Viewed

@@ -9,6 +9,7 @@ import { resolveEnvironmentPaths } from '../../../scripts/lib/run-manifests.js';
 import { normalizeEnvironmentPaths } from './run/environment.js';
 import { bootstrapManifest, loadManifest, updateHeartbeat, finalizeStatus, appendSummary, ensureGuardrailStatus, resetForResume, recordResumeEvent } from './run/manifest.js';
 import { ManifestPersister, persistManifest } from './run/manifestPersister.js';
+import { resolveRuntimeActivitySnapshot } from './run/runtimeActivity.js';
 import { generateRunId } from './utils/runId.js';
 import { runCommandStage } from './services/commandRunner.js';
 import { appendMetricsEntry } from './metrics/metricsRecorder.js';
@@ -437,12 +438,13 @@ export class CodexOrchestrator {
     async status(options) {
         const env = this.baseEnv;
         const { manifest, paths } = await loadManifest(env, options.runId);
+        const activity = await resolveRuntimeActivitySnapshot(manifest, paths);
         if (options.format === 'json') {
-            const payload = this.buildStatusPayload(env, manifest, paths);
+            const payload = this.buildStatusPayload(env, manifest, paths, activity);
             process.stdout.write(`${JSON.stringify(payload, null, 2)}\n`);
             return manifest;
         }
-        this.renderStatus(manifest);
+        this.renderStatus(manifest, activity);
         return manifest;
     }
     async plan(options = {}) {
@@ -1121,7 +1123,7 @@ export class CodexOrchestrator {
             throw new Error('Resume token mismatch.');
         }
     }
-    buildStatusPayload(env, manifest, paths) {
+    buildStatusPayload(env, manifest, paths, activity) {
         return {
             run_id: manifest.run_id,
             status: manifest.status,
@@ -1132,17 +1134,24 @@ export class CodexOrchestrator {
             artifact_root: manifest.artifact_root,
             log_path: manifest.log_path,
             heartbeat_at: manifest.heartbeat_at,
+            activity,
             commands: manifest.commands,
             child_runs: manifest.child_runs,
             cloud_execution: manifest.cloud_execution ?? null
         };
     }
-    renderStatus(manifest) {
+    renderStatus(manifest, activity) {
         logger.info(`Run: ${manifest.run_id}`);
         logger.info(`Status: ${manifest.status}${manifest.status_detail ? ` (${manifest.status_detail})` : ''}`);
         logger.info(`Started: ${manifest.started_at}`);
         logger.info(`Completed: ${manifest.completed_at ?? 'in-progress'}`);
         logger.info(`Manifest: ${manifest.artifact_root}/manifest.json`);
+        if (activity.observed_at) {
+            const staleSuffix = activity.stale === null ? '' : activity.stale ? ' [stale]' : ' [active]';
+            const sourceLabel = activity.observed_source ? ` via ${activity.observed_source}` : '';
+            const ageLabel = activity.age_seconds === null ? '' : ` age=${activity.age_seconds}s`;
+            logger.info(`Activity: ${activity.observed_at}${sourceLabel}${ageLabel}${staleSuffix}`);
+        }
         if (manifest.cloud_execution?.task_id) {
             logger.info(`Cloud: ${manifest.cloud_execution.task_id} [${manifest.cloud_execution.status}]` +
                 (manifest.cloud_execution.status_url ? ` ${manifest.cloud_execution.status_url}` : ''));

package/dist/orchestrator/src/cli/run/runtimeActivity.js ADDED Viewed

@@ -0,0 +1,79 @@
+import { readFile, stat } from 'node:fs/promises';
+export async function resolveRuntimeActivitySnapshot(manifest, paths, options = {}) { // Gathers activity timestamps from three sources and reports the newest one plus a staleness verdict.
+    const manifestHeartbeat = normalizeTimestamp(manifest.heartbeat_at); // { iso, ms } or null when missing/unparseable.
+    const heartbeatFileAt = await readHeartbeatTimestamp(paths.heartbeatPath); // Raw trimmed file contents, or null when unreadable/empty.
+    const runnerLogMtime = await readMtimeIso(paths.logPath); // Log file modification time as an ISO string, or null when stat fails.
+    const candidates = []; // Only sources that yielded a parseable timestamp are added.
+    if (manifestHeartbeat) {
+        candidates.push({ source: 'manifest', ...manifestHeartbeat });
+    }
+    const heartbeatCandidate = normalizeTimestamp(heartbeatFileAt);
+    if (heartbeatCandidate) {
+        candidates.push({ source: 'heartbeat_file', ...heartbeatCandidate });
+    }
+    const logCandidate = normalizeTimestamp(runnerLogMtime);
+    if (logCandidate) {
+        candidates.push({ source: 'runner_log', ...logCandidate });
+    }
+    const latest = pickLatest(candidates); // Candidate with the greatest `ms`, or null when there are none.
+    const nowMs = Number.isFinite(options.nowMs) ? Number(options.nowMs) : Date.now(); // `options.nowMs` overrides the clock when it is a finite number.
+    const staleThresholdSeconds = Number.isFinite(manifest.heartbeat_stale_after_seconds) && manifest.heartbeat_stale_after_seconds > 0 // Threshold is honored only when it is a finite number greater than 0.
+        ? Math.floor(manifest.heartbeat_stale_after_seconds) // Rounded down to whole seconds.
+        : null;
+    let stale = null; // Stays null unless the condition below holds.
+    let ageSeconds = null; // Stays null unless the condition below holds.
+    if (manifest.status === 'in_progress' && latest && staleThresholdSeconds !== null) { // Age/staleness are computed only for in_progress runs with a known timestamp and threshold.
+        ageSeconds = Math.max(0, Math.floor((nowMs - latest.ms) / 1000)); // Whole seconds since the newest timestamp; clamped to 0 if it lies after `nowMs`.
+        stale = ageSeconds > staleThresholdSeconds; // Stale only when strictly older than the threshold.
+    }
+    return {
+        manifest_heartbeat_at: manifestHeartbeat?.iso ?? null,
+        heartbeat_file_at: heartbeatCandidate?.iso ?? null,
+        runner_log_mtime_at: logCandidate?.iso ?? null,
+        observed_at: latest?.iso ?? null, // Newest of the three sources, or null if none parsed.
+        observed_source: latest?.source ?? null, // 'manifest' | 'heartbeat_file' | 'runner_log' | null.
+        stale,
+        stale_threshold_seconds: staleThresholdSeconds,
+        age_seconds: ageSeconds
+    };
+}
+async function readHeartbeatTimestamp(heartbeatPath) { // Reads the heartbeat file as UTF-8 and resolves to its trimmed contents, or null.
+    try {
+        const raw = await readFile(heartbeatPath, 'utf8');
+        const trimmed = raw.trim();
+        return trimmed.length > 0 ? trimmed : null; // An empty or whitespace-only file is reported as null.
+    }
+    catch { // Any read error resolves to null instead of propagating.
+        return null;
+    }
+}
+async function readMtimeIso(filePath) { // Resolves to the file's modification time as an ISO 8601 string, or null.
+    try {
+        const fileStat = await stat(filePath);
+        return fileStat.mtime.toISOString();
+    }
+    catch { // Any error from the lookup above resolves to null instead of propagating.
+        return null;
+    }
+}
+function normalizeTimestamp(value) { // Parses a timestamp string into { iso, ms }; returns null for anything that cannot be parsed.
+    if (typeof value !== 'string') { // Non-string input (including null/undefined) is rejected.
+        return null;
+    }
+    const trimmed = value.trim();
+    if (!trimmed) { // Empty or whitespace-only strings are rejected.
+        return null;
+    }
+    const ms = Date.parse(trimmed); // Epoch milliseconds, or NaN when the string is not a recognizable date.
+    if (!Number.isFinite(ms)) {
+        return null;
+    }
+    return { iso: new Date(ms).toISOString(), ms }; // `iso` is re-serialized from `ms`, so it is always in toISOString() form.
+}
+function pickLatest(candidates) { // Returns the candidate with the greatest `ms`, or null for an empty list.
+    if (candidates.length === 0) {
+        return null;
+    }
+    candidates.sort((a, b) => b.ms - a.ms); // Sorts the caller's array in place, descending by `ms`.
+    return candidates[0] ?? null;
+}

package/dist/orchestrator/src/cli/services/commandRunner.js CHANGED Viewed

@@ -171,7 +171,6 @@ export async function runCommandStage(context, hooks = {}) {
         try {
             result = await runner.run({
                 command: stage.command,
-                args: [],
                 cwd: stage.cwd ?? env.repoRoot,
                 env: execEnv,
                 sessionId: sessionId ?? undefined,

package/dist/orchestrator/src/cli/services/execRuntime.js CHANGED Viewed

@@ -19,10 +19,13 @@ const sessionManager = new ExecSessionManager({
 const privacyGuard = new PrivacyGuard({ mode: resolvePrivacyGuardMode() });
 const handleService = new RemoteExecHandleService({ guard: privacyGuard, now: () => new Date() });
 const cliExecutor = async (request) => {
+    const hasExplicitArgs = Array.isArray(request.args);
     const child = spawn(request.command, request.args ?? [], {
         cwd: request.cwd,
         env: request.env,
-        shell: true,
+        // Use shell mode only for string-style commands. When args are provided we
+        // want argv semantics (`cmd arg1 arg2`) rather than `sh -c cmd` behavior.
+        shell: !hasExplicitArgs,
         stdio: ['ignore', 'pipe', 'pipe']
     });
     if (!child.stdout || !child.stderr) {

package/dist/packages/orchestrator/src/exec/unified-exec.js CHANGED Viewed

@@ -27,7 +27,8 @@ export class UnifiedExecRunner {
         };
     }
     async run(options) {
-        const args = options.args ?? [];
+        const args = options.args;
+        const resolvedArgs = args ?? [];
         const invocationId = options.invocationId ?? this.idGenerator();
         const correlationId = this.idGenerator();
         const issuedHandle = this.handleService ? this.handleService.issueHandle(correlationId) : undefined;
@@ -49,7 +50,7 @@ export class UnifiedExecRunner {
         const metadata = {
             ...options.metadata,
             command: options.command,
-            args,
+            args: resolvedArgs,
             cwd: options.cwd,
             sessionId: lease.id,
             correlationId,
@@ -81,7 +82,7 @@ export class UnifiedExecRunner {
                         attempt,
                         correlationId,
                         command: options.command,
-                        args,
+                        args: resolvedArgs,
                         cwd: options.cwd,
                         sandboxState,
                         sessionId: lease.id,
@@ -403,7 +404,7 @@ function getErrorMessage(error) {
     return String(error);
 }
 const defaultExecutor = async (request) => {
-    const child = spawn(request.command, request.args, {
+    const child = spawn(request.command, request.args ?? [], {
         cwd: request.cwd,
         env: request.env,
         stdio: ['ignore', 'pipe', 'pipe']

package/docs/README.md CHANGED Viewed

@@ -155,6 +155,7 @@ Notes:
 - `/prompts:review-handoff` takes `TASK=<task-id> MANIFEST=<path> NOTES=<goal + summary + risks + optional questions>`, re-exports `MCP_RUNNER_TASK_ID`, and (repo-only) runs `node scripts/delegation-guard.mjs`, `node scripts/spec-guard.mjs --dry-run`, `npm run lint`, `npm run test`, optional `npm run eval:test`, plus `npm run review` (wraps `codex review` against the current diff and includes the latest run manifest path as evidence). It also reminds you to log approvals in `$MANIFEST` and mirror the evidence to the same docs/metrics/state targets.
 - In CI / `--no-interactive` pipelines (or when stdin is not a TTY, or `CODEX_REVIEW_NON_INTERACTIVE=1` / `CODEX_NON_INTERACTIVE=1` / `CODEX_NO_INTERACTIVE=1`), `npm run review` prints the review handoff prompt (including evidence paths) and exits successfully instead of invoking `codex review`. Set `FORCE_CODEX_REVIEW=1` to run `codex review` in those environments.
 - When forcing non-interactive review execution, `npm run review` enforces a timeout (`CODEX_REVIEW_TIMEOUT_SECONDS`, default `900`). Set `CODEX_REVIEW_TIMEOUT_SECONDS=0` to disable the timeout.
+- Forced non-interactive review execution also enforces a no-output stall timeout (`CODEX_REVIEW_STALL_TIMEOUT_SECONDS`, default `600`). Set `CODEX_REVIEW_STALL_TIMEOUT_SECONDS=0` to disable the stall guard.
 - Always trigger diagnostics and review workflows through these prompts whenever you run the orchestrator so contributors consistently execute the required command sequences and capture auditable manifests.
 ### Identifier Guardrails

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@kbediako/codex-orchestrator",
-  "version": "0.1.29",
+  "version": "0.1.31",
   "license": "MIT",
   "repository": {
     "type": "git",

package/skills/collab-subagents-first/SKILL.md CHANGED Viewed

@@ -52,6 +52,19 @@ Skip subagents when all conditions are true:
 - Include objective, scope, constraints, acceptance criteria, and expected output format.
 - Require concise summaries and evidence paths; avoid long logs in chat.
+4a) Declare write policy and track ownership against git status
+- Capture a baseline before spawning: `git status --porcelain`.
+- Declare each stream as either:
+  - `read-only` (research/scout/review), or
+  - `write-enabled` (implementation/tests).
+- For `read-only` streams, include an explicit "no file edits" constraint.
+- After each `wait`, compare status against baseline and map changed files to stream ownership.
+- Treat in-scope edits from active write-enabled streams as expected delegated output.
+- Escalate only for out-of-scope changes, overlapping ownership collisions, or edits appearing without an active stream owner.
+- If the agent surfaces a generic "unexpected local edits" pause prompt, treat it as a classification step: keep and continue when edits are in-scope; escalate only violations.
+- Prefer the built-in helper when available (`node scripts/subagent-edit-guard.mjs ...`); canonical command examples live in `docs/delegation-runner-workflow.md` (section `3a`). If the helper is not present in the current repo, use the same baseline/scope logic manually.
+- If `finish` exits non-zero, escalate only the reported `out_of_scope_paths` / `violations`.
 5) Run streams in parallel when independent
 - Spawn multiple subagents for independent streams.
 - Wait for all subagents to finish before final synthesis.
@@ -159,6 +172,7 @@ Do not treat wrapper handoff-only output as a completed review.
 - Do not skip delegation solely because there is only one implementation stream; single-stream delegation is valid for context offload.
 - Do not rely on human-readable agent names in TUI labels for control flow; use stream ownership and evidence paths as source of truth.
 - Do not end the parent work with unclosed collab agent ids.
+- Do not treat every delegated edit as "unexpected"; first verify whether the edit belongs to an active stream owner.
 ## Completion checklist

package/skills/collab-subagents-first/references/subagent-brief-template.md CHANGED Viewed

@@ -30,6 +30,7 @@ Out of scope:
 Ownership:
 - Files/paths you may edit: <paths>
 - Files/paths you must not edit: <paths>
+- Write policy: read-only | write-enabled
 Acceptance criteria:
 - <bullet 1>
@@ -59,6 +60,7 @@ Keep the response concise. Put detailed notes in a file and return the path.
 - Include enough context so the subagent can act without back-and-forth.
 - Include explicit file ownership boundaries.
+- Include explicit write policy (`read-only` or `write-enabled`).
 - Include a concrete output format and validation expectations.
 - Include at least one "do not do" constraint to prevent drift.
 - If task is review-only, explicitly prohibit implementation edits.
@@ -87,4 +89,3 @@ Objective: validate <existing change>.
 Deliverable: failing/passing checks, defect list by severity, and minimal fix suggestions.
 No broad refactors.
 ```

package/skills/delegation-usage/SKILL.md CHANGED Viewed

@@ -23,6 +23,12 @@ Collab multi-agent mode is separate from delegation. For symbolic RLM subcalls t
 - **Lifecycle is mandatory:** for every successful `spawn_agent`, run `wait` and then `close_agent` for that same id before task completion.
 - Keep a local list of spawned ids and run a final cleanup pass so no agent id is left unclosed on timeout/error paths.
 - If spawn fails with `agent thread limit reached`, stop spawning, close any known ids first, then surface a concise recovery note.
+- In a shared checkout, spawned subagents may produce file edits. Treat edits inside that stream's declared ownership as expected delegated output, not external interference.
+- Before spawning, capture a baseline (`git status --porcelain`). After `wait`, diff against baseline and classify file changes by stream ownership.
+- Escalate "unexpected local edits" only when changed files are outside all active stream scopes (or when no subagent was active).
+- If a generic safety prompt appears after delegation (for example "unexpected local edits"), run scope classification first; when edits are in-scope, keep them and continue without user escalation.
+- For scout/research streams, set an explicit no-write constraint and verify the post-run status matches baseline.
+- Prefer `scripts/subagent-edit-guard.mjs` for low-friction enforcement when the helper exists in the repo (`start` before spawn, `finish` after `wait`); canonical command examples live in `docs/delegation-runner-workflow.md` (section `3a`). If the helper is absent, apply the same baseline/scope checks manually.
 ## Quick-start workflow (canned)
@@ -186,3 +192,4 @@ repeat:
 - **Collab payload mismatch:** `spawn_agent` rejects calls that include both `message` and `items`.
 - **Collab UI assumptions:** agent rows/records are id-based today; use explicit stream role text in prompts/artifacts for operator clarity.
 - **Collab lifecycle leaks:** missing `close_agent` calls accumulate open threads and can trigger `agent thread limit reached`; always finish `spawn -> wait -> close_agent` per id.
+- **False "unexpected edits" stops:** when a live subagent owns the touched files, treat those edits as expected output and continue with scope-aware review.

package/skills/elegance-review/SKILL.md ADDED Viewed

@@ -0,0 +1,62 @@
+---
+name: elegance-review
+description: Run an explicit post-implementation elegance/minimality pass to keep the smallest correct solution and remove avoidable complexity before handoff.
+---
+# Elegance Review
+## Overview
+Use this skill after non-trivial edits to verify the implementation is minimal, coherent, and easy to maintain. This is a simplification pass, not a feature-expansion pass.
+## Auto-trigger policy (required)
+Run this skill whenever any condition is true:
+- You changed behavior across about 2+ files.
+- You added a new helper/module/pathway and could possibly collapse it.
+- You finished addressing review feedback and are preparing to hand off.
+- You are about to recommend merge/release.
+- The user explicitly asks for elegance/minimality/overengineering checks.
+## Quick start
+Focused uncommitted review:
+```bash
+codex review --uncommitted "Find avoidable complexity, duplicate abstractions, and unnecessary indirection. Prioritize simplifications that preserve behavior."
+```
+Diff-vs-base review:
+```bash
+codex review --base <branch> "Focus on smallest viable design and maintenance cost."
+```
+## Workflow
+1) Lock invariants first
+- State what behavior cannot change.
+- Keep tests/acceptance criteria as the guardrail.
+2) Identify complexity hotspots
+- Unused abstractions, wrappers, or config layers.
+- Duplicate logic that can be consolidated safely.
+- Over-generalized interfaces used in one place only.
+- Extra branching/state that can be simplified.
+3) Simplify in smallest safe steps
+- Prefer deleting code over adding knobs.
+- Collapse one-off abstractions into local logic when clearer.
+- Keep naming and control flow direct.
+4) Re-validate
+- Run targeted tests/lint for touched areas.
+- Confirm no behavior regressions.
+5) Record result
+- Report what was simplified.
+- Report residual complexity that is intentionally kept and why.
+## Guardrails
+- Do not broaden scope into unrelated refactors.
+- Do not trade readability for cleverness.
+- If `codex review` is unavailable, run a manual checklist using the same criteria and note that fallback.

package/skills/standalone-review/SKILL.md CHANGED Viewed

@@ -64,6 +64,7 @@ codex review "Focus on correctness, regressions, edge cases; list missing tests.
 - If you need manifest evidence, use the review wrapper:
   `TASK=<task-id> NOTES="Goal: ... | Summary: ... | Risks: ... | Questions (optional): ..." MANIFEST=<path> npm run review -- --manifest <path>`
 - In non-interactive environments, add `FORCE_CODEX_REVIEW=1` as needed.
+- In non-interactive environments, prefer the wrapper over raw `codex review`; it enforces `CODEX_REVIEW_TIMEOUT_SECONDS` and `CODEX_REVIEW_STALL_TIMEOUT_SECONDS` guardrails.
 ## Expected outputs
 - A prioritized list of findings.