npm - @kbediako/codex-orchestrator - Versions diffs - 0.1.17 → 0.1.18 - Mend

@kbediako/codex-orchestrator 0.1.17 → 0.1.18

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8)

package/dist/orchestrator/src/cli/rlm/symbolic.js +79 -24
package/dist/orchestrator/src/cli/rlmRunner.js +13 -11
package/dist/orchestrator/src/cloud/CodexCloudTaskExecutor.js +9 -2
package/package.json +1 -1
package/skills/delegation-usage/DELEGATION_GUIDE.md +11 -0
package/skills/delegation-usage/SKILL.md +4 -0
package/skills/docs-first/SKILL.md +1 -0
package/skills/standalone-review/SKILL.md +13 -1

package/dist/orchestrator/src/cli/rlm/symbolic.js CHANGED Viewed

@@ -613,6 +613,33 @@ function formatDeliberationReason(reason) {
             return 'cadence';
     }
 }
+function attachDeliberationArtifactPaths(error, artifactPaths) {
+    const normalized = error instanceof Error ? error : new Error(String(error));
+    if (artifactPaths) {
+        normalized.artifactPaths = artifactPaths;
+    }
+    return normalized;
+}
+function extractDeliberationArtifactPaths(error) {
+    if (!error || typeof error !== 'object') {
+        return undefined;
+    }
+    const rawPaths = error.artifactPaths;
+    if (!rawPaths || typeof rawPaths !== 'object') {
+        return undefined;
+    }
+    const typed = rawPaths;
+    if (typeof typed.prompt !== 'string' ||
+        typeof typed.output !== 'string' ||
+        typeof typed.meta !== 'string') {
+        return undefined;
+    }
+    return {
+        prompt: typed.prompt,
+        output: typed.output,
+        meta: typed.meta
+    };
+}
 function selectDeliberationReason(params) {
     if (params.iteration === 1) {
         return 'bootstrap';
@@ -678,27 +705,58 @@ async function runDeliberationStep(params) {
         maxSummaryBytes: params.options.maxSummaryBytes
     });
     const promptBytes = byteLength(prompt);
-    const deliberationDir = join(params.runDir, 'deliberation');
-    await mkdir(deliberationDir, { recursive: true });
-    const baseName = `iteration-${String(params.iteration).padStart(4, '0')}`;
-    const promptPath = join(deliberationDir, `${baseName}-prompt.txt`);
-    const outputPath = join(deliberationDir, `${baseName}-output.txt`);
-    const metaPath = join(deliberationDir, `${baseName}-meta.json`);
-    await writeFile(promptPath, prompt, 'utf8');
-    const output = await params.options.run(prompt, {
-        iteration: params.iteration,
-        reason: formatDeliberationReason(params.reason)
-    });
+    const shouldLogArtifacts = params.options.logArtifacts === true;
+    let artifactPaths;
+    let outputPath = null;
+    let metaPath = null;
+    if (shouldLogArtifacts) {
+        const deliberationDir = join(params.runDir, 'deliberation');
+        await mkdir(deliberationDir, { recursive: true });
+        const baseName = `iteration-${String(params.iteration).padStart(4, '0')}`;
+        const promptPath = join(deliberationDir, `${baseName}-prompt.txt`);
+        outputPath = join(deliberationDir, `${baseName}-output.txt`);
+        metaPath = join(deliberationDir, `${baseName}-meta.json`);
+        await writeFile(promptPath, prompt, 'utf8');
+        artifactPaths = {
+            prompt: relative(params.repoRoot, promptPath),
+            output: relative(params.repoRoot, outputPath),
+            meta: relative(params.repoRoot, metaPath)
+        };
+    }
+    let output;
+    try {
+        output = await params.options.run(prompt, {
+            iteration: params.iteration,
+            reason: formatDeliberationReason(params.reason)
+        });
+    }
+    catch (error) {
+        if (shouldLogArtifacts && outputPath && metaPath) {
+            const errorMessage = error instanceof Error ? error.message : String(error);
+            await writeFile(outputPath, '', 'utf8');
+            await writeFile(metaPath, JSON.stringify({
+                iteration: params.iteration,
+                reason: formatDeliberationReason(params.reason),
+                strategy: params.options.strategy,
+                prompt_bytes: promptBytes,
+                output_bytes: 0,
+                error: errorMessage
+            }, null, 2), 'utf8');
+        }
+        throw attachDeliberationArtifactPaths(error, artifactPaths);
+    }
     const brief = truncateUtf8ToBytes(output ?? '', params.options.maxSummaryBytes);
     const outputBytes = byteLength(brief);
-    await writeFile(outputPath, brief, 'utf8');
-    await writeFile(metaPath, JSON.stringify({
-        iteration: params.iteration,
-        reason: formatDeliberationReason(params.reason),
-        strategy: params.options.strategy,
-        prompt_bytes: promptBytes,
-        output_bytes: outputBytes
-    }, null, 2), 'utf8');
+    if (shouldLogArtifacts && outputPath && metaPath) {
+        await writeFile(outputPath, brief, 'utf8');
+        await writeFile(metaPath, JSON.stringify({
+            iteration: params.iteration,
+            reason: formatDeliberationReason(params.reason),
+            strategy: params.options.strategy,
+            prompt_bytes: promptBytes,
+            output_bytes: outputBytes
+        }, null, 2), 'utf8');
+    }
     return {
         record: {
             status: 'ran',
@@ -706,11 +764,7 @@ async function runDeliberationStep(params) {
             strategy: params.options.strategy,
             prompt_bytes: promptBytes,
             output_bytes: outputBytes,
-            artifact_paths: {
-                prompt: relative(params.repoRoot, promptPath),
-                output: relative(params.repoRoot, outputPath),
-                meta: relative(params.repoRoot, metaPath)
-            }
+            artifact_paths: artifactPaths
         },
         brief
     };
@@ -814,6 +868,7 @@ export async function runSymbolicLoop(options) {
                             status: 'error',
                             reason: formatDeliberationReason(reason),
                             strategy: deliberationOptions.strategy,
+                            artifact_paths: extractDeliberationArtifactPaths(error),
                             error: error instanceof Error ? error.message : String(error)
                         };
                         log(`Deliberation ${formatDeliberationReason(reason)} failed for iteration ${iteration}: ${deliberation.error}`);

package/dist/orchestrator/src/cli/rlmRunner.js CHANGED Viewed

@@ -235,11 +235,11 @@ async function resolveContextSource(env, fallbackText) {
 async function promptForValidator(candidates) {
     const rl = createInterface({ input: process.stdin, output: process.stdout });
     try {
-        console.log('Validator auto-detect found multiple candidates:');
+        logger.info('Validator auto-detect found multiple candidates:');
         candidates.forEach((candidate, index) => {
-            console.log(`  ${index + 1}) ${candidate.command} (${candidate.reason})`);
+            logger.info(`  ${index + 1}) ${candidate.command} (${candidate.reason})`);
         });
-        console.log('  n) none');
+        logger.info('  n) none');
         const answer = (await rl.question('Select validator [1-n or n for none]: ')).trim().toLowerCase();
         if (!answer || answer === 'n' || answer === 'none') {
             return null;
@@ -576,7 +576,7 @@ async function main() {
         state.final = { status, exitCode };
         await writeTerminalState(runDir, state);
         if (message) {
-            console.error(message);
+            logger.error(message);
         }
         process.exitCode = exitCode;
     };
@@ -725,7 +725,7 @@ async function main() {
             const detection = await detectValidator(repoRoot);
             if (detection.status === 'selected' && detection.command) {
                 validatorCommand = detection.command;
-                console.log(`Validator: ${detection.command} (${detection.reason ?? 'auto-detect'})`);
+                logger.info(`Validator: ${detection.command} (${detection.reason ?? 'auto-detect'})`);
             }
             else if (detection.status === 'ambiguous') {
                 if (isInteractive) {
@@ -743,7 +743,7 @@ async function main() {
                         mode,
                         context: contextInfo
                     });
-                    console.error(candidates);
+                    logger.error(candidates);
                     return;
                 }
             }
@@ -766,10 +766,10 @@ async function main() {
         }
     }
     if (validatorCommand === null) {
-        console.log('Validator: none');
+        logger.info('Validator: none');
     }
     else {
-        console.log(`Validator: ${validatorCommand}`);
+        logger.info(`Validator: ${validatorCommand}`);
     }
     const subagentsEnabled = envFlagEnabled(env.CODEX_SUBAGENTS) || envFlagEnabled(env.RLM_SUBAGENTS);
     const symbolicCollabEnabled = envFlagEnabled(env.RLM_SYMBOLIC_COLLAB);
@@ -779,6 +779,7 @@ async function main() {
     const symbolicDeliberationIncludeInPlanner = env.RLM_SYMBOLIC_DELIBERATION_INCLUDE_IN_PLANNER === undefined
         ? true
         : envFlagEnabled(env.RLM_SYMBOLIC_DELIBERATION_INCLUDE_IN_PLANNER);
+    const symbolicDeliberationLogArtifacts = envFlagEnabled(env.RLM_SYMBOLIC_DELIBERATION_LOG);
     const nonInteractive = shouldForceNonInteractive(env);
     if (mode === 'symbolic') {
         const budgets = {
@@ -894,6 +895,7 @@ async function main() {
                 maxRuns: deliberationMaxRuns,
                 maxSummaryBytes: deliberationMaxSummaryBytes,
                 includeInPlannerPrompt: symbolicDeliberationIncludeInPlanner,
+                logArtifacts: symbolicDeliberationLogArtifacts,
                 run: (prompt, _meta) => {
                     void _meta;
                     if (!symbolicCollabEnabled) {
@@ -914,7 +916,7 @@ async function main() {
         });
         const finalStatus = result.state.final?.status ?? 'unknown';
         const iterationCount = result.state.symbolic_iterations.length;
-        console.log(`RLM completed: status=${finalStatus} symbolic_iterations=${iterationCount} exit=${result.exitCode}`);
+        logger.info(`RLM completed: status=${finalStatus} symbolic_iterations=${iterationCount} exit=${result.exitCode}`);
         process.exitCode = result.exitCode;
         return;
     }
@@ -935,11 +937,11 @@ async function main() {
     });
     const finalStatus = result.state.final?.status ?? 'unknown';
     const iterationCount = result.state.iterations.length;
-    console.log(`RLM completed: status=${finalStatus} iterations=${iterationCount} exit=${result.exitCode}`);
+    logger.info(`RLM completed: status=${finalStatus} iterations=${iterationCount} exit=${result.exitCode}`);
     const hasTimeCap = resolvedMaxMinutes !== null && resolvedMaxMinutes > 0;
     const unboundedBudgetInvalid = validatorCommand === null && maxIterations === 0 && !hasTimeCap;
     if (finalStatus === 'invalid_config' && unboundedBudgetInvalid) {
-        console.error('Invalid configuration: --validator none with unbounded iterations and --max-minutes 0 would run forever. Fix: set --max-minutes / RLM_MAX_MINUTES to a positive value (default 2880), set --max-iterations to a positive value, or provide a validator.');
+        logger.error('Invalid configuration: --validator none with unbounded iterations and --max-minutes 0 would run forever. Fix: set --max-minutes / RLM_MAX_MINUTES to a positive value (default 2880), set --max-iterations to a positive value, or provide a validator.');
     }
     process.exitCode = result.exitCode;
 }

package/dist/orchestrator/src/cloud/CodexCloudTaskExecutor.js CHANGED Viewed

@@ -5,7 +5,7 @@ import { setTimeout as sleep } from 'node:timers/promises';
 import { isoTimestamp } from '../cli/utils/time.js';
 const TASK_ID_PATTERN = /\btask_[a-z]_[a-f0-9]+\b/i;
 const MAX_LOG_CHARS = 32 * 1024;
-const STATUS_RETRY_LIMIT = 3;
+const STATUS_RETRY_LIMIT = 12;
 const STATUS_RETRY_BACKOFF_MS = 1500;
 const DEFAULT_LIST_LIMIT = 20;
 export function extractCloudTaskId(text) {
@@ -129,6 +129,8 @@ export class CodexCloudTaskExecutor {
             }
             const timeoutAt = Date.now() + cloudExecution.timeout_seconds * 1000;
             let statusRetries = 0;
+            let lastKnownStatus = cloudExecution.status;
+            let loggedNonZeroStatus = false;
             while (Date.now() < timeoutAt) {
                 const statusResult = await runCloudCommand(['cloud', 'status', taskId]);
                 cloudExecution.last_polled_at = this.now();
@@ -145,9 +147,14 @@ export class CodexCloudTaskExecutor {
                     await this.sleepFn(STATUS_RETRY_BACKOFF_MS * statusRetries);
                     continue;
                 }
+                if (statusResult.exitCode !== 0 && mapped !== 'unknown' && !loggedNonZeroStatus) {
+                    notes.push(`Cloud status returned exit ${statusResult.exitCode} with remote status ${mapped}; continuing to poll.`);
+                    loggedNonZeroStatus = true;
+                }
                 statusRetries = 0;
                 if (mapped !== 'unknown') {
                     cloudExecution.status = mapped;
+                    lastKnownStatus = mapped;
                 }
                 if (mapped === 'ready') {
                     notes.push(`Cloud task completed: ${taskId}`);
@@ -161,7 +168,7 @@ export class CodexCloudTaskExecutor {
             }
             if (cloudExecution.status === 'running' || cloudExecution.status === 'queued') {
                 cloudExecution.status = 'failed';
-                cloudExecution.error = `Timed out waiting for cloud task completion after ${cloudExecution.timeout_seconds}s.`;
+                cloudExecution.error = `Timed out waiting for cloud task completion after ${cloudExecution.timeout_seconds}s (last remote status: ${lastKnownStatus}, polls: ${cloudExecution.poll_count}).`;
             }
             if (cloudExecution.status === 'ready') {
                 const diffResult = await runCloudCommand(['cloud', 'diff', taskId]);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@kbediako/codex-orchestrator",
-  "version": "0.1.17",
+  "version": "0.1.18",
   "license": "MIT",
   "repository": {
     "type": "git",

package/skills/delegation-usage/DELEGATION_GUIDE.md CHANGED Viewed

@@ -81,6 +81,16 @@ delegate.spawn({
 })
 ```
+## Collab lifecycle hygiene (required)
+When using collab tools (`spawn_agent` / `wait` / `close_agent`):
+- Treat each spawned `agent_id` as a resource that must be closed.
+- For every successful spawn, run `wait` then `close_agent` for the same id.
+- Keep a local list of spawned ids and run a final cleanup pass before returning.
+- On timeout/error paths, still close known ids before reporting failure.
+- If you see `agent thread limit reached`, stop spawning immediately, close known ids, and retry only after cleanup.
 ## RLM budget overrides (recommended defaults)
 If you want deeper recursion or longer wall-clock time for delegated runs, set RLM budgets on the delegation server:
@@ -123,3 +133,4 @@ Delegation MCP expects JSONL. Keep `codex-orchestrator` aligned with the current
 - **Run identifiers**: status/pause/cancel require `manifest_path`; question queue requires `parent_manifest_path`.
 - **Collab payload mismatch**: `spawn_agent` calls fail if they include both `message` and `items`.
 - **Collab depth limits**: recursive collab fan-out can fail near max depth; prefer shallow parent fan-out.
+- **Collab lifecycle leaks**: missing `close_agent` calls can exhaust thread slots and block future spawns (`agent thread limit reached`).

package/skills/delegation-usage/SKILL.md CHANGED Viewed

@@ -20,6 +20,9 @@ Collab multi-agent mode is separate from delegation. For symbolic RLM subcalls t
 - Spawn returns an `agent_id` (thread id). Current TUI collab rendering is id-based; do not depend on custom visible agent names.
 - Subagents spawned through collab run with approval effectively set to `never`; design child tasks to avoid approval/escalation requirements.
 - Collab spawn depth is bounded. Near/at max depth, recursive delegation can fail or collab can be disabled in children; prefer shallow parent fan-out.
+- **Lifecycle is mandatory:** for every successful `spawn_agent`, run `wait` and then `close_agent` for that same id before task completion.
+- Keep a local list of spawned ids and run a final cleanup pass so no agent id is left unclosed on timeout/error paths.
+- If spawn fails with `agent thread limit reached`, stop spawning, close any known ids first, then surface a concise recovery note.
 ## Quick-start workflow (canned)
@@ -174,3 +177,4 @@ repeat:
 - **Missing control files:** delegate tools rely on `control_endpoint.json` in the run directory; older runs may not have it.
 - **Collab payload mismatch:** `spawn_agent` rejects calls that include both `message` and `items`.
 - **Collab UI assumptions:** agent rows/records are id-based today; use explicit stream role text in prompts/artifacts for operator clarity.
+- **Collab lifecycle leaks:** missing `close_agent` calls accumulate open threads and can trigger `agent thread limit reached`; always finish `spawn -> wait -> close_agent` per id.

package/skills/docs-first/SKILL.md CHANGED Viewed

@@ -16,6 +16,7 @@ Use this skill when a task needs a spec-driven workflow. The objective is to cre
 - TECH_SPEC: capture technical requirements (use `.agent/task/templates/tech-spec-template.md`; stored under `tasks/specs/<id>-<slug>.md`).
 - ACTION_PLAN: capture sequencing/milestones (use `.agent/task/templates/action-plan-template.md`).
 - Depth scales with scope, but all three docs are required.
+- For low-risk tiny edits, follow the bounded shortcut in `docs/micro-task-path.md` instead of long-form rewrites (still requires task/spec evidence).
 2) Register the TECH_SPEC and task
 - Add the TECH_SPEC to `tasks/index.json` (including `last_review`).

package/skills/standalone-review/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: standalone-review
-description: Use for ad-hoc/standalone reviews outside pipelines (fast checks during implementation or before handoff) using `codex review`.
+description: Use for required periodic cross-check reviews during implementation and before handoff using `codex review`.
 ---
 # Standalone Review
@@ -10,6 +10,17 @@ description: Use for ad-hoc/standalone reviews outside pipelines (fast checks du
 Use this skill when you need a fast, ad-hoc review without running a pipeline or collecting a manifest. It is ideal during implementation or for quick pre-flight checks.
 Before implementation, use it to review the task/spec against the user’s intent and record the approval in the PRD/TECH_SPEC or task notes.
+## Auto-trigger policy (required)
+Run this skill automatically whenever any condition is true:
+- You made code/config/script/test edits since the last standalone review.
+- You finished a meaningful chunk of work (default: behavior change or about 2+ files touched).
+- You are about to report completion, propose merge, or answer "what's next?" with recommendations.
+- You addressed external feedback (PR reviews, bot comments, or CI-fix patches).
+- 45 minutes of active implementation elapsed without a standalone review.
+If review execution is blocked, record why in task notes, then do manual diff review plus targeted tests before proceeding.
 ## Quick start
 Uncommitted diff:
@@ -39,6 +50,7 @@ codex review "Focus on correctness, regressions, edge cases; list missing tests.
 - Keep prompts short, specific, and test-oriented.
 2) Run the review often
+- Follow the auto-trigger policy above (not optional).
 - Run after each meaningful chunk of work.
 - Prefer targeted focus prompts for WIP reviews.