@kbediako/codex-orchestrator 0.1.17 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -613,6 +613,33 @@ function formatDeliberationReason(reason) {
613
613
  return 'cadence';
614
614
  }
615
615
  }
616
/**
 * Normalizes a thrown value into an Error and tags it with the deliberation
 * artifact paths so a caller further up the stack can report them.
 *
 * @param {unknown} error - Whatever was thrown by the deliberation run.
 * @param {{ prompt: string, output: string, meta: string } | undefined} artifactPaths
 *   Artifact paths to attach; nothing is attached when this is falsy.
 * @returns {Error} The original Error instance (tagged in place), or a new
 *   Error wrapping a non-Error thrown value.
 */
function attachDeliberationArtifactPaths(error, artifactPaths) {
  // Keep the original thrown value reachable through `cause` when it has to be
  // wrapped; runtimes without Error-cause support simply ignore the option.
  const normalized = error instanceof Error
    ? error
    : new Error(String(error), { cause: error });
  if (artifactPaths) {
    // This helper runs inside a catch handler. A plain assignment throws a
    // TypeError in strict mode when the error is frozen or non-extensible,
    // which would replace the real failure. Reflect.set reports that case
    // by returning false instead, so attaching stays best-effort.
    Reflect.set(normalized, 'artifactPaths', artifactPaths);
  }
  return normalized;
}
623
/**
 * Reads the `artifactPaths` tag from a caught value, if present and well-formed.
 *
 * @param {unknown} error - Any caught value.
 * @returns {{ prompt: string, output: string, meta: string } | undefined}
 *   A fresh object holding only the three path strings, or `undefined` when
 *   the value is not an object, carries no `artifactPaths` object, or any of
 *   `prompt` / `output` / `meta` is not a string.
 */
function extractDeliberationArtifactPaths(error) {
  const isObjectLike = (value) => Boolean(value) && typeof value === 'object';

  if (!isObjectLike(error)) {
    return undefined;
  }

  const candidate = error.artifactPaths;
  if (!isObjectLike(candidate)) {
    return undefined;
  }

  // Checked in this order, stopping at the first non-string entry.
  const requiredKeys = ['prompt', 'output', 'meta'];
  const allStrings = requiredKeys.every((key) => typeof candidate[key] === 'string');
  if (!allStrings) {
    return undefined;
  }

  // Copy only the known keys so extra properties on the tag are dropped.
  return {
    prompt: candidate.prompt,
    output: candidate.output,
    meta: candidate.meta
  };
}
616
643
  function selectDeliberationReason(params) {
617
644
  if (params.iteration === 1) {
618
645
  return 'bootstrap';
@@ -678,27 +705,58 @@ async function runDeliberationStep(params) {
678
705
  maxSummaryBytes: params.options.maxSummaryBytes
679
706
  });
680
707
  const promptBytes = byteLength(prompt);
681
- const deliberationDir = join(params.runDir, 'deliberation');
682
- await mkdir(deliberationDir, { recursive: true });
683
- const baseName = `iteration-${String(params.iteration).padStart(4, '0')}`;
684
- const promptPath = join(deliberationDir, `${baseName}-prompt.txt`);
685
- const outputPath = join(deliberationDir, `${baseName}-output.txt`);
686
- const metaPath = join(deliberationDir, `${baseName}-meta.json`);
687
- await writeFile(promptPath, prompt, 'utf8');
688
- const output = await params.options.run(prompt, {
689
- iteration: params.iteration,
690
- reason: formatDeliberationReason(params.reason)
691
- });
708
+ const shouldLogArtifacts = params.options.logArtifacts === true;
709
+ let artifactPaths;
710
+ let outputPath = null;
711
+ let metaPath = null;
712
+ if (shouldLogArtifacts) {
713
+ const deliberationDir = join(params.runDir, 'deliberation');
714
+ await mkdir(deliberationDir, { recursive: true });
715
+ const baseName = `iteration-${String(params.iteration).padStart(4, '0')}`;
716
+ const promptPath = join(deliberationDir, `${baseName}-prompt.txt`);
717
+ outputPath = join(deliberationDir, `${baseName}-output.txt`);
718
+ metaPath = join(deliberationDir, `${baseName}-meta.json`);
719
+ await writeFile(promptPath, prompt, 'utf8');
720
+ artifactPaths = {
721
+ prompt: relative(params.repoRoot, promptPath),
722
+ output: relative(params.repoRoot, outputPath),
723
+ meta: relative(params.repoRoot, metaPath)
724
+ };
725
+ }
726
+ let output;
727
+ try {
728
+ output = await params.options.run(prompt, {
729
+ iteration: params.iteration,
730
+ reason: formatDeliberationReason(params.reason)
731
+ });
732
+ }
733
+ catch (error) {
734
+ if (shouldLogArtifacts && outputPath && metaPath) {
735
+ const errorMessage = error instanceof Error ? error.message : String(error);
736
+ await writeFile(outputPath, '', 'utf8');
737
+ await writeFile(metaPath, JSON.stringify({
738
+ iteration: params.iteration,
739
+ reason: formatDeliberationReason(params.reason),
740
+ strategy: params.options.strategy,
741
+ prompt_bytes: promptBytes,
742
+ output_bytes: 0,
743
+ error: errorMessage
744
+ }, null, 2), 'utf8');
745
+ }
746
+ throw attachDeliberationArtifactPaths(error, artifactPaths);
747
+ }
692
748
  const brief = truncateUtf8ToBytes(output ?? '', params.options.maxSummaryBytes);
693
749
  const outputBytes = byteLength(brief);
694
- await writeFile(outputPath, brief, 'utf8');
695
- await writeFile(metaPath, JSON.stringify({
696
- iteration: params.iteration,
697
- reason: formatDeliberationReason(params.reason),
698
- strategy: params.options.strategy,
699
- prompt_bytes: promptBytes,
700
- output_bytes: outputBytes
701
- }, null, 2), 'utf8');
750
+ if (shouldLogArtifacts && outputPath && metaPath) {
751
+ await writeFile(outputPath, brief, 'utf8');
752
+ await writeFile(metaPath, JSON.stringify({
753
+ iteration: params.iteration,
754
+ reason: formatDeliberationReason(params.reason),
755
+ strategy: params.options.strategy,
756
+ prompt_bytes: promptBytes,
757
+ output_bytes: outputBytes
758
+ }, null, 2), 'utf8');
759
+ }
702
760
  return {
703
761
  record: {
704
762
  status: 'ran',
@@ -706,11 +764,7 @@ async function runDeliberationStep(params) {
706
764
  strategy: params.options.strategy,
707
765
  prompt_bytes: promptBytes,
708
766
  output_bytes: outputBytes,
709
- artifact_paths: {
710
- prompt: relative(params.repoRoot, promptPath),
711
- output: relative(params.repoRoot, outputPath),
712
- meta: relative(params.repoRoot, metaPath)
713
- }
767
+ artifact_paths: artifactPaths
714
768
  },
715
769
  brief
716
770
  };
@@ -814,6 +868,7 @@ export async function runSymbolicLoop(options) {
814
868
  status: 'error',
815
869
  reason: formatDeliberationReason(reason),
816
870
  strategy: deliberationOptions.strategy,
871
+ artifact_paths: extractDeliberationArtifactPaths(error),
817
872
  error: error instanceof Error ? error.message : String(error)
818
873
  };
819
874
  log(`Deliberation ${formatDeliberationReason(reason)} failed for iteration ${iteration}: ${deliberation.error}`);
@@ -235,11 +235,11 @@ async function resolveContextSource(env, fallbackText) {
235
235
  async function promptForValidator(candidates) {
236
236
  const rl = createInterface({ input: process.stdin, output: process.stdout });
237
237
  try {
238
- console.log('Validator auto-detect found multiple candidates:');
238
+ logger.info('Validator auto-detect found multiple candidates:');
239
239
  candidates.forEach((candidate, index) => {
240
- console.log(` ${index + 1}) ${candidate.command} (${candidate.reason})`);
240
+ logger.info(` ${index + 1}) ${candidate.command} (${candidate.reason})`);
241
241
  });
242
- console.log(' n) none');
242
+ logger.info(' n) none');
243
243
  const answer = (await rl.question('Select validator [1-n or n for none]: ')).trim().toLowerCase();
244
244
  if (!answer || answer === 'n' || answer === 'none') {
245
245
  return null;
@@ -576,7 +576,7 @@ async function main() {
576
576
  state.final = { status, exitCode };
577
577
  await writeTerminalState(runDir, state);
578
578
  if (message) {
579
- console.error(message);
579
+ logger.error(message);
580
580
  }
581
581
  process.exitCode = exitCode;
582
582
  };
@@ -725,7 +725,7 @@ async function main() {
725
725
  const detection = await detectValidator(repoRoot);
726
726
  if (detection.status === 'selected' && detection.command) {
727
727
  validatorCommand = detection.command;
728
- console.log(`Validator: ${detection.command} (${detection.reason ?? 'auto-detect'})`);
728
+ logger.info(`Validator: ${detection.command} (${detection.reason ?? 'auto-detect'})`);
729
729
  }
730
730
  else if (detection.status === 'ambiguous') {
731
731
  if (isInteractive) {
@@ -743,7 +743,7 @@ async function main() {
743
743
  mode,
744
744
  context: contextInfo
745
745
  });
746
- console.error(candidates);
746
+ logger.error(candidates);
747
747
  return;
748
748
  }
749
749
  }
@@ -766,10 +766,10 @@ async function main() {
766
766
  }
767
767
  }
768
768
  if (validatorCommand === null) {
769
- console.log('Validator: none');
769
+ logger.info('Validator: none');
770
770
  }
771
771
  else {
772
- console.log(`Validator: ${validatorCommand}`);
772
+ logger.info(`Validator: ${validatorCommand}`);
773
773
  }
774
774
  const subagentsEnabled = envFlagEnabled(env.CODEX_SUBAGENTS) || envFlagEnabled(env.RLM_SUBAGENTS);
775
775
  const symbolicCollabEnabled = envFlagEnabled(env.RLM_SYMBOLIC_COLLAB);
@@ -779,6 +779,7 @@ async function main() {
779
779
  const symbolicDeliberationIncludeInPlanner = env.RLM_SYMBOLIC_DELIBERATION_INCLUDE_IN_PLANNER === undefined
780
780
  ? true
781
781
  : envFlagEnabled(env.RLM_SYMBOLIC_DELIBERATION_INCLUDE_IN_PLANNER);
782
+ const symbolicDeliberationLogArtifacts = envFlagEnabled(env.RLM_SYMBOLIC_DELIBERATION_LOG);
782
783
  const nonInteractive = shouldForceNonInteractive(env);
783
784
  if (mode === 'symbolic') {
784
785
  const budgets = {
@@ -894,6 +895,7 @@ async function main() {
894
895
  maxRuns: deliberationMaxRuns,
895
896
  maxSummaryBytes: deliberationMaxSummaryBytes,
896
897
  includeInPlannerPrompt: symbolicDeliberationIncludeInPlanner,
898
+ logArtifacts: symbolicDeliberationLogArtifacts,
897
899
  run: (prompt, _meta) => {
898
900
  void _meta;
899
901
  if (!symbolicCollabEnabled) {
@@ -914,7 +916,7 @@ async function main() {
914
916
  });
915
917
  const finalStatus = result.state.final?.status ?? 'unknown';
916
918
  const iterationCount = result.state.symbolic_iterations.length;
917
- console.log(`RLM completed: status=${finalStatus} symbolic_iterations=${iterationCount} exit=${result.exitCode}`);
919
+ logger.info(`RLM completed: status=${finalStatus} symbolic_iterations=${iterationCount} exit=${result.exitCode}`);
918
920
  process.exitCode = result.exitCode;
919
921
  return;
920
922
  }
@@ -935,11 +937,11 @@ async function main() {
935
937
  });
936
938
  const finalStatus = result.state.final?.status ?? 'unknown';
937
939
  const iterationCount = result.state.iterations.length;
938
- console.log(`RLM completed: status=${finalStatus} iterations=${iterationCount} exit=${result.exitCode}`);
940
+ logger.info(`RLM completed: status=${finalStatus} iterations=${iterationCount} exit=${result.exitCode}`);
939
941
  const hasTimeCap = resolvedMaxMinutes !== null && resolvedMaxMinutes > 0;
940
942
  const unboundedBudgetInvalid = validatorCommand === null && maxIterations === 0 && !hasTimeCap;
941
943
  if (finalStatus === 'invalid_config' && unboundedBudgetInvalid) {
942
- console.error('Invalid configuration: --validator none with unbounded iterations and --max-minutes 0 would run forever. Fix: set --max-minutes / RLM_MAX_MINUTES to a positive value (default 2880), set --max-iterations to a positive value, or provide a validator.');
944
+ logger.error('Invalid configuration: --validator none with unbounded iterations and --max-minutes 0 would run forever. Fix: set --max-minutes / RLM_MAX_MINUTES to a positive value (default 2880), set --max-iterations to a positive value, or provide a validator.');
943
945
  }
944
946
  process.exitCode = result.exitCode;
945
947
  }
@@ -5,7 +5,7 @@ import { setTimeout as sleep } from 'node:timers/promises';
5
5
  import { isoTimestamp } from '../cli/utils/time.js';
6
6
  const TASK_ID_PATTERN = /\btask_[a-z]_[a-f0-9]+\b/i;
7
7
  const MAX_LOG_CHARS = 32 * 1024;
8
- const STATUS_RETRY_LIMIT = 3;
8
+ const STATUS_RETRY_LIMIT = 12;
9
9
  const STATUS_RETRY_BACKOFF_MS = 1500;
10
10
  const DEFAULT_LIST_LIMIT = 20;
11
11
  export function extractCloudTaskId(text) {
@@ -129,6 +129,8 @@ export class CodexCloudTaskExecutor {
129
129
  }
130
130
  const timeoutAt = Date.now() + cloudExecution.timeout_seconds * 1000;
131
131
  let statusRetries = 0;
132
+ let lastKnownStatus = cloudExecution.status;
133
+ let loggedNonZeroStatus = false;
132
134
  while (Date.now() < timeoutAt) {
133
135
  const statusResult = await runCloudCommand(['cloud', 'status', taskId]);
134
136
  cloudExecution.last_polled_at = this.now();
@@ -145,9 +147,14 @@ export class CodexCloudTaskExecutor {
145
147
  await this.sleepFn(STATUS_RETRY_BACKOFF_MS * statusRetries);
146
148
  continue;
147
149
  }
150
+ if (statusResult.exitCode !== 0 && mapped !== 'unknown' && !loggedNonZeroStatus) {
151
+ notes.push(`Cloud status returned exit ${statusResult.exitCode} with remote status ${mapped}; continuing to poll.`);
152
+ loggedNonZeroStatus = true;
153
+ }
148
154
  statusRetries = 0;
149
155
  if (mapped !== 'unknown') {
150
156
  cloudExecution.status = mapped;
157
+ lastKnownStatus = mapped;
151
158
  }
152
159
  if (mapped === 'ready') {
153
160
  notes.push(`Cloud task completed: ${taskId}`);
@@ -161,7 +168,7 @@ export class CodexCloudTaskExecutor {
161
168
  }
162
169
  if (cloudExecution.status === 'running' || cloudExecution.status === 'queued') {
163
170
  cloudExecution.status = 'failed';
164
- cloudExecution.error = `Timed out waiting for cloud task completion after ${cloudExecution.timeout_seconds}s.`;
171
+ cloudExecution.error = `Timed out waiting for cloud task completion after ${cloudExecution.timeout_seconds}s (last remote status: ${lastKnownStatus}, polls: ${cloudExecution.poll_count}).`;
165
172
  }
166
173
  if (cloudExecution.status === 'ready') {
167
174
  const diffResult = await runCloudCommand(['cloud', 'diff', taskId]);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kbediako/codex-orchestrator",
3
- "version": "0.1.17",
3
+ "version": "0.1.18",
4
4
  "license": "MIT",
5
5
  "repository": {
6
6
  "type": "git",
@@ -81,6 +81,16 @@ delegate.spawn({
81
81
  })
82
82
  ```
83
83
 
84
+ ## Collab lifecycle hygiene (required)
85
+
86
+ When using collab tools (`spawn_agent` / `wait` / `close_agent`):
87
+
88
+ - Treat each spawned `agent_id` as a resource that must be closed.
89
+ - For every successful spawn, run `wait` then `close_agent` for the same id.
90
+ - Keep a local list of spawned ids and run a final cleanup pass before returning.
91
+ - On timeout/error paths, still close known ids before reporting failure.
92
+ - If you see `agent thread limit reached`, stop spawning immediately, close known ids, and retry only after cleanup.
93
+
84
94
  ## RLM budget overrides (recommended defaults)
85
95
 
86
96
  If you want deeper recursion or longer wall-clock time for delegated runs, set RLM budgets on the delegation server:
@@ -123,3 +133,4 @@ Delegation MCP expects JSONL. Keep `codex-orchestrator` aligned with the current
123
133
  - **Run identifiers**: status/pause/cancel require `manifest_path`; question queue requires `parent_manifest_path`.
124
134
  - **Collab payload mismatch**: `spawn_agent` calls fail if they include both `message` and `items`.
125
135
  - **Collab depth limits**: recursive collab fan-out can fail near max depth; prefer shallow parent fan-out.
136
+ - **Collab lifecycle leaks**: missing `close_agent` calls can exhaust thread slots and block future spawns (`agent thread limit reached`).
@@ -20,6 +20,9 @@ Collab multi-agent mode is separate from delegation. For symbolic RLM subcalls t
20
20
  - Spawn returns an `agent_id` (thread id). Current TUI collab rendering is id-based; do not depend on custom visible agent names.
21
21
  - Subagents spawned through collab run with approval effectively set to `never`; design child tasks to avoid approval/escalation requirements.
22
22
  - Collab spawn depth is bounded. Near/at max depth, recursive delegation can fail or collab can be disabled in children; prefer shallow parent fan-out.
23
+ - **Lifecycle is mandatory:** for every successful `spawn_agent`, run `wait` and then `close_agent` for that same id before task completion.
24
+ - Keep a local list of spawned ids and run a final cleanup pass so no agent id is left unclosed on timeout/error paths.
25
+ - If spawn fails with `agent thread limit reached`, stop spawning, close any known ids first, then surface a concise recovery note.
23
26
 
24
27
  ## Quick-start workflow (canned)
25
28
 
@@ -174,3 +177,4 @@ repeat:
174
177
  - **Missing control files:** delegate tools rely on `control_endpoint.json` in the run directory; older runs may not have it.
175
178
  - **Collab payload mismatch:** `spawn_agent` rejects calls that include both `message` and `items`.
176
179
  - **Collab UI assumptions:** agent rows/records are id-based today; use explicit stream role text in prompts/artifacts for operator clarity.
180
+ - **Collab lifecycle leaks:** missing `close_agent` calls accumulate open threads and can trigger `agent thread limit reached`; always finish `spawn -> wait -> close_agent` per id.
@@ -16,6 +16,7 @@ Use this skill when a task needs a spec-driven workflow. The objective is to cre
16
16
  - TECH_SPEC: capture technical requirements (use `.agent/task/templates/tech-spec-template.md`; stored under `tasks/specs/<id>-<slug>.md`).
17
17
  - ACTION_PLAN: capture sequencing/milestones (use `.agent/task/templates/action-plan-template.md`).
18
18
  - Depth scales with scope, but all three docs are required.
19
+ - For low-risk tiny edits, follow the bounded shortcut in `docs/micro-task-path.md` instead of long-form rewrites (still requires task/spec evidence).
19
20
 
20
21
  2) Register the TECH_SPEC and task
21
22
  - Add the TECH_SPEC to `tasks/index.json` (including `last_review`).
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: standalone-review
3
- description: Use for ad-hoc/standalone reviews outside pipelines (fast checks during implementation or before handoff) using `codex review`.
3
+ description: Use for required periodic cross-check reviews during implementation and before handoff using `codex review`.
4
4
  ---
5
5
 
6
6
  # Standalone Review
@@ -10,6 +10,17 @@ description: Use for ad-hoc/standalone reviews outside pipelines (fast checks du
10
10
  Use this skill when you need a fast, ad-hoc review without running a pipeline or collecting a manifest. It is ideal during implementation or for quick pre-flight checks.
11
11
  Before implementation, use it to review the task/spec against the user’s intent and record the approval in the PRD/TECH_SPEC or task notes.
12
12
 
13
+ ## Auto-trigger policy (required)
14
+
15
+ Run this skill automatically whenever any condition is true:
16
+ - You made code/config/script/test edits since the last standalone review.
17
+ - You finished a meaningful chunk of work (default: behavior change or about 2+ files touched).
18
+ - You are about to report completion, propose merge, or answer "what's next?" with recommendations.
19
+ - You addressed external feedback (PR reviews, bot comments, or CI-fix patches).
20
+ - 45 minutes of active implementation elapsed without a standalone review.
21
+
22
+ If review execution is blocked, record why in task notes, then do manual diff review plus targeted tests before proceeding.
23
+
13
24
  ## Quick start
14
25
 
15
26
  Uncommitted diff:
@@ -39,6 +50,7 @@ codex review "Focus on correctness, regressions, edge cases; list missing tests.
39
50
  - Keep prompts short, specific, and test-oriented.
40
51
 
41
52
  2) Run the review often
53
+ - Follow the auto-trigger policy above (not optional).
42
54
  - Run after each meaningful chunk of work.
43
55
  - Prefer targeted focus prompts for WIP reviews.
44
56