@kbediako/codex-orchestrator 0.1.12 → 0.1.14-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38):
  1. package/LICENSE +19 -5
  2. package/README.md +47 -2
  3. package/dist/bin/codex-orchestrator.js +93 -0
  4. package/dist/orchestrator/src/cli/adapters/CommandBuilder.js +27 -3
  5. package/dist/orchestrator/src/cli/adapters/CommandPlanner.js +17 -1
  6. package/dist/orchestrator/src/cli/adapters/CommandReviewer.js +36 -1
  7. package/dist/orchestrator/src/cli/adapters/CommandTester.js +28 -0
  8. package/dist/orchestrator/src/cli/adapters/cloudFailureDiagnostics.js +45 -0
  9. package/dist/orchestrator/src/cli/codexCliSetup.js +294 -0
  10. package/dist/orchestrator/src/cli/init.js +3 -0
  11. package/dist/orchestrator/src/cli/mcp.js +4 -2
  12. package/dist/orchestrator/src/cli/orchestrator.js +298 -28
  13. package/dist/orchestrator/src/cli/rlm/context.js +31 -3
  14. package/dist/orchestrator/src/cli/rlm/symbolic.js +152 -15
  15. package/dist/orchestrator/src/cli/rlmRunner.js +59 -5
  16. package/dist/orchestrator/src/cli/run/manifest.js +3 -0
  17. package/dist/orchestrator/src/cli/services/commandRunner.js +87 -0
  18. package/dist/orchestrator/src/cli/services/runSummaryWriter.js +24 -0
  19. package/dist/orchestrator/src/cli/skills.js +1 -1
  20. package/dist/orchestrator/src/cli/utils/codexCli.js +94 -0
  21. package/dist/orchestrator/src/cli/utils/codexPaths.js +13 -0
  22. package/dist/orchestrator/src/cli/utils/devtools.js +9 -12
  23. package/dist/orchestrator/src/cloud/CodexCloudTaskExecutor.js +255 -0
  24. package/dist/orchestrator/src/learning/crystalizer.js +2 -1
  25. package/dist/orchestrator/src/manager.js +1 -0
  26. package/dist/orchestrator/src/sync/CloudSyncWorker.js +37 -7
  27. package/dist/scripts/design/pipeline/context.js +3 -2
  28. package/dist/scripts/lib/run-manifests.js +14 -0
  29. package/docs/README.md +22 -2
  30. package/package.json +6 -2
  31. package/schemas/manifest.json +83 -0
  32. package/skills/collab-deliberation/SKILL.md +21 -0
  33. package/skills/collab-evals/SKILL.md +32 -0
  34. package/skills/delegate-early/SKILL.md +47 -0
  35. package/skills/delegation-usage/DELEGATION_GUIDE.md +5 -4
  36. package/skills/delegation-usage/SKILL.md +11 -5
  37. package/skills/docs-first/SKILL.md +2 -1
  38. package/templates/README.md +4 -0
@@ -20,7 +20,7 @@ import { PipelineResolver } from './services/pipelineResolver.js';
20
20
  import { ControlPlaneService } from './services/controlPlaneService.js';
21
21
  import { ControlWatcher } from './control/controlWatcher.js';
22
22
  import { SchedulerService } from './services/schedulerService.js';
23
- import { applyHandlesToRunSummary, applyPrivacyToRunSummary, persistRunSummary } from './services/runSummaryWriter.js';
23
+ import { applyHandlesToRunSummary, applyPrivacyToRunSummary, applyCloudExecutionToRunSummary, persistRunSummary } from './services/runSummaryWriter.js';
24
24
  import { prepareRun, resolvePipelineForResume, overrideTaskEnvironment } from './services/runPreparation.js';
25
25
  import { loadPackageConfig, loadUserConfig } from './config/userConfig.js';
26
26
  import { loadDelegationConfigFiles, computeEffectiveDelegationConfig, parseDelegationConfigOverride, splitDelegationConfigOverrides } from './config/delegationConfig.js';
@@ -28,8 +28,13 @@ import { ControlServer } from './control/controlServer.js';
28
28
  import { RunEventEmitter, RunEventPublisher, snapshotStages } from './events/runEvents.js';
29
29
  import { RunEventStream, attachRunEventAdapter } from './events/runEventStream.js';
30
30
  import { CLI_EXECUTION_MODE_PARSER, resolveRequiresCloudPolicy } from '../utils/executionMode.js';
31
+ import { resolveCodexCliBin } from './utils/codexCli.js';
32
+ import { CodexCloudTaskExecutor } from '../cloud/CodexCloudTaskExecutor.js';
31
33
  const resolveBaseEnvironment = () => normalizeEnvironmentPaths(resolveEnvironmentPaths());
32
34
  const CONFIG_OVERRIDE_ENV_KEYS = ['CODEX_CONFIG_OVERRIDES', 'CODEX_MCP_CONFIG_OVERRIDES'];
35
+ const DEFAULT_CLOUD_POLL_INTERVAL_SECONDS = 10;
36
+ const DEFAULT_CLOUD_TIMEOUT_SECONDS = 1800;
37
+ const DEFAULT_CLOUD_ATTEMPTS = 1;
33
38
  function collectDelegationEnvOverrides(env = process.env) {
34
39
  const layers = [];
35
40
  for (const key of CONFIG_OVERRIDE_ENV_KEYS) {
@@ -52,6 +57,37 @@ function collectDelegationEnvOverrides(env = process.env) {
52
57
  }
53
58
  return layers;
54
59
  }
60
/**
 * Normalizes an optional string setting.
 *
 * @param {unknown} value - Candidate value (metadata field, env var, ...).
 * @returns {string|null} The trimmed string, or null when `value` is not a
 *   string or contains only whitespace.
 */
function readCloudString(value) {
    if (typeof value !== 'string') {
        return null;
    }
    const text = value.trim();
    return text.length > 0 ? text : null;
}
63
/**
 * Reads a positive integer setting, falling back when it is absent or invalid.
 *
 * Parsing uses `Number.parseInt(raw, 10)`, so a numeric prefix is accepted
 * (e.g. `'15s'` yields 15).
 *
 * @param {string|undefined|null} raw - Raw setting value.
 * @param {number} fallback - Value returned when `raw` is falsy, not numeric,
 *   or not strictly positive.
 * @returns {number} The parsed integer or `fallback`.
 */
function readCloudNumber(raw, fallback) {
    const parsed = raw ? Number.parseInt(raw, 10) : Number.NaN;
    const usable = Number.isFinite(parsed) && parsed > 0;
    return usable ? parsed : fallback;
}
73
/**
 * Picks the cloud environment id for a target.
 *
 * Sources are consulted in priority order and the first non-blank string wins:
 * target metadata (`cloudEnvId`, `cloud_env_id`, `envId`, `environmentId`),
 * task `metadata.cloud` (`envId`, `environmentId`), task metadata
 * (`cloudEnvId`, `cloud_env_id`), then `CODEX_CLOUD_ENV_ID` from the run's env
 * overrides and finally from `process.env`.
 *
 * @param {object} task - Task whose `metadata` may carry cloud settings.
 * @param {object} target - Plan target whose `metadata` may carry cloud settings.
 * @param {object} [envOverrides] - Per-run environment overrides.
 * @returns {string|null} The resolved id, or null when none is configured.
 */
function resolveCloudEnvironmentId(task, target, envOverrides) {
    const targetMeta = target.metadata ?? {};
    const taskMeta = task.metadata ?? {};
    const taskCloudMeta = taskMeta.cloud ?? null;
    const sources = [
        targetMeta.cloudEnvId,
        targetMeta.cloud_env_id,
        targetMeta.envId,
        targetMeta.environmentId,
        taskCloudMeta?.envId,
        taskCloudMeta?.environmentId,
        taskMeta.cloudEnvId,
        taskMeta.cloud_env_id,
        envOverrides?.CODEX_CLOUD_ENV_ID,
        process.env.CODEX_CLOUD_ENV_ID
    ];
    for (const source of sources) {
        const id = readCloudString(source);
        if (id !== null) {
            return id;
        }
    }
    return null;
}
55
91
  export class CodexOrchestrator {
56
92
  baseEnv;
57
93
  controlPlane = new ControlPlaneService();
@@ -136,7 +172,8 @@ export class CodexOrchestrator {
136
172
  eventStream: stream,
137
173
  onEventEntry,
138
174
  persister,
139
- envOverrides: preparation.envOverrides
175
+ envOverrides: preparation.envOverrides,
176
+ executionModeOverride: options.executionMode
140
177
  });
141
178
  }
142
179
  finally {
@@ -360,7 +397,7 @@ export class CodexOrchestrator {
360
397
  logPath: params.paths.logPath
361
398
  });
362
399
  }
363
- createTaskManager(runId, pipeline, executePipeline, getResult, plannerInstance, env) {
400
+ createTaskManager(runId, pipeline, executePipeline, getResult, plannerInstance, env, modeOverride) {
364
401
  const planner = plannerInstance ?? new CommandPlanner(pipeline);
365
402
  const builder = new CommandBuilder(executePipeline);
366
403
  const tester = new CommandTester(getResult);
@@ -373,12 +410,15 @@ export class CodexOrchestrator {
373
410
  tester,
374
411
  reviewer,
375
412
  runIdFactory: () => runId,
376
- modePolicy: (task, subtask) => this.determineMode(task, subtask),
413
+ modePolicy: (task, subtask) => this.determineMode(task, subtask, modeOverride),
377
414
  persistence: { autoStart: true, stateStore, manifestWriter }
378
415
  };
379
416
  return new TaskManager(options);
380
417
  }
381
- determineMode(task, subtask) {
418
+ determineMode(task, subtask, overrideMode) {
419
+ if (overrideMode) {
420
+ return overrideMode;
421
+ }
382
422
  if (this.requiresCloudExecution(task, subtask)) {
383
423
  return 'cloud';
384
424
  }
@@ -402,6 +442,9 @@ export class CodexOrchestrator {
402
442
  return Boolean(task.metadata?.execution?.parallel);
403
443
  }
404
444
  async executePipeline(options) {
445
+ if (options.mode === 'cloud') {
446
+ return await this.executeCloudPipeline(options);
447
+ }
405
448
  const { env, pipeline, manifest, paths, runEvents, envOverrides } = options;
406
449
  const notes = [];
407
450
  let success = true;
@@ -513,7 +556,8 @@ export class CodexOrchestrator {
513
556
  taskId: env.taskId,
514
557
  pipelineId: stage.pipeline,
515
558
  parentRunId: manifest.run_id,
516
- format: 'json'
559
+ format: 'json',
560
+ executionMode: options.executionModeOverride
517
561
  });
518
562
  entry.completed_at = isoTimestamp();
519
563
  entry.sub_run_id = child.manifest.run_id;
@@ -607,31 +651,251 @@ export class CodexOrchestrator {
607
651
  logPath: relativeToRepo(env, paths.logPath)
608
652
  };
609
653
  }
654
+ async executeCloudPipeline(options) {
655
+ const { env, pipeline, manifest, paths, runEvents, target, task, envOverrides } = options;
656
+ const notes = [];
657
+ let success = true;
658
+ manifest.guardrail_status = undefined;
659
+ const persister = options.persister ??
660
+ new ManifestPersister({
661
+ manifest,
662
+ paths,
663
+ persistIntervalMs: Math.max(1000, manifest.heartbeat_interval_seconds * 1000)
664
+ });
665
+ const schedulePersist = (persistOptions = {}) => persister.schedule(persistOptions);
666
+ const pushHeartbeat = (forceManifest = false) => {
667
+ updateHeartbeat(manifest);
668
+ return schedulePersist({ manifest: forceManifest, heartbeat: true, force: forceManifest });
669
+ };
670
+ const controlWatcher = new ControlWatcher({
671
+ paths,
672
+ manifest,
673
+ eventStream: options.eventStream,
674
+ onEntry: options.onEventEntry,
675
+ persist: () => schedulePersist({ manifest: true, force: true })
676
+ });
677
+ manifest.status = 'in_progress';
678
+ updateHeartbeat(manifest);
679
+ await schedulePersist({ manifest: true, heartbeat: true, force: true });
680
+ runEvents?.runStarted(snapshotStages(manifest, pipeline), manifest.status);
681
+ const heartbeatInterval = setInterval(() => {
682
+ void pushHeartbeat(false).catch((error) => {
683
+ logger.warn(`Heartbeat update failed for run ${manifest.run_id}: ${error?.message ?? String(error)}`);
684
+ });
685
+ }, manifest.heartbeat_interval_seconds * 1000);
686
+ const targetStageId = this.resolveTargetStageId(target, pipeline);
687
+ const targetStage = targetStageId
688
+ ? pipeline.stages.find((stage) => stage.id === targetStageId)
689
+ : undefined;
690
+ const targetEntry = targetStageId
691
+ ? manifest.commands.find((command) => command.id === targetStageId)
692
+ : undefined;
693
+ try {
694
+ await controlWatcher.sync();
695
+ await controlWatcher.waitForResume();
696
+ if (controlWatcher.isCanceled()) {
697
+ manifest.status_detail = 'run-canceled';
698
+ success = false;
699
+ }
700
+ else if (!targetStage || targetStage.kind !== 'command' || !targetEntry) {
701
+ success = false;
702
+ manifest.status_detail = 'cloud-target-missing';
703
+ const detail = targetStageId
704
+ ? `Cloud execution target "${targetStageId}" could not be resolved to a command stage.`
705
+ : `Cloud execution target "${target.id}" could not be resolved.`;
706
+ appendSummary(manifest, detail);
707
+ notes.push(detail);
708
+ }
709
+ else {
710
+ for (let i = 0; i < manifest.commands.length; i += 1) {
711
+ const entry = manifest.commands[i];
712
+ if (!entry || entry.id === targetStageId) {
713
+ continue;
714
+ }
715
+ entry.status = 'skipped';
716
+ entry.started_at = entry.started_at ?? isoTimestamp();
717
+ entry.completed_at = isoTimestamp();
718
+ entry.summary = `Skipped in cloud mode (target stage: ${targetStageId}).`;
719
+ }
720
+ const environmentId = resolveCloudEnvironmentId(task, target, envOverrides);
721
+ if (!environmentId) {
722
+ success = false;
723
+ manifest.status_detail = 'cloud-env-missing';
724
+ const detail = 'Cloud execution requested but no environment id is configured. Set CODEX_CLOUD_ENV_ID or provide target metadata.cloudEnvId.';
725
+ manifest.cloud_execution = {
726
+ task_id: null,
727
+ environment_id: null,
728
+ status: 'failed',
729
+ status_url: null,
730
+ submitted_at: null,
731
+ completed_at: isoTimestamp(),
732
+ last_polled_at: null,
733
+ poll_count: 0,
734
+ poll_interval_seconds: DEFAULT_CLOUD_POLL_INTERVAL_SECONDS,
735
+ timeout_seconds: DEFAULT_CLOUD_TIMEOUT_SECONDS,
736
+ attempts: DEFAULT_CLOUD_ATTEMPTS,
737
+ diff_path: null,
738
+ diff_url: null,
739
+ diff_status: 'unavailable',
740
+ apply_status: 'not_requested',
741
+ log_path: null,
742
+ error: detail
743
+ };
744
+ appendSummary(manifest, detail);
745
+ notes.push(detail);
746
+ targetEntry.status = 'failed';
747
+ targetEntry.started_at = targetEntry.started_at ?? isoTimestamp();
748
+ targetEntry.completed_at = isoTimestamp();
749
+ targetEntry.exit_code = 1;
750
+ targetEntry.summary = detail;
751
+ }
752
+ else {
753
+ targetEntry.status = 'running';
754
+ targetEntry.started_at = isoTimestamp();
755
+ await schedulePersist({ manifest: true, force: true });
756
+ runEvents?.stageStarted({
757
+ stageId: targetStage.id,
758
+ stageIndex: targetEntry.index,
759
+ title: targetStage.title,
760
+ kind: 'command',
761
+ logPath: targetEntry.log_path,
762
+ status: targetEntry.status
763
+ });
764
+ const executor = new CodexCloudTaskExecutor();
765
+ const prompt = this.buildCloudPrompt(task, target, pipeline, targetStage);
766
+ const pollIntervalSeconds = readCloudNumber(envOverrides?.CODEX_CLOUD_POLL_INTERVAL_SECONDS ?? process.env.CODEX_CLOUD_POLL_INTERVAL_SECONDS, DEFAULT_CLOUD_POLL_INTERVAL_SECONDS);
767
+ const timeoutSeconds = readCloudNumber(envOverrides?.CODEX_CLOUD_TIMEOUT_SECONDS ?? process.env.CODEX_CLOUD_TIMEOUT_SECONDS, DEFAULT_CLOUD_TIMEOUT_SECONDS);
768
+ const attempts = readCloudNumber(envOverrides?.CODEX_CLOUD_EXEC_ATTEMPTS ?? process.env.CODEX_CLOUD_EXEC_ATTEMPTS, DEFAULT_CLOUD_ATTEMPTS);
769
+ const branch = readCloudString(envOverrides?.CODEX_CLOUD_BRANCH) ??
770
+ readCloudString(process.env.CODEX_CLOUD_BRANCH);
771
+ const codexBin = resolveCodexCliBin({ ...process.env, ...(envOverrides ?? {}) });
772
+ const cloudResult = await executor.execute({
773
+ codexBin,
774
+ prompt,
775
+ environmentId,
776
+ repoRoot: env.repoRoot,
777
+ runDir: paths.runDir,
778
+ pollIntervalSeconds,
779
+ timeoutSeconds,
780
+ attempts,
781
+ branch,
782
+ env: envOverrides
783
+ });
784
+ success = cloudResult.success;
785
+ notes.push(...cloudResult.notes);
786
+ manifest.cloud_execution = cloudResult.cloudExecution;
787
+ targetEntry.log_path = cloudResult.cloudExecution.log_path;
788
+ targetEntry.completed_at = isoTimestamp();
789
+ targetEntry.exit_code = cloudResult.success ? 0 : 1;
790
+ targetEntry.status = cloudResult.success ? 'succeeded' : 'failed';
791
+ targetEntry.summary = cloudResult.summary;
792
+ if (!cloudResult.success) {
793
+ manifest.status_detail = `cloud:${targetStage.id}:failed`;
794
+ appendSummary(manifest, cloudResult.summary);
795
+ }
796
+ await schedulePersist({ manifest: true, force: true });
797
+ runEvents?.stageCompleted({
798
+ stageId: targetStage.id,
799
+ stageIndex: targetEntry.index,
800
+ title: targetStage.title,
801
+ kind: 'command',
802
+ status: targetEntry.status,
803
+ exitCode: targetEntry.exit_code,
804
+ summary: targetEntry.summary,
805
+ logPath: targetEntry.log_path
806
+ });
807
+ }
808
+ }
809
+ }
810
+ finally {
811
+ clearInterval(heartbeatInterval);
812
+ await schedulePersist({ force: true });
813
+ }
814
+ await controlWatcher.sync();
815
+ if (controlWatcher.isCanceled()) {
816
+ finalizeStatus(manifest, 'cancelled', manifest.status_detail ?? 'run-canceled');
817
+ }
818
+ else if (success) {
819
+ finalizeStatus(manifest, 'succeeded');
820
+ }
821
+ else {
822
+ finalizeStatus(manifest, 'failed', manifest.status_detail ?? 'cloud-execution-failed');
823
+ }
824
+ updateHeartbeat(manifest);
825
+ await schedulePersist({ manifest: true, heartbeat: true, force: true }).catch((error) => {
826
+ logger.warn(`Heartbeat update failed for run ${manifest.run_id}: ${error?.message ?? String(error)}`);
827
+ });
828
+ await schedulePersist({ force: true });
829
+ await appendMetricsEntry(env, paths, manifest, persister);
830
+ return {
831
+ success,
832
+ notes,
833
+ manifest,
834
+ manifestPath: relativeToRepo(env, paths.manifestPath),
835
+ logPath: relativeToRepo(env, paths.logPath)
836
+ };
837
+ }
838
+ resolveTargetStageId(target, pipeline) {
839
+ const metadataStageId = typeof target.metadata?.stageId === 'string' ? target.metadata.stageId : null;
840
+ if (metadataStageId && pipeline.stages.some((stage) => stage.id === metadataStageId)) {
841
+ return metadataStageId;
842
+ }
843
+ if (target.id.includes(':')) {
844
+ const suffix = target.id.split(':').pop() ?? null;
845
+ if (suffix && pipeline.stages.some((stage) => stage.id === suffix)) {
846
+ return suffix;
847
+ }
848
+ }
849
+ if (pipeline.stages.some((stage) => stage.id === target.id)) {
850
+ return target.id;
851
+ }
852
+ return null;
853
+ }
854
+ buildCloudPrompt(task, target, pipeline, stage) {
855
+ const lines = [
856
+ `Task ID: ${task.id}`,
857
+ `Task title: ${task.title}`,
858
+ task.description ? `Task description: ${task.description}` : null,
859
+ `Pipeline: ${pipeline.id}`,
860
+ `Target stage: ${stage.id} (${target.description})`,
861
+ '',
862
+ 'Apply the required repository changes for this target stage and produce a diff.'
863
+ ].filter((line) => Boolean(line));
864
+ return lines.join('\n');
865
+ }
610
866
  async performRunLifecycle(context) {
611
- const { env, pipeline, manifest, paths, planner, taskContext, runId, persister, envOverrides } = context;
612
- let pipelineResult = null;
613
- let executing = null;
614
- const executePipeline = async () => {
615
- if (!executing) {
616
- executing = this.executePipeline({
617
- env,
618
- pipeline,
619
- manifest,
620
- paths,
621
- runEvents: context.runEvents,
622
- eventStream: context.eventStream,
623
- onEventEntry: context.onEventEntry,
624
- persister,
625
- envOverrides
626
- }).then((result) => {
627
- pipelineResult = result;
628
- return result;
629
- });
867
+ const { env, pipeline, manifest, paths, planner, taskContext, runId, persister, envOverrides, executionModeOverride } = context;
868
+ let latestPipelineResult = null;
869
+ const executingByKey = new Map();
870
+ const executePipeline = async (input) => {
871
+ const key = `${input.mode}:${input.target.id}`;
872
+ const existing = executingByKey.get(key);
873
+ if (existing) {
874
+ return existing;
630
875
  }
876
+ const executing = this.executePipeline({
877
+ env,
878
+ pipeline,
879
+ manifest,
880
+ paths,
881
+ mode: input.mode,
882
+ executionModeOverride,
883
+ target: input.target,
884
+ task: taskContext,
885
+ runEvents: context.runEvents,
886
+ eventStream: context.eventStream,
887
+ onEventEntry: context.onEventEntry,
888
+ persister,
889
+ envOverrides
890
+ }).then((result) => {
891
+ latestPipelineResult = result;
892
+ return result;
893
+ });
894
+ executingByKey.set(key, executing);
631
895
  return executing;
632
896
  };
633
- const getResult = () => pipelineResult;
634
- const manager = this.createTaskManager(runId, pipeline, executePipeline, getResult, planner, env);
897
+ const getResult = () => latestPipelineResult;
898
+ const manager = this.createTaskManager(runId, pipeline, executePipeline, getResult, planner, env, executionModeOverride);
635
899
  this.attachPlanTargetTracker(manager, manifest, paths, persister);
636
900
  getPrivacyGuard().reset();
637
901
  const controlPlaneResult = await this.controlPlane.guard({
@@ -672,6 +936,7 @@ export class CodexOrchestrator {
672
936
  this.scheduler.applySchedulerToRunSummary(runSummary, schedulerPlan);
673
937
  applyHandlesToRunSummary(runSummary, manifest);
674
938
  applyPrivacyToRunSummary(runSummary, manifest);
939
+ applyCloudExecutionToRunSummary(runSummary, manifest);
675
940
  this.controlPlane.applyControlPlaneToRunSummary(runSummary, controlPlaneResult);
676
941
  await persistRunSummary(env, paths, manifest, runSummary, persister);
677
942
  context.runEvents?.runCompleted({
@@ -722,7 +987,8 @@ export class CodexOrchestrator {
722
987
  log_path: manifest.log_path,
723
988
  heartbeat_at: manifest.heartbeat_at,
724
989
  commands: manifest.commands,
725
- child_runs: manifest.child_runs
990
+ child_runs: manifest.child_runs,
991
+ cloud_execution: manifest.cloud_execution ?? null
726
992
  };
727
993
  }
728
994
  renderStatus(manifest) {
@@ -731,6 +997,10 @@ export class CodexOrchestrator {
731
997
  logger.info(`Started: ${manifest.started_at}`);
732
998
  logger.info(`Completed: ${manifest.completed_at ?? 'in-progress'}`);
733
999
  logger.info(`Manifest: ${manifest.artifact_root}/manifest.json`);
1000
+ if (manifest.cloud_execution?.task_id) {
1001
+ logger.info(`Cloud: ${manifest.cloud_execution.task_id} [${manifest.cloud_execution.status}]` +
1002
+ (manifest.cloud_execution.status_url ? ` ${manifest.cloud_execution.status_url}` : ''));
1003
+ }
734
1004
  logger.info('Commands:');
735
1005
  for (const command of manifest.commands) {
736
1006
  const summary = command.summary ? ` — ${command.summary}` : '';
@@ -216,6 +216,29 @@ export class ContextStore {
216
216
  get sourceByteLength() {
217
217
  return this.context.index.source.byte_length;
218
218
  }
219
+ resolveChunkId(chunkId) {
220
+ if (this.chunkMap.has(chunkId)) {
221
+ return chunkId;
222
+ }
223
+ if (!/^\d+$/.test(chunkId)) {
224
+ return null;
225
+ }
226
+ const index = Number.parseInt(chunkId, 10);
227
+ if (!Number.isFinite(index)) {
228
+ return null;
229
+ }
230
+ const chunks = this.context.index.chunks;
231
+ if (index >= 0 && index < chunks.length) {
232
+ return chunks[index]?.id ?? null;
233
+ }
234
+ // Some upstream pointers are 1-based. Tolerate that legacy form by
235
+ // mapping index=N to chunks[N-1] when the 0-based lookup is out of range.
236
+ const fallback = index - 1;
237
+ if (fallback >= 0 && fallback < chunks.length) {
238
+ return chunks[fallback]?.id ?? null;
239
+ }
240
+ return null;
241
+ }
219
242
  validatePointer(pointer) {
220
243
  const parsed = parseContextPointer(pointer);
221
244
  if (!parsed) {
@@ -224,10 +247,11 @@ export class ContextStore {
224
247
  if (parsed.objectId !== this.context.index.object_id) {
225
248
  return null;
226
249
  }
227
- if (!this.chunkMap.has(parsed.chunkId)) {
250
+ const resolvedChunkId = this.resolveChunkId(parsed.chunkId);
251
+ if (!resolvedChunkId) {
228
252
  return null;
229
253
  }
230
- return parsed;
254
+ return { objectId: parsed.objectId, chunkId: resolvedChunkId };
231
255
  }
232
256
  async read(pointer, offset, bytes) {
233
257
  const parsed = parseContextPointer(pointer);
@@ -237,7 +261,11 @@ export class ContextStore {
237
261
  if (parsed.objectId !== this.context.index.object_id) {
238
262
  throw new Error('context object mismatch');
239
263
  }
240
- const chunk = this.chunkMap.get(parsed.chunkId);
264
+ const resolvedChunkId = this.resolveChunkId(parsed.chunkId);
265
+ if (!resolvedChunkId) {
266
+ throw new Error('context chunk missing');
267
+ }
268
+ const chunk = this.chunkMap.get(resolvedChunkId);
241
269
  if (!chunk) {
242
270
  throw new Error('context chunk missing');
243
271
  }
@@ -69,28 +69,153 @@ function toNumber(value) {
69
69
  }
70
70
  return null;
71
71
  }
72
- function extractJsonCandidate(raw) {
72
/**
 * Scans text for top-level `{ ... }` spans that may be JSON objects.
 *
 * Braces are balanced while double-quoted strings (with backslash escapes)
 * are skipped, so braces inside string values do not affect nesting. Spans
 * are returned in order of appearance; they are not validated as JSON.
 *
 * @param {string} raw - Raw planner output.
 * @returns {string[]} Candidate object substrings (empty when none are found).
 */
function extractJsonCandidates(raw) {
    const text = raw.trim();
    const found = [];
    let openAt = -1;
    let nesting = 0;
    let insideString = false;
    let afterBackslash = false;
    for (let pos = 0; pos < text.length; pos += 1) {
        const ch = text[pos];
        if (insideString) {
            if (afterBackslash) {
                afterBackslash = false;
            }
            else if (ch === '\\') {
                afterBackslash = true;
            }
            else if (ch === '"') {
                insideString = false;
            }
        }
        else if (ch === '"') {
            insideString = true;
        }
        else if (ch === '{') {
            if (nesting === 0) {
                openAt = pos;
            }
            nesting += 1;
        }
        else if (ch === '}' && nesting > 0) {
            nesting -= 1;
            if (nesting === 0 && openAt >= 0) {
                found.push(text.slice(openAt, pos + 1));
                openAt = -1;
            }
        }
    }
    return found;
}
121
/**
 * Accepts a parsed value as a planner plan when it is an object with
 * `schema_version` equal to 1 (number, or a string that converts to 1) and a
 * string `intent`.
 *
 * A string `schema_version` is rewritten in place to the number 1; the same
 * object is returned.
 *
 * @param {unknown} value - Parsed JSON value.
 * @returns {object|null} The (possibly normalized) plan, or null when rejected.
 */
function normalizePlannerPlan(value) {
    if (value === null || typeof value !== 'object') {
        return null;
    }
    const { schema_version: declaredVersion, intent } = value;
    const versionIsString = typeof declaredVersion === 'string';
    const version = versionIsString ? Number(declaredVersion) : declaredVersion;
    if (version !== 1 || typeof intent !== 'string') {
        return null;
    }
    if (versionIsString) {
        // version is exactly 1 here, so store the numeric form.
        value.schema_version = version;
    }
    return value;
}
139
/**
 * Finds a planner plan inside a parsed value, tolerating common wrappers.
 *
 * Tried in order: the value itself; if it is an array, its elements from last
 * to first; its `plan` property; the elements of its `plans` array from last
 * to first.
 *
 * @param {unknown} value - Parsed JSON value.
 * @returns {object|null} The first accepted plan, or null when none is found.
 */
function unwrapPlannerPlan(value) {
    // Returns the last element of `items` that normalizes to a plan.
    const pickLastPlan = (items) => {
        for (const item of [...items].reverse()) {
            const plan = normalizePlannerPlan(item);
            if (plan) {
                return plan;
            }
        }
        return null;
    };
    const direct = normalizePlannerPlan(value);
    if (direct) {
        return direct;
    }
    if (Array.isArray(value)) {
        const fromArray = pickLastPlan(value);
        if (fromArray) {
            return fromArray;
        }
    }
    if (!value || typeof value !== 'object') {
        return null;
    }
    const nested = value.plan ? normalizePlannerPlan(value.plan) : null;
    if (nested) {
        return nested;
    }
    return Array.isArray(value.plans) ? pickLastPlan(value.plans) : null;
}
87
172
  function parsePlannerOutput(raw) {
88
- const candidate = extractJsonCandidate(raw);
89
- if (!candidate) {
90
- throw new Error('plan_parse_error');
173
+ const candidates = extractJsonCandidates(raw);
174
+ for (let index = candidates.length - 1; index >= 0; index -= 1) {
175
+ const candidate = candidates[index];
176
+ try {
177
+ const parsed = JSON.parse(candidate);
178
+ const normalized = unwrapPlannerPlan(parsed);
179
+ if (normalized) {
180
+ return normalized;
181
+ }
182
+ }
183
+ catch {
184
+ // ignore parse errors and try earlier candidates
185
+ }
91
186
  }
92
- const parsed = JSON.parse(candidate);
93
- return parsed;
187
+ if (!candidates.length) {
188
+ try {
189
+ const parsed = JSON.parse(raw.trim());
190
+ const normalized = unwrapPlannerPlan(parsed);
191
+ if (normalized) {
192
+ return normalized;
193
+ }
194
+ }
195
+ catch {
196
+ // fall through
197
+ }
198
+ }
199
+ throw new Error('plan_parse_error');
200
+ }
201
/**
 * Prefixes the planner prompt with retry guidance.
 *
 * The header always asks for JSON only, adds a hint when a previous error was
 * `final_requires_subcall`, and lists the previous errors when there are any.
 *
 * @param {string} prompt - Original planner prompt.
 * @param {string[]} errors - Error codes collected from earlier attempts.
 * @returns {string} Header, a blank line, then the original prompt.
 */
function buildPlannerRetryPrompt(prompt, errors) {
    const header = [
        'Return valid JSON only.',
        errors.includes('final_requires_subcall')
            ? 'Do not return intent=final until after at least one subcall.'
            : null,
        errors.length > 0 ? `Previous error: ${errors.join('; ')}` : null
    ]
        .filter((part) => part !== null)
        .join(' ');
    return `${header}\n\n${prompt}`;
}
211
/**
 * Writes a failed planner attempt to
 * `<runDir>/planner/iteration-<iteration>-attempt-<attempt + 1>.txt`.
 *
 * The file holds an optional `# errors: ...` header line followed by the raw
 * planner output, or a placeholder when the output is empty or missing.
 *
 * @param {{runDir: string, iteration: number, attempt: number, errors: string[], raw?: string|null}} params
 * @returns {Promise<void>}
 */
async function recordPlannerFailure(params) {
    const { runDir, iteration, attempt } = params;
    const output = params.raw ?? '';
    const targetDir = join(runDir, 'planner');
    await mkdir(targetDir, { recursive: true });
    const targetPath = join(targetDir, `iteration-${iteration}-attempt-${attempt + 1}.txt`);
    const errorHeader = params.errors.length ? `# errors: ${params.errors.join('; ')}\n` : '';
    const content = output.length > 0 ? output : '[empty planner output]';
    await writeFile(targetPath, `${errorHeader}${content}`, 'utf8');
}
95
220
  function normalizePurpose(raw) {
96
221
  if (typeof raw === 'string' && DEFAULT_ALLOWED_PURPOSES.has(raw)) {
@@ -366,14 +491,19 @@ export async function runSymbolicLoop(options) {
366
491
  const plannerErrors = [];
367
492
  const hasPriorSubcalls = priorSubcalls.length > 0;
368
493
  for (let attempt = 0; attempt < 2; attempt += 1) {
369
- const raw = await options.runPlanner(attempt === 0
370
- ? plannerPrompt
371
- : `Return valid JSON only. Previous error: ${plannerErrors.join('; ')}\n\n${plannerPrompt}`, attempt);
494
+ const raw = await options.runPlanner(attempt === 0 ? plannerPrompt : buildPlannerRetryPrompt(plannerPrompt, plannerErrors), attempt);
372
495
  try {
373
496
  plan = parsePlannerOutput(raw);
374
497
  }
375
498
  catch {
376
499
  plannerErrors.push('plan_parse_error');
500
+ await recordPlannerFailure({
501
+ runDir: options.runDir,
502
+ iteration,
503
+ attempt,
504
+ errors: plannerErrors,
505
+ raw
506
+ });
377
507
  if (attempt === 0) {
378
508
  continue;
379
509
  }
@@ -392,6 +522,13 @@ export async function runSymbolicLoop(options) {
392
522
  }
393
523
  if (validationError) {
394
524
  plannerErrors.push(validationError);
525
+ await recordPlannerFailure({
526
+ runDir: options.runDir,
527
+ iteration,
528
+ attempt,
529
+ errors: plannerErrors,
530
+ raw
531
+ });
395
532
  if (attempt === 0) {
396
533
  continue;
397
534
  }