@kbediako/codex-orchestrator 0.1.16 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -144,7 +144,7 @@ Bundled skills (may vary by release):
144
144
  - `docs-first`
145
145
  - `collab-evals`
146
146
  - `collab-deliberation`
147
- - `delegate-early`
147
+ - `delegate-early` (compatibility alias; use `delegation-usage`)
148
148
 
149
149
  ## DevTools readiness
150
150
 
@@ -1,4 +1,5 @@
1
1
  #!/usr/bin/env node
2
+ import { existsSync } from 'node:fs';
2
3
  import { readFile } from 'node:fs/promises';
3
4
  import { basename, join } from 'node:path';
4
5
  import process from 'node:process';
@@ -339,6 +340,10 @@ async function handleRlm(orchestrator, rawArgs) {
339
340
  }
340
341
  async function handleResume(orchestrator, rawArgs) {
341
342
  const { positionals, flags } = parseArgs(rawArgs);
343
+ if (isHelpRequest(positionals, flags)) {
344
+ printResumeHelp();
345
+ return;
346
+ }
342
347
  const runId = (flags['run'] ?? positionals[0]);
343
348
  if (!runId) {
344
349
  throw new Error('resume requires --run <run-id>.');
@@ -358,6 +363,10 @@ async function handleResume(orchestrator, rawArgs) {
358
363
  }
359
364
  async function handleStatus(orchestrator, rawArgs) {
360
365
  const { positionals, flags } = parseArgs(rawArgs);
366
+ if (isHelpRequest(positionals, flags)) {
367
+ printStatusHelp();
368
+ return;
369
+ }
361
370
  const runId = (flags['run'] ?? positionals[0]);
362
371
  if (!runId) {
363
372
  throw new Error('status requires --run <run-id>.');
@@ -619,7 +628,11 @@ async function handleMcp(rawArgs) {
619
628
  await serveMcp({ repoRoot, dryRun, extraArgs: positionals });
620
629
  }
621
630
  async function handleDelegationServer(rawArgs) {
622
- const { flags } = parseArgs(rawArgs);
631
+ const { positionals, flags } = parseArgs(rawArgs);
632
+ if (isHelpRequest(positionals, flags)) {
633
+ printDelegationServerHelp();
634
+ return;
635
+ }
623
636
  const repoRoot = typeof flags['repo'] === 'string' ? flags['repo'] : process.cwd();
624
637
  const modeFlag = typeof flags['mode'] === 'string' ? flags['mode'] : undefined;
625
638
  const overrideFlag = typeof flags['config'] === 'string'
@@ -748,7 +761,7 @@ function parseExecArgs(rawArgs) {
748
761
  }
749
762
  }
750
763
  return {
751
- commandTokens,
764
+ commandTokens: normalizeExecCommandTokens(commandTokens, cwd),
752
765
  notifyTargets,
753
766
  otelEndpoint,
754
767
  requestedMode,
@@ -757,6 +770,76 @@ function parseExecArgs(rawArgs) {
757
770
  taskId
758
771
  };
759
772
  }
773
/**
 * Expands a lone, whitespace-containing command token into separate tokens.
 *
 * The token list is returned untouched when it does not hold exactly one
 * token, when that token (after trimming) is empty or has no whitespace, when
 * the trimmed token names an existing path, or when splitting yields nothing.
 *
 * @param {string[]} commandTokens - Command tokens as collected from the CLI.
 * @param {string | undefined} cwd - Optional directory used for the path probe.
 * @returns {string[]} The original array or the split tokens.
 */
function normalizeExecCommandTokens(commandTokens, cwd) {
    if (commandTokens.length !== 1) {
        return commandTokens;
    }
    const candidate = commandTokens[0].trim();
    const hasWhitespace = candidate.length > 0 && /\s/.test(candidate);
    // A path that really exists (even one with spaces) must stay a single token.
    if (!hasWhitespace || looksLikeExistingPath(candidate, cwd)) {
        return commandTokens;
    }
    const pieces = splitShellLikeCommand(candidate);
    return pieces.length === 0 ? commandTokens : pieces;
}
784
/**
 * Reports whether `token` names something that exists on disk, either as
 * given or joined onto `cwd` when a `cwd` is supplied.
 *
 * @param {string} token - Candidate path.
 * @param {string | undefined} cwd - Optional base directory for a second probe.
 * @returns {boolean} True when at least one probe exists.
 */
function looksLikeExistingPath(token, cwd) {
    const candidates = cwd ? [token, join(cwd, token)] : [token];
    return candidates.some((candidate) => existsSync(candidate));
}
796
/**
 * Splits a command string into argv-style tokens using simple shell-like rules.
 *
 * Rules:
 * - Unquoted whitespace separates tokens.
 * - Single or double quotes group text; the quote characters are removed.
 * - Inside a quoted span, a backslash escapes the active quote character or
 *   another backslash. Any other backslash (including every backslash outside
 *   quotes) is kept literally.
 * - A closed, empty quoted span (`""` or `''`) produces an empty-string token
 *   instead of being dropped, so `-m ""` keeps its argument.
 * - An unterminated quote is tolerated: the remaining text becomes the final
 *   token.
 *
 * @param {string} command - Raw command line.
 * @returns {string[]} Parsed tokens, possibly empty.
 */
function splitShellLikeCommand(command) {
    const whitespace = /\s/u;
    const tokens = [];
    let current = '';
    let quote = null;
    // Set when a quoted span closes, so an empty quoted argument still counts as a token.
    let sawQuotedSpan = false;
    const flush = () => {
        if (current.length > 0 || sawQuotedSpan) {
            tokens.push(current);
        }
        current = '';
        sawQuotedSpan = false;
    };
    for (let i = 0; i < command.length; i += 1) {
        const char = command[i];
        if (char === '\\' && quote !== null) {
            const next = command[i + 1];
            if (next === quote || next === '\\') {
                current += next;
                i += 1;
                continue;
            }
        }
        if (char === '"' || char === "'") {
            if (quote === char) {
                quote = null;
                sawQuotedSpan = true;
            }
            else if (quote === null) {
                quote = char;
            }
            else {
                // The other quote kind inside an open quote is literal text.
                current += char;
            }
            continue;
        }
        if (quote === null && whitespace.test(char)) {
            flush();
            continue;
        }
        current += char;
    }
    flush();
    return tokens;
}
836
/**
 * Tells whether the parsed arguments ask for help: either the `help` flag is
 * exactly `true`, or the first positional is `help`, `--help` or `-h`.
 *
 * @param {string[]} positionals - Positional arguments.
 * @param {Record<string, unknown>} flags - Parsed flags.
 * @returns {boolean} True when help output was requested.
 */
function isHelpRequest(positionals, flags) {
    const helpWords = ['help', '--help', '-h'];
    return flags['help'] === true || helpWords.includes(positionals[0]);
}
760
843
  function printHelp() {
761
844
  console.log(`Usage: codex-orchestrator <command> [options]
762
845
 
@@ -872,3 +955,37 @@ Commands:
872
955
  --format json Emit machine-readable output.
873
956
  `);
874
957
  }
958
/** Prints the usage line and option summary for the `status` subcommand via console.log. */
+ function printStatusHelp() {
959
+ console.log(`Usage: codex-orchestrator status --run <id> [--watch] [--interval N] [--format json]
960
+
961
+ Options:
962
+ --run <id> Run id to inspect.
963
+ --watch Poll until run reaches a terminal state.
964
+ --interval <sec> Poll interval when --watch is enabled (default 10).
965
+ --format json Emit machine-readable status output.
966
+ `);
967
+ }
968
/** Prints the usage line and option summary for the `resume` subcommand via console.log. */
+ function printResumeHelp() {
969
+ console.log(`Usage: codex-orchestrator resume --run <id> [options]
970
+
971
+ Options:
972
+ --run <id> Run id to resume.
973
+ --token <resume-token> Verify the resume token before restarting.
974
+ --actor <name> Record who resumed the run.
975
+ --reason <text> Record why the run was resumed.
976
+ --target <stage-id> Override stage selection before resuming.
977
+ --format json Emit machine-readable output.
978
+ --interactive | --ui Enable read-only HUD when running in a TTY.
979
+ --no-interactive Force disable HUD.
980
+ `);
981
+ }
982
/** Prints the usage line and option summary for the `delegate-server` subcommand via console.log. */
+ function printDelegationServerHelp() {
983
+ console.log(`Usage: codex-orchestrator delegate-server [options]
984
+
985
+ Options:
986
+ --repo <path> Repo root for config + manifests (default cwd).
987
+ --mode <full|question_only> Limit tool surface for child runs.
988
+ --config "<key>=<value>[;...]" Apply config overrides.
989
+ --help Show this message.
990
+ `);
991
+ }
@@ -54,11 +54,14 @@ function buildTrajectorySummary(frames, fallback) {
54
54
  const terminal = frames[frames.length - 1];
55
55
  if (terminal?.event.type === 'exec:end') {
56
56
  const stdout = terminal.event.payload.stdout?.trim();
57
- if (stdout) {
57
+ if (stdout && !isLowSignalOutput(stdout)) {
58
58
  return stdout.split('\n').slice(0, 2).join(' ');
59
59
  }
60
60
  }
61
- return fallback ?? 'TF-GRPO trajectory summary unavailable.';
61
+ if (fallback && fallback.trim()) {
62
+ return fallback.trim();
63
+ }
64
+ return 'TF-GRPO trajectory summary unavailable.';
62
65
  }
63
66
  function toToolStat(frame) {
64
67
  return {
@@ -75,3 +78,18 @@ function truncateSummary(value, maxWords) {
75
78
  }
76
79
  return tokens.slice(0, maxWords).join(' ');
77
80
  }
81
/**
 * Decides whether captured stdout is too uninformative to use as a summary.
 *
 * Output counts as low-signal when, after trimming, it is empty, starts with
 * `{` or `[` (structured output such as JSON event lines), or its first line
 * has fewer than three whitespace-separated words.
 *
 * @param {string} stdout - Captured standard output.
 * @returns {boolean} True when the output should not be used as a summary.
 */
function isLowSignalOutput(stdout) {
    const trimmed = stdout.trim();
    if (trimmed.length === 0) {
        return true;
    }
    // Covers `{"type": ...}` event lines too: anything opening with `{` stops here.
    if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
        return true;
    }
    // split() always yields at least one element, so no fallback is needed.
    const [firstLine] = trimmed.split('\n');
    const wordCount = firstLine.split(/\s+/u).filter(Boolean).length;
    return wordCount < 3;
}
@@ -112,6 +112,7 @@ export async function persistExperienceRecords(params) {
112
112
  }
113
113
  try {
114
114
  const frames = framesFromToolMetrics(runMetrics.perTool, terminalEvent);
115
+ const reward = deriveExperienceReward(terminalEvent);
115
116
  const trajectory = summarizeTrajectory({
116
117
  runId: params.manifest.run_id,
117
118
  taskId: params.manifest.task_id,
@@ -120,7 +121,8 @@ export async function persistExperienceRecords(params) {
120
121
  domain: promptPack.domain,
121
122
  stampSignature: promptPack.stamp,
122
123
  frames,
123
- baseSummary: params.manifest.summary ?? undefined
124
+ baseSummary: params.manifest.summary ?? undefined,
125
+ reward
124
126
  });
125
127
  const optimized = optimizeExperience(trajectory, params.policy);
126
128
  const manifestPath = relativeToRepo(params.env, params.paths.manifestPath);
@@ -198,3 +200,13 @@ function findTerminalEvent(events) {
198
200
  }
199
201
  return events.length > 0 ? events[events.length - 1] : null;
200
202
  }
203
/**
 * Derives a reward record from a run's terminal event.
 *
 * `gtScore` is 1 only for an `exec:end` event whose payload reports status
 * `succeeded` with exit code 0; every other input scores 0, including a
 * missing event or an event without a payload. `relativeRank` is always 0.
 *
 * @param {{ type?: string, payload?: { status?: string, exitCode?: number } } | null | undefined} event
 *   Terminal event; may be absent when no events were recorded.
 * @returns {{ gtScore: number, relativeRank: number }} Reward for the run.
 */
function deriveExperienceReward(event) {
    // The terminal-event lookup can come back empty; treat that as "no reward".
    if (event?.type !== 'exec:end') {
        return { gtScore: 0, relativeRank: 0 };
    }
    const payload = event.payload;
    const succeeded = payload?.status === 'succeeded' && payload?.exitCode === 0;
    return {
        gtScore: succeeded ? 1 : 0,
        relativeRank: 0
    };
}
@@ -30,11 +30,14 @@ import { RunEventStream, attachRunEventAdapter } from './events/runEventStream.j
30
30
  import { CLI_EXECUTION_MODE_PARSER, resolveRequiresCloudPolicy } from '../utils/executionMode.js';
31
31
  import { resolveCodexCliBin } from './utils/codexCli.js';
32
32
  import { CodexCloudTaskExecutor } from '../cloud/CodexCloudTaskExecutor.js';
33
+ import { persistPipelineExperience } from './services/pipelineExperience.js';
33
34
  const resolveBaseEnvironment = () => normalizeEnvironmentPaths(resolveEnvironmentPaths());
34
35
  const CONFIG_OVERRIDE_ENV_KEYS = ['CODEX_CONFIG_OVERRIDES', 'CODEX_MCP_CONFIG_OVERRIDES'];
35
36
  const DEFAULT_CLOUD_POLL_INTERVAL_SECONDS = 10;
36
37
  const DEFAULT_CLOUD_TIMEOUT_SECONDS = 1800;
37
38
  const DEFAULT_CLOUD_ATTEMPTS = 1;
39
// Upper bound on how many prompt-pack experience snippets are appended to a cloud prompt.
+ const MAX_CLOUD_PROMPT_EXPERIENCES = 3;
40
// Maximum length of one snippet; longer snippets are cut short and end with an ellipsis.
+ const MAX_CLOUD_PROMPT_EXPERIENCE_CHARS = 320;
38
41
  function collectDelegationEnvOverrides(env = process.env) {
39
42
  const layers = [];
40
43
  for (const key of CONFIG_OVERRIDE_ENV_KEYS) {
@@ -70,6 +73,112 @@ function readCloudNumber(raw, fallback) {
70
73
  }
71
74
  return parsed;
72
75
  }
76
/**
 * Parses a comma- and/or whitespace-separated feature list.
 *
 * Empty entries are discarded and duplicates removed, keeping the order of
 * first appearance. A falsy input yields an empty list.
 *
 * @param {string | null | undefined} raw - Raw list text.
 * @returns {string[]} Unique feature names in first-seen order.
 */
function readCloudFeatureList(raw) {
    if (!raw) {
        return [];
    }
    const names = raw
        .split(/[,\s]+/u)
        .map((piece) => piece.trim())
        .filter((piece) => piece.length > 0);
    // A Set iterates in insertion order, which keeps first-seen ordering.
    return [...new Set(names)];
}
92
/**
 * Collapses every run of whitespace into a single space and strips leading
 * and trailing whitespace.
 *
 * @param {string} value - Text to normalize.
 * @returns {string} Single-line, single-spaced text.
 */
function normalizePromptSnippet(value) {
    const words = value.trim().split(/\s+/u);
    return words.join(' ');
}
95
/**
 * Shortens a snippet to at most `maxChars` UTF-16 code units.
 *
 * Text within the limit is returned unchanged. Longer text is cut to leave
 * room for a one-character ellipsis, trailing whitespace at the cut is
 * removed, and `…` is appended. The cut never leaves a dangling high
 * surrogate, so a character outside the BMP (for example an emoji) is either
 * kept whole or dropped whole.
 *
 * @param {string} value - Snippet text.
 * @param {number} [maxChars=MAX_CLOUD_PROMPT_EXPERIENCE_CHARS] - Length limit.
 * @returns {string} The original or truncated snippet.
 */
function truncatePromptSnippet(value, maxChars = MAX_CLOUD_PROMPT_EXPERIENCE_CHARS) {
    if (value.length <= maxChars) {
        return value;
    }
    // Reserve one unit for the ellipsis; never go negative, since slice() would count from the end.
    let cut = Math.max(0, maxChars - 1);
    const lastKept = value.charCodeAt(cut - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
        // The last kept unit is the first half of a surrogate pair; drop it too.
        cut -= 1;
    }
    return `${value.slice(0, cut).trimEnd()}…`;
}
101
/**
 * Returns the trimmed domain name, or null when the value is not a string or
 * is blank after trimming.
 *
 * @param {unknown} value - Raw domain value.
 * @returns {string | null} Trimmed, non-empty domain or null.
 */
function readPromptPackDomain(value) {
    const normalized = typeof value === 'string' ? value.trim() : '';
    return normalized === '' ? null : normalized;
}
108
/**
 * Returns the pack's trimmed domain in lower case, or null when the pack has
 * no usable domain.
 *
 * @param {{ domain?: unknown }} pack - Prompt pack entry.
 * @returns {string | null} Lower-cased domain or null.
 */
function readPromptPackDomainLower(pack) {
    return readPromptPackDomain(pack.domain)?.toLowerCase() ?? null;
}
112
/**
 * Checks that a prompt pack has a usable domain and at least one experience
 * that is a string with non-whitespace content.
 *
 * @param {{ domain?: unknown, experiences?: unknown }} pack - Prompt pack entry.
 * @returns {boolean} True when the pack can contribute prompt hints.
 */
function hasPromptPackExperiences(pack) {
    if (readPromptPackDomain(pack.domain) === null) {
        return false;
    }
    const { experiences } = pack;
    if (!Array.isArray(experiences)) {
        return false;
    }
    return experiences.some((item) => typeof item === 'string' && normalizePromptSnippet(item) !== '');
}
119
/**
 * Picks the prompt pack whose experiences should be added to a cloud prompt.
 *
 * Only packs with a usable domain and at least one non-blank experience are
 * considered. Preference order:
 *   1. the first pack whose domain is not `implementation` and whose
 *      lower-cased domain occurs as a substring of the lower-cased
 *      pipeline/target/stage text;
 *   2. the first pack whose domain is `implementation`;
 *   3. the first remaining candidate.
 *
 * A separate "any domain found in the text" step is unnecessary: once step 1
 * has failed, such a match could only be the first `implementation` pack,
 * which step 2 returns anyway.
 *
 * @param {{ promptPacks?: Array<object> | null, pipeline: object, target: object, stage: object }} params
 *   Prompt packs plus the pipeline, target and stage used for matching.
 * @returns {object | null} The selected pack, or null when none qualifies.
 */
function selectPromptPackForCloudPrompt(params) {
    const candidates = (params.promptPacks ?? []).filter(hasPromptPackExperiences);
    if (candidates.length === 0) {
        return null;
    }
    const haystack = [
        params.pipeline.id,
        params.pipeline.title,
        (params.pipeline.tags ?? []).join(' '),
        params.target.id,
        params.target.description ?? '',
        params.stage.id,
        params.stage.title
    ]
        .join(' ')
        .toLowerCase();
    let implementationPack = null;
    for (const pack of candidates) {
        const domainLower = readPromptPackDomainLower(pack);
        if (domainLower === null) {
            continue;
        }
        if (domainLower === 'implementation') {
            // Remember only the first one; it is the fallback, not a direct match.
            if (implementationPack === null) {
                implementationPack = pack;
            }
            continue;
        }
        if (haystack.includes(domainLower)) {
            return pack;
        }
    }
    return implementationPack ?? candidates[0];
}
155
/**
 * Builds the extra prompt lines that pass prior experiences to a cloud task.
 *
 * Selects one prompt pack from the manifest, normalizes its string
 * experiences to single-spaced text, keeps at most
 * MAX_CLOUD_PROMPT_EXPERIENCES non-empty ones, truncates each, and renders
 * them as a numbered list under a heading and a `Domain:` line.
 *
 * @param {{ manifest?: { prompt_packs?: Array<object> | null } | null, pipeline: object, target: object, stage: object }} params
 *   Run manifest (optional) plus the pipeline, target and stage being run.
 * @returns {string[]} Lines to append to the prompt; empty when there is
 *   nothing to add.
 */
function buildCloudExperiencePromptLines(params) {
    const selectedPack = selectPromptPackForCloudPrompt({
        // The manifest may be absent when a caller builds a prompt without one.
        promptPacks: params.manifest?.prompt_packs,
        pipeline: params.pipeline,
        target: params.target,
        stage: params.stage
    });
    if (!selectedPack || !Array.isArray(selectedPack.experiences)) {
        return [];
    }
    const snippets = selectedPack.experiences
        .filter((entry) => typeof entry === 'string')
        .map((entry) => normalizePromptSnippet(entry))
        .filter((entry) => entry.length > 0)
        .slice(0, MAX_CLOUD_PROMPT_EXPERIENCES)
        .map((entry) => truncatePromptSnippet(entry));
    if (snippets.length === 0) {
        return [];
    }
    const domainLabel = readPromptPackDomain(selectedPack.domain) ?? 'unknown';
    return [
        // Leading blank line separates the hints from the main prompt body.
        '',
        'Relevant prior experiences (hints, not strict instructions):',
        `Domain: ${domainLabel}`,
        ...snippets.map((entry, index) => `${index + 1}. ${entry}`)
    ];
}
73
182
  function resolveCloudEnvironmentId(task, target, envOverrides) {
74
183
  const metadata = (target.metadata ?? {});
75
184
  const taskMetadata = (task.metadata ?? {});
@@ -641,6 +750,7 @@ export class CodexOrchestrator {
641
750
  await schedulePersist({ manifest: true, heartbeat: true, force: true }).catch((error) => {
642
751
  logger.warn(`Heartbeat update failed for run ${manifest.run_id}: ${error?.message ?? String(error)}`);
643
752
  });
753
+ await persistPipelineExperience({ env, pipeline, manifest, paths });
644
754
  await schedulePersist({ force: true });
645
755
  await appendMetricsEntry(env, paths, manifest, persister);
646
756
  return {
@@ -762,12 +872,16 @@ export class CodexOrchestrator {
762
872
  status: targetEntry.status
763
873
  });
764
874
  const executor = new CodexCloudTaskExecutor();
765
- const prompt = this.buildCloudPrompt(task, target, pipeline, targetStage);
875
+ const prompt = this.buildCloudPrompt(task, target, pipeline, targetStage, manifest);
766
876
  const pollIntervalSeconds = readCloudNumber(envOverrides?.CODEX_CLOUD_POLL_INTERVAL_SECONDS ?? process.env.CODEX_CLOUD_POLL_INTERVAL_SECONDS, DEFAULT_CLOUD_POLL_INTERVAL_SECONDS);
767
877
  const timeoutSeconds = readCloudNumber(envOverrides?.CODEX_CLOUD_TIMEOUT_SECONDS ?? process.env.CODEX_CLOUD_TIMEOUT_SECONDS, DEFAULT_CLOUD_TIMEOUT_SECONDS);
768
878
  const attempts = readCloudNumber(envOverrides?.CODEX_CLOUD_EXEC_ATTEMPTS ?? process.env.CODEX_CLOUD_EXEC_ATTEMPTS, DEFAULT_CLOUD_ATTEMPTS);
769
879
  const branch = readCloudString(envOverrides?.CODEX_CLOUD_BRANCH) ??
770
880
  readCloudString(process.env.CODEX_CLOUD_BRANCH);
881
+ const enableFeatures = readCloudFeatureList(readCloudString(envOverrides?.CODEX_CLOUD_ENABLE_FEATURES) ??
882
+ readCloudString(process.env.CODEX_CLOUD_ENABLE_FEATURES));
883
+ const disableFeatures = readCloudFeatureList(readCloudString(envOverrides?.CODEX_CLOUD_DISABLE_FEATURES) ??
884
+ readCloudString(process.env.CODEX_CLOUD_DISABLE_FEATURES));
771
885
  const codexBin = resolveCodexCliBin({ ...process.env, ...(envOverrides ?? {}) });
772
886
  const cloudResult = await executor.execute({
773
887
  codexBin,
@@ -779,6 +893,8 @@ export class CodexOrchestrator {
779
893
  timeoutSeconds,
780
894
  attempts,
781
895
  branch,
896
+ enableFeatures,
897
+ disableFeatures,
782
898
  env: envOverrides
783
899
  });
784
900
  success = cloudResult.success;
@@ -825,6 +941,7 @@ export class CodexOrchestrator {
825
941
  await schedulePersist({ manifest: true, heartbeat: true, force: true }).catch((error) => {
826
942
  logger.warn(`Heartbeat update failed for run ${manifest.run_id}: ${error?.message ?? String(error)}`);
827
943
  });
944
+ await persistPipelineExperience({ env, pipeline, manifest, paths });
828
945
  await schedulePersist({ force: true });
829
946
  await appendMetricsEntry(env, paths, manifest, persister);
830
947
  return {
@@ -851,7 +968,7 @@ export class CodexOrchestrator {
851
968
  }
852
969
  return null;
853
970
  }
854
- buildCloudPrompt(task, target, pipeline, stage) {
971
+ buildCloudPrompt(task, target, pipeline, stage, manifest) {
855
972
  const lines = [
856
973
  `Task ID: ${task.id}`,
857
974
  `Task title: ${task.title}`,
@@ -861,6 +978,7 @@ export class CodexOrchestrator {
861
978
  '',
862
979
  'Apply the required repository changes for this target stage and produce a diff.'
863
980
  ].filter((line) => Boolean(line));
981
+ lines.push(...buildCloudExperiencePromptLines({ manifest, pipeline, target, stage }));
864
982
  return lines.join('\n');
865
983
  }
866
984
  async performRunLifecycle(context) {