npm - @kbediako/codex-orchestrator - Versions diffs - 0.1.15 → 0.1.17 - Mend

@kbediako/codex-orchestrator 0.1.15 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/README.md +6 -1
package/dist/bin/codex-orchestrator.js +119 -2
package/dist/orchestrator/src/cli/delegationServer.js +4 -1
package/dist/orchestrator/src/cli/exec/experience.js +20 -2
package/dist/orchestrator/src/cli/exec/tfgrpo.js +13 -1
package/dist/orchestrator/src/cli/orchestrator.js +120 -2
package/dist/orchestrator/src/cli/rlm/symbolic.js +439 -38
package/dist/orchestrator/src/cli/rlmRunner.js +235 -4
package/dist/orchestrator/src/cli/run/manifest.js +200 -4
package/dist/orchestrator/src/cli/services/commandRunner.js +1 -0
package/dist/orchestrator/src/cli/services/pipelineExperience.js +122 -0
package/dist/orchestrator/src/cli/utils/delegationGuardRunner.js +80 -0
package/dist/orchestrator/src/cloud/CodexCloudTaskExecutor.js +25 -0
package/docs/README.md +4 -1
package/package.json +2 -1
package/skills/collab-deliberation/SKILL.md +72 -8
package/skills/delegate-early/SKILL.md +13 -39
package/skills/delegation-usage/DELEGATION_GUIDE.md +7 -4
package/skills/delegation-usage/SKILL.md +17 -6
package/templates/codex/AGENTS.md +30 -1

package/README.md CHANGED Viewed

@@ -80,6 +80,11 @@ codex -c 'mcp_servers.delegation.enabled=true' ...
 ```
 `delegate-server` is the canonical name; `delegation-server` is supported as an alias (older docs may use it).
+Delegation guard profile:
+- `CODEX_ORCHESTRATOR_GUARD_PROFILE=auto` (default): strict in CO-style repos, warn in lightweight repos.
+- Set `CODEX_ORCHESTRATOR_GUARD_PROFILE=warn` for ad-hoc/no-task-id runs.
+- Set `CODEX_ORCHESTRATOR_GUARD_PROFILE=strict` to enforce full delegation evidence checks.
 ## Delegation + RLM flow
 RLM (Recursive Language Model) is the long-horizon loop used by the `rlm` pipeline (`codex-orchestrator rlm "<goal>"` or `codex-orchestrator start rlm --goal "<goal>"`). Delegated runs only enter RLM when the child is launched with the `rlm` pipeline (or the rlm runner directly). In auto mode it resolves to symbolic when delegated, when `RLM_CONTEXT_PATH` is set, or when the context exceeds `RLM_SYMBOLIC_MIN_BYTES`; otherwise it stays iterative. The runner writes state to `.runs/<task-id>/cli/<run-id>/rlm/state.json` and stops when the validator passes or budgets are exhausted.
@@ -139,7 +144,7 @@ Bundled skills (may vary by release):
 - `docs-first`
 - `collab-evals`
 - `collab-deliberation`
-- `delegate-early`
+- `delegate-early` (compatibility alias; use `delegation-usage`)
 ## DevTools readiness

package/dist/bin/codex-orchestrator.js CHANGED Viewed

@@ -1,4 +1,5 @@
 #!/usr/bin/env node
+import { existsSync } from 'node:fs';
 import { readFile } from 'node:fs/promises';
 import { basename, join } from 'node:path';
 import process from 'node:process';
@@ -339,6 +340,10 @@ async function handleRlm(orchestrator, rawArgs) {
 }
 async function handleResume(orchestrator, rawArgs) {
     const { positionals, flags } = parseArgs(rawArgs);
+    if (isHelpRequest(positionals, flags)) {
+        printResumeHelp();
+        return;
+    }
     const runId = (flags['run'] ?? positionals[0]);
     if (!runId) {
         throw new Error('resume requires --run <run-id>.');
@@ -358,6 +363,10 @@ async function handleResume(orchestrator, rawArgs) {
 }
 async function handleStatus(orchestrator, rawArgs) {
     const { positionals, flags } = parseArgs(rawArgs);
+    if (isHelpRequest(positionals, flags)) {
+        printStatusHelp();
+        return;
+    }
     const runId = (flags['run'] ?? positionals[0]);
     if (!runId) {
         throw new Error('status requires --run <run-id>.');
@@ -619,7 +628,11 @@ async function handleMcp(rawArgs) {
     await serveMcp({ repoRoot, dryRun, extraArgs: positionals });
 }
 async function handleDelegationServer(rawArgs) {
-    const { flags } = parseArgs(rawArgs);
+    const { positionals, flags } = parseArgs(rawArgs);
+    if (isHelpRequest(positionals, flags)) {
+        printDelegationServerHelp();
+        return;
+    }
     const repoRoot = typeof flags['repo'] === 'string' ? flags['repo'] : process.cwd();
     const modeFlag = typeof flags['mode'] === 'string' ? flags['mode'] : undefined;
     const overrideFlag = typeof flags['config'] === 'string'
@@ -748,7 +761,7 @@ function parseExecArgs(rawArgs) {
         }
     }
     return {
-        commandTokens,
+        commandTokens: normalizeExecCommandTokens(commandTokens, cwd),
         notifyTargets,
         otelEndpoint,
         requestedMode,
@@ -757,6 +770,76 @@ function parseExecArgs(rawArgs) {
         taskId
     };
 }
+function normalizeExecCommandTokens(commandTokens, cwd) {
+    if (commandTokens.length !== 1) {
+        return commandTokens;
+    }
+    const token = commandTokens[0].trim();
+    if (token.length === 0 || !/\s/.test(token) || looksLikeExistingPath(token, cwd)) {
+        return commandTokens;
+    }
+    const parsed = splitShellLikeCommand(token);
+    return parsed.length > 0 ? parsed : commandTokens;
+}
+function looksLikeExistingPath(token, cwd) {
+    const probes = [token];
+    if (cwd) {
+        probes.push(join(cwd, token));
+    }
+    for (const probe of probes) {
+        if (existsSync(probe)) {
+            return true;
+        }
+    }
+    return false;
+}
+function splitShellLikeCommand(command) {
+    const tokens = [];
+    let current = '';
+    let quote = null;
+    for (let i = 0; i < command.length; i += 1) {
+        const char = command[i];
+        if (char === '\\' && quote !== null) {
+            const next = command[i + 1];
+            if (next === quote || next === '\\') {
+                current += next;
+                i += 1;
+                continue;
+            }
+        }
+        if (char === '"' || char === "'") {
+            if (quote === char) {
+                quote = null;
+            }
+            else if (quote === null) {
+                quote = char;
+            }
+            else {
+                current += char;
+            }
+            continue;
+        }
+        if (quote === null && /\s/u.test(char)) {
+            if (current.length > 0) {
+                tokens.push(current);
+                current = '';
+            }
+            continue;
+        }
+        current += char;
+    }
+    if (current.length > 0) {
+        tokens.push(current);
+    }
+    return tokens;
+}
+function isHelpRequest(positionals, flags) {
+    if (flags['help'] === true) {
+        return true;
+    }
+    const first = positionals[0];
+    return first === 'help' || first === '--help' || first === '-h';
+}
 function printHelp() {
     console.log(`Usage: codex-orchestrator <command> [options]
@@ -872,3 +955,37 @@ Commands:
     --format json           Emit machine-readable output.
 `);
 }
+function printStatusHelp() {
+    console.log(`Usage: codex-orchestrator status --run <id> [--watch] [--interval N] [--format json]
+Options:
+  --run <id>         Run id to inspect.
+  --watch            Poll until run reaches a terminal state.
+  --interval <sec>   Poll interval when --watch is enabled (default 10).
+  --format json      Emit machine-readable status output.
+`);
+}
+function printResumeHelp() {
+    console.log(`Usage: codex-orchestrator resume --run <id> [options]
+Options:
+  --run <id>            Run id to resume.
+  --token <resume-token>  Verify the resume token before restarting.
+  --actor <name>        Record who resumed the run.
+  --reason <text>       Record why the run was resumed.
+  --target <stage-id>   Override stage selection before resuming.
+  --format json         Emit machine-readable output.
+  --interactive | --ui  Enable read-only HUD when running in a TTY.
+  --no-interactive      Force disable HUD.
+`);
+}
+function printDelegationServerHelp() {
+    console.log(`Usage: codex-orchestrator delegate-server [options]
+Options:
+  --repo <path>                    Repo root for config + manifests (default cwd).
+  --mode <full|question_only>      Limit tool surface for child runs.
+  --config "<key>=<value>[;...]"   Apply config overrides.
+  --help                           Show this message.
+`);
+}

package/dist/orchestrator/src/cli/delegationServer.js CHANGED Viewed

@@ -40,6 +40,7 @@ const CONFIRMATION_ERROR_CODES = new Set([
     'nonce_already_consumed'
 ]);
 const TOOL_PROFILE_ENTRY_PATTERN = /^[A-Za-z0-9][A-Za-z0-9_-]*$/;
+const TERMINAL_RUN_STATUSES = new Set(['succeeded', 'failed', 'cancelled', 'canceled']);
 export async function startDelegationServer(options) {
     const repoRoot = resolve(options.repoRoot);
     const configFiles = await loadDelegationConfigFiles({ repoRoot });
@@ -272,7 +273,9 @@ async function handleDelegateStatus(input, allowedRoots, allowedHosts) {
     const raw = await readFile(manifestPath, 'utf8');
     const manifest = JSON.parse(raw);
     const eventsPath = resolve(dirname(manifestPath), 'events.jsonl');
-    await assertControlEndpoint(manifestPath, allowedHosts);
+    if (!TERMINAL_RUN_STATUSES.has(manifest.status)) {
+        await assertControlEndpoint(manifestPath, allowedHosts);
+    }
     return {
         run_id: manifest.run_id,
         task_id: manifest.task_id,

package/dist/orchestrator/src/cli/exec/experience.js CHANGED Viewed

@@ -54,11 +54,14 @@ function buildTrajectorySummary(frames, fallback) {
     const terminal = frames[frames.length - 1];
     if (terminal?.event.type === 'exec:end') {
         const stdout = terminal.event.payload.stdout?.trim();
-        if (stdout) {
+        if (stdout && !isLowSignalOutput(stdout)) {
             return stdout.split('\n').slice(0, 2).join(' ');
         }
     }
-    return fallback ?? 'TF-GRPO trajectory summary unavailable.';
+    if (fallback && fallback.trim()) {
+        return fallback.trim();
+    }
+    return 'TF-GRPO trajectory summary unavailable.';
 }
 function toToolStat(frame) {
     return {
@@ -75,3 +78,18 @@ function truncateSummary(value, maxWords) {
     }
     return tokens.slice(0, maxWords).join(' ');
 }
+function isLowSignalOutput(stdout) {
+    const trimmed = stdout.trim();
+    if (!trimmed) {
+        return true;
+    }
+    if (trimmed.startsWith('{') || trimmed.startsWith('[')) {
+        return true;
+    }
+    const firstLine = trimmed.split('\n')[0] ?? '';
+    if (/^\{"type":/u.test(firstLine)) {
+        return true;
+    }
+    const words = firstLine.split(/\s+/u).filter(Boolean);
+    return words.length < 3;
+}

package/dist/orchestrator/src/cli/exec/tfgrpo.js CHANGED Viewed

@@ -112,6 +112,7 @@ export async function persistExperienceRecords(params) {
     }
     try {
         const frames = framesFromToolMetrics(runMetrics.perTool, terminalEvent);
+        const reward = deriveExperienceReward(terminalEvent);
         const trajectory = summarizeTrajectory({
             runId: params.manifest.run_id,
             taskId: params.manifest.task_id,
@@ -120,7 +121,8 @@ export async function persistExperienceRecords(params) {
             domain: promptPack.domain,
             stampSignature: promptPack.stamp,
             frames,
-            baseSummary: params.manifest.summary ?? undefined
+            baseSummary: params.manifest.summary ?? undefined,
+            reward
         });
         const optimized = optimizeExperience(trajectory, params.policy);
         const manifestPath = relativeToRepo(params.env, params.paths.manifestPath);
@@ -198,3 +200,13 @@ function findTerminalEvent(events) {
     }
     return events.length > 0 ? events[events.length - 1] : null;
 }
+function deriveExperienceReward(event) {
+    if (event.type !== 'exec:end') {
+        return { gtScore: 0, relativeRank: 0 };
+    }
+    const succeeded = event.payload.status === 'succeeded' && event.payload.exitCode === 0;
+    return {
+        gtScore: succeeded ? 1 : 0,
+        relativeRank: 0
+    };
+}

package/dist/orchestrator/src/cli/orchestrator.js CHANGED Viewed

@@ -30,11 +30,14 @@ import { RunEventStream, attachRunEventAdapter } from './events/runEventStream.j
 import { CLI_EXECUTION_MODE_PARSER, resolveRequiresCloudPolicy } from '../utils/executionMode.js';
 import { resolveCodexCliBin } from './utils/codexCli.js';
 import { CodexCloudTaskExecutor } from '../cloud/CodexCloudTaskExecutor.js';
+import { persistPipelineExperience } from './services/pipelineExperience.js';
 const resolveBaseEnvironment = () => normalizeEnvironmentPaths(resolveEnvironmentPaths());
 const CONFIG_OVERRIDE_ENV_KEYS = ['CODEX_CONFIG_OVERRIDES', 'CODEX_MCP_CONFIG_OVERRIDES'];
 const DEFAULT_CLOUD_POLL_INTERVAL_SECONDS = 10;
 const DEFAULT_CLOUD_TIMEOUT_SECONDS = 1800;
 const DEFAULT_CLOUD_ATTEMPTS = 1;
+const MAX_CLOUD_PROMPT_EXPERIENCES = 3;
+const MAX_CLOUD_PROMPT_EXPERIENCE_CHARS = 320;
 function collectDelegationEnvOverrides(env = process.env) {
     const layers = [];
     for (const key of CONFIG_OVERRIDE_ENV_KEYS) {
@@ -70,6 +73,112 @@ function readCloudNumber(raw, fallback) {
     }
     return parsed;
 }
+function readCloudFeatureList(raw) {
+    if (!raw) {
+        return [];
+    }
+    const seen = new Set();
+    const features = [];
+    for (const token of raw.split(/[,\s]+/u)) {
+        const feature = token.trim();
+        if (!feature || seen.has(feature)) {
+            continue;
+        }
+        seen.add(feature);
+        features.push(feature);
+    }
+    return features;
+}
+function normalizePromptSnippet(value) {
+    return value.replace(/\s+/gu, ' ').trim();
+}
+function truncatePromptSnippet(value) {
+    if (value.length <= MAX_CLOUD_PROMPT_EXPERIENCE_CHARS) {
+        return value;
+    }
+    return `${value.slice(0, MAX_CLOUD_PROMPT_EXPERIENCE_CHARS - 1).trimEnd()}…`;
+}
+function readPromptPackDomain(value) {
+    if (typeof value !== 'string') {
+        return null;
+    }
+    const trimmed = value.trim();
+    return trimmed.length > 0 ? trimmed : null;
+}
+function readPromptPackDomainLower(pack) {
+    const domain = readPromptPackDomain(pack.domain);
+    return domain ? domain.toLowerCase() : null;
+}
+function hasPromptPackExperiences(pack) {
+    if (!readPromptPackDomain(pack.domain)) {
+        return false;
+    }
+    return (Array.isArray(pack.experiences) &&
+        pack.experiences.some((entry) => typeof entry === 'string' && normalizePromptSnippet(entry).length > 0));
+}
+function selectPromptPackForCloudPrompt(params) {
+    const candidates = (params.promptPacks ?? []).filter(hasPromptPackExperiences);
+    if (candidates.length === 0) {
+        return null;
+    }
+    const haystack = [
+        params.pipeline.id,
+        params.pipeline.title,
+        (params.pipeline.tags ?? []).join(' '),
+        params.target.id,
+        params.target.description ?? '',
+        params.stage.id,
+        params.stage.title
+    ]
+        .join(' ')
+        .toLowerCase();
+    const directMatch = candidates.find((pack) => {
+        const domainLower = readPromptPackDomainLower(pack);
+        return domainLower !== null && domainLower !== 'implementation' && haystack.includes(domainLower);
+    });
+    if (directMatch) {
+        return directMatch;
+    }
+    const broadDirectMatch = candidates.find((pack) => {
+        const domainLower = readPromptPackDomainLower(pack);
+        return domainLower !== null && haystack.includes(domainLower);
+    });
+    if (broadDirectMatch) {
+        return broadDirectMatch;
+    }
+    const implementation = candidates.find((pack) => readPromptPackDomainLower(pack) === 'implementation');
+    if (implementation) {
+        return implementation;
+    }
+    return candidates[0] ?? null;
+}
+function buildCloudExperiencePromptLines(params) {
+    const selectedPack = selectPromptPackForCloudPrompt({
+        promptPacks: params.manifest.prompt_packs,
+        pipeline: params.pipeline,
+        target: params.target,
+        stage: params.stage
+    });
+    if (!selectedPack || !Array.isArray(selectedPack.experiences)) {
+        return [];
+    }
+    const snippets = selectedPack.experiences
+        .filter((entry) => typeof entry === 'string')
+        .map((entry) => normalizePromptSnippet(entry))
+        .filter((entry) => entry.length > 0)
+        .slice(0, MAX_CLOUD_PROMPT_EXPERIENCES)
+        .map((entry) => truncatePromptSnippet(entry));
+    if (snippets.length === 0) {
+        return [];
+    }
+    const domainLabel = readPromptPackDomain(selectedPack.domain) ?? 'unknown';
+    return [
+        '',
+        'Relevant prior experiences (hints, not strict instructions):',
+        `Domain: ${domainLabel}`,
+        ...snippets.map((entry, index) => `${index + 1}. ${entry}`)
+    ];
+}
 function resolveCloudEnvironmentId(task, target, envOverrides) {
     const metadata = (target.metadata ?? {});
     const taskMetadata = (task.metadata ?? {});
@@ -641,6 +750,7 @@ export class CodexOrchestrator {
         await schedulePersist({ manifest: true, heartbeat: true, force: true }).catch((error) => {
             logger.warn(`Heartbeat update failed for run ${manifest.run_id}: ${error?.message ?? String(error)}`);
         });
+        await persistPipelineExperience({ env, pipeline, manifest, paths });
         await schedulePersist({ force: true });
         await appendMetricsEntry(env, paths, manifest, persister);
         return {
@@ -762,12 +872,16 @@ export class CodexOrchestrator {
                         status: targetEntry.status
                     });
                     const executor = new CodexCloudTaskExecutor();
-                    const prompt = this.buildCloudPrompt(task, target, pipeline, targetStage);
+                    const prompt = this.buildCloudPrompt(task, target, pipeline, targetStage, manifest);
                     const pollIntervalSeconds = readCloudNumber(envOverrides?.CODEX_CLOUD_POLL_INTERVAL_SECONDS ?? process.env.CODEX_CLOUD_POLL_INTERVAL_SECONDS, DEFAULT_CLOUD_POLL_INTERVAL_SECONDS);
                     const timeoutSeconds = readCloudNumber(envOverrides?.CODEX_CLOUD_TIMEOUT_SECONDS ?? process.env.CODEX_CLOUD_TIMEOUT_SECONDS, DEFAULT_CLOUD_TIMEOUT_SECONDS);
                     const attempts = readCloudNumber(envOverrides?.CODEX_CLOUD_EXEC_ATTEMPTS ?? process.env.CODEX_CLOUD_EXEC_ATTEMPTS, DEFAULT_CLOUD_ATTEMPTS);
                     const branch = readCloudString(envOverrides?.CODEX_CLOUD_BRANCH) ??
                         readCloudString(process.env.CODEX_CLOUD_BRANCH);
+                    const enableFeatures = readCloudFeatureList(readCloudString(envOverrides?.CODEX_CLOUD_ENABLE_FEATURES) ??
+                        readCloudString(process.env.CODEX_CLOUD_ENABLE_FEATURES));
+                    const disableFeatures = readCloudFeatureList(readCloudString(envOverrides?.CODEX_CLOUD_DISABLE_FEATURES) ??
+                        readCloudString(process.env.CODEX_CLOUD_DISABLE_FEATURES));
                     const codexBin = resolveCodexCliBin({ ...process.env, ...(envOverrides ?? {}) });
                     const cloudResult = await executor.execute({
                         codexBin,
@@ -779,6 +893,8 @@ export class CodexOrchestrator {
                         timeoutSeconds,
                         attempts,
                         branch,
+                        enableFeatures,
+                        disableFeatures,
                         env: envOverrides
                     });
                     success = cloudResult.success;
@@ -825,6 +941,7 @@ export class CodexOrchestrator {
         await schedulePersist({ manifest: true, heartbeat: true, force: true }).catch((error) => {
             logger.warn(`Heartbeat update failed for run ${manifest.run_id}: ${error?.message ?? String(error)}`);
         });
+        await persistPipelineExperience({ env, pipeline, manifest, paths });
         await schedulePersist({ force: true });
         await appendMetricsEntry(env, paths, manifest, persister);
         return {
@@ -851,7 +968,7 @@ export class CodexOrchestrator {
         }
         return null;
     }
-    buildCloudPrompt(task, target, pipeline, stage) {
+    buildCloudPrompt(task, target, pipeline, stage, manifest) {
         const lines = [
             `Task ID: ${task.id}`,
             `Task title: ${task.title}`,
@@ -861,6 +978,7 @@ export class CodexOrchestrator {
             '',
             'Apply the required repository changes for this target stage and produce a diff.'
         ].filter((line) => Boolean(line));
+        lines.push(...buildCloudExperiencePromptLines({ manifest, pipeline, target, stage }));
         return lines.join('\n');
     }
     async performRunLifecycle(context) {