npm - @a5c-ai/babysitter-omp - Versions diffs - 0.1.0 - Mend

@a5c-ai/babysitter-omp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/README.md +80 -0
package/bin/cli.cjs +78 -0
package/bin/install.cjs +144 -0
package/bin/uninstall.cjs +40 -0
package/commands/babysitter-call.md +12 -0
package/commands/babysitter-doctor.md +10 -0
package/commands/babysitter-resume.md +16 -0
package/commands/babysitter-status.md +15 -0
package/extensions/babysitter/cli-wrapper.ts +95 -0
package/extensions/babysitter/constants.ts +77 -0
package/extensions/babysitter/custom-tools.ts +208 -0
package/extensions/babysitter/effect-executor.ts +362 -0
package/extensions/babysitter/guards.ts +257 -0
package/extensions/babysitter/index.ts +554 -0
package/extensions/babysitter/loop-driver.ts +256 -0
package/extensions/babysitter/result-poster.ts +115 -0
package/extensions/babysitter/sdk-bridge.ts +243 -0
package/extensions/babysitter/session-binder.ts +284 -0
package/extensions/babysitter/status-line.ts +54 -0
package/extensions/babysitter/task-interceptor.ts +82 -0
package/extensions/babysitter/todo-replacement.ts +125 -0
package/extensions/babysitter/tool-renderer.ts +263 -0
package/extensions/babysitter/tui-widgets.ts +164 -0
package/extensions/babysitter/types.ts +222 -0
package/package.json +57 -0
package/scripts/setup.sh +74 -0
package/scripts/sync-command-docs.cjs +115 -0
package/skills/babysitter/SKILL.md +45 -0

package/extensions/babysitter/loop-driver.ts ADDED Viewed

@@ -0,0 +1,256 @@
+/**
+ * The orchestration loop driver.
+ *
+ * On every `agent_end` event the loop driver decides whether to continue
+ * iterating: it checks guards, invokes `orchestrateIteration` via the SDK
+ * bridge, maps pending effects to a continuation prompt, and schedules a
+ * follow-up turn via `pi.sendUserMessage()` if there is more work to do.
+ *
+ * This module uses the babysitter SDK directly -- no CLI subprocesses are
+ * spawned, no JSON is scraped from stdout.  Everything runs in-process,
+ * which is marginally less depressing than the alternative.
+ *
+ * @module loop-driver
+ */
+import type { ExtensionAPI } from './types.js';
+import type { IterationResult, EffectAction } from '@a5c-ai/babysitter-sdk';
+import { iterate, postResult, getRunStatus } from './sdk-bridge.js';
+import { getActiveRun, setActiveRun, clearActiveRun } from './session-binder.js';
+import { checkGuards, recordIterationDigest, resetDigests, recordIterationOutcome, recordPendingCount } from './guards.js';
+// ---------------------------------------------------------------------------
+// Promise-tag extraction
+// ---------------------------------------------------------------------------
+/**
+ * Pull the completion-proof payload out of a block of agent output.
+ *
+ * By babysitter convention an agent announces that it has finished by
+ * emitting `<promise>payload</promise>`.  The payload must be non-empty and
+ * must not contain a `<` character; the leftmost tag satisfying that wins.
+ *
+ * @param text - Raw agent output to search.
+ * @returns The text between the matched pair of tags, or `null` when the
+ *          output contains no such tag.
+ */
+export function extractPromiseTag(text: string): string | null {
+  const promisePattern = /<promise>([^<]+)<\/promise>/;
+  const found = text.match(promisePattern);
+  if (found === null) {
+    return null;
+  }
+  return found[1];
+}
+// ---------------------------------------------------------------------------
+// Continuation prompt builder
+// ---------------------------------------------------------------------------
+/**
+ * Build the continuation prompt injected into the conversation to keep the
+ * orchestration loop alive.
+ *
+ * The prompt starts with a header naming the iteration and run.  When there
+ * are pending effects it lists each one (kind, title, effect id), followed
+ * by one instruction line per distinct effect kind and a closing reminder
+ * to post results.  When there are none it says so and returns early.
+ *
+ * An effect's title is `taskDef.title`, falling back to `label`, falling
+ * back to a placeholder built from the effect id.
+ *
+ * @param iterationResult - The SDK iteration result containing nextActions.
+ * @param runState        - The current run id and iteration counter.
+ * @returns A prompt string for the next agent turn.
+ */
+export function buildContinuationPrompt(
+  iterationResult: Extract<IterationResult, { status: 'waiting' }>,
+  runState: { runId: string; iteration: number },
+): string {
+  const actions = iterationResult.nextActions;
+  const header =
+    `[babysitter] Iteration ${runState.iteration} | Run ${runState.runId} | ` +
+    `Continue orchestration`;
+  if (actions.length === 0) {
+    return `${header}\n\nNo pending effects. Waiting for external resolution.`;
+  }
+  const effectLines = actions.map((action: EffectAction, idx: number) => {
+    const title =
+      action.taskDef?.title ?? action.label ?? `(effect ${action.effectId})`;
+    return `  ${idx + 1}. [${action.kind}] ${title}  (effectId: ${action.effectId})`;
+  });
+  // A Map (rather than a plain object) so that a kind such as "constructor"
+  // or "toString" cannot resolve to an inherited Object.prototype member and
+  // bypass the generic fallback below.
+  const instructionsByKind = new Map<string, string>([
+    ['node', 'Execute the Node.js task and capture its output.'],
+    ['shell', 'Run the shell command and capture stdout/stderr.'],
+    ['agent', 'Delegate to a sub-agent and collect its response.'],
+    ['breakpoint', 'This is a human approval gate. Approve or reject to continue.'],
+    ['sleep', 'Wait for the specified duration, then post an OK result.'],
+    ['orchestrator_task', 'Delegate to the orchestrator sub-process.'],
+    ['skill', 'Invoke the named skill and return its result.'],
+  ]);
+  // One instruction line per distinct kind, in order of first appearance.
+  const uniqueKinds = [...new Set(actions.map((a: EffectAction) => a.kind))];
+  const instructions = uniqueKinds
+    .map((kind) => {
+      const instruction =
+        instructionsByKind.get(kind) ?? `Handle the "${kind}" effect.`;
+      return `  - ${kind}: ${instruction}`;
+    })
+    .join('\n');
+  return [
+    header,
+    '',
+    `Pending effects (${actions.length}):`,
+    ...effectLines,
+    '',
+    'Instructions by effect kind:',
+    instructions,
+    '',
+    'Execute the effects, post results, then stop.',
+  ].join('\n');
+}
+// ---------------------------------------------------------------------------
+// agent_end handler
+// ---------------------------------------------------------------------------
+/**
+ * Handle the `agent_end` event from oh-my-pi.
+ *
+ * This is the core orchestration loop driver.  When the LLM finishes a
+ * turn, we:
+ *
+ *   1. Look up the active babysitter run for the current session (the
+ *      session id falls back to `'default'` when the event carries none).
+ *      With no active run the handler returns immediately.
+ *   2. Check for a `<promise>` completion proof in the agent output
+ *      (`event.output`, else `event.text`, else the empty string).  If one
+ *      is found the run is cleared and the handler returns; the SDK is not
+ *      consulted on this path.
+ *   3. Run guard checks (max iterations, time limits, doom-loop detection).
+ *      A failed guard logs a warning, clears the run, and returns.
+ *   4. Call `orchestrateIteration` via the SDK bridge.  If that call throws,
+ *      the error is logged, the failed outcome and timing are recorded, the
+ *      updated run is stored, and the handler returns WITHOUT sending a
+ *      follow-up message.
+ *   5. Based on the result, either clean up (completed/failed) or inject
+ *      a follow-up prompt via `pi.sendUserMessage` to continue the loop
+ *      (waiting).
+ *
+ * Every path that clears the active run also calls `resetDigests()`.
+ *
+ * @param event - The `agent_end` event payload from oh-my-pi.
+ * @param pi    - The oh-my-pi {@link ExtensionAPI} handle.
+ * @returns A promise that resolves once the event has been handled; errors
+ *          thrown by the iteration call are caught and logged, not rethrown.
+ */
+export async function onAgentEnd(
+  event: {
+    sessionId?: string;
+    output?: string;
+    text?: string;
+  },
+  pi: ExtensionAPI,
+): Promise<void> {
+  const sessionId = event.sessionId ?? 'default';
+  // 1. Look up the run bound to this session
+  const run = getActiveRun(sessionId);
+  if (!run) {
+    return; // No active babysitter run -- nothing to do.
+  }
+  // 2. Pick the agent output (output, then text, then '') and look for a completion proof
+  const agentOutput = event.output ?? event.text ?? '';
+  const promise = extractPromiseTag(agentOutput);
+  if (promise) {
+    // The agent declared itself done.  The tag is taken at face value here: log it and drop the run.
+    pi.appendEntry({
+      type: 'info',
+      content: `[babysitter] Completion proof received: "${promise}". Finalising run ${run.runId}.`,
+    });
+    clearActiveRun(sessionId);
+    resetDigests();
+    return;
+  }
+  // 3. Guard checks -- a failed guard stops the run before any iteration is attempted
+  const guardResult = checkGuards(run);
+  if (!guardResult.passed) {
+    pi.appendEntry({
+      type: 'warning',
+      content: `[babysitter] Guard tripped: ${guardResult.reason}. Stopping run ${run.runId}.`,
+    });
+    clearActiveRun(sessionId);
+    resetDigests();
+    return;
+  }
+  // 4. Run one SDK orchestration iteration, timing it from here
+  let iterResult: IterationResult;
+  const iterStart = Date.now();
+  try {
+    iterResult = await iterate(run.runDir);
+  } catch (err: unknown) {
+    const errMsg = err instanceof Error ? err.message : String(err);
+    pi.appendEntry({
+      type: 'error',
+      content: `[babysitter] Iteration failed for run ${run.runId}: ${errMsg}`,
+    });
+    // Record the failure and store the bumped counters; the run is not cleared, but no follow-up message is sent from this path
+    recordIterationOutcome(false);
+    run.iteration += 1;
+    run.iterationTimes.push(Date.now() - iterStart);
+    setActiveRun(run);
+    return;
+  }
+  // Successful iteration: record the outcome, bump the counter, and store the elapsed milliseconds
+  recordIterationOutcome(true);
+  run.iteration += 1;
+  run.iterationTimes.push(Date.now() - iterStart);
+  // Feed the pending effect ids (as a JSON digest) and their count to the guards module for doom-loop detection
+  if (iterResult.status === 'waiting') {
+    recordIterationDigest(
+      JSON.stringify(iterResult.nextActions.map((a: EffectAction) => a.effectId)),
+    );
+    recordPendingCount(iterResult.nextActions.length);
+  }
+  // 5. Handle the result: terminal statuses clear the run, 'waiting' schedules another turn
+  switch (iterResult.status) {
+    case 'completed': {
+      pi.appendEntry({
+        type: 'info',
+        content: `[babysitter] Run ${run.runId} completed successfully after ${run.iteration} iteration(s).`,
+      });
+      run.status = 'completed';
+      setActiveRun(run);
+      clearActiveRun(sessionId);
+      resetDigests();
+      return;
+    }
+    case 'failed': {
+      const failErr =
+        iterResult.error instanceof Error
+          ? iterResult.error.message
+          : String(iterResult.error ?? 'unknown error');
+      pi.appendEntry({
+        type: 'error',
+        content: `[babysitter] Run ${run.runId} failed: ${failErr}`,
+      });
+      run.status = 'failed';
+      setActiveRun(run);
+      clearActiveRun(sessionId);
+      resetDigests();
+      return;
+    }
+    case 'waiting': {
+      run.status = 'running';
+      setActiveRun(run);
+      const prompt = buildContinuationPrompt(iterResult, {
+        runId: run.runId,
+        iteration: run.iteration,
+      });
+      pi.sendUserMessage({ role: 'user', content: prompt });
+      return;
+    }
+    default: {
+      // Exhaustiveness guard -- the `never` assignment below fails to compile
+      // if a new status is added to IterationResult without a case here.
+      const _exhaustive: never = iterResult;
+      pi.appendEntry({
+        type: 'warning',
+        content: `[babysitter] Unexpected iteration status. This should not happen.`,
+      });
+      void _exhaustive;
+      return;
+    }
+  }
+}

package/extensions/babysitter/result-poster.ts ADDED Viewed

@@ -0,0 +1,115 @@
+/**
+ * Posts task results back to the babysitter runtime.
+ *
+ * After an effect has been executed (by the effect-executor or externally)
+ * the result must be committed to the run's journal so the next iteration
+ * can replay it.  This module calls `commitEffectResult` from the SDK
+ * directly — no CLI subprocess, no JSON scraping, no carrier pigeons.
+ *
+ * @module result-poster
+ */
+import {
+  commitEffectResult,
+  type CommitEffectResultOptions,
+  type CommitEffectResultArtifacts,
+} from '@a5c-ai/babysitter-sdk';
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+/**
+ * Options accepted by {@link postResult}.
+ *
+ * `status` selects which payload is forwarded to the SDK: `value` is only
+ * passed on for `'ok'`, `error` only for `'error'`; the other one is sent
+ * as `undefined`.
+ */
+export interface PostResultOptions {
+  /** Absolute path to the run directory. */
+  runDir: string;
+  /** The effect identifier to resolve. */
+  effectId: string;
+  /** Whether the task succeeded (`'ok'`) or failed (`'error'`). */
+  status: 'ok' | 'error';
+  /** The task's return value; ignored unless status is 'ok'. */
+  value?: unknown;
+  /** Error payload; ignored unless status is 'error'. */
+  error?: unknown;
+  /** Optional stdout captured during execution; forwarded unchanged. */
+  stdout?: string;
+  /** Optional stderr captured during execution; forwarded unchanged. */
+  stderr?: string;
+  /** ISO-8601 timestamp when the task started executing. */
+  startedAt?: string;
+  /** ISO-8601 timestamp when the task finished executing. */
+  finishedAt?: string;
+}
+/**
+ * Artifacts returned after a result has been committed.
+ * Alias of the SDK's `CommitEffectResultArtifacts`.
+ */
+export type PostResultArtifacts = CommitEffectResultArtifacts;
+// ---------------------------------------------------------------------------
+// postResult
+// ---------------------------------------------------------------------------
+/**
+ * Commit an effect result to a run through the SDK.
+ *
+ * Reshapes the extension-level {@link PostResultOptions} into the SDK's
+ * {@link CommitEffectResultOptions} and hands it to `commitEffectResult`.
+ * Only the payload matching `status` is forwarded: `value` for `'ok'`,
+ * `error` for `'error'`; the other field is sent as `undefined`.
+ *
+ * @param opts - Run directory, effect id, status, and optional payload,
+ *               captured output, and timing information.
+ * @returns Whatever artifacts `commitEffectResult` resolves with.
+ */
+export async function postResult(
+  opts: PostResultOptions,
+): Promise<PostResultArtifacts> {
+  const { runDir, effectId, status, stdout, stderr, startedAt, finishedAt } = opts;
+  const isOk = status === 'ok';
+  const isError = status === 'error';
+  const request: CommitEffectResultOptions = {
+    runDir,
+    effectId,
+    result: {
+      status,
+      value: isOk ? opts.value : undefined,
+      error: isError ? opts.error : undefined,
+      stdout,
+      stderr,
+      startedAt,
+      finishedAt,
+    },
+  };
+  const artifacts = await commitEffectResult(request);
+  return artifacts;
+}
+// ---------------------------------------------------------------------------
+// postOkResult
+// ---------------------------------------------------------------------------
+/**
+ * Post a successful result for an effect, discarding the returned artifacts.
+ *
+ * Shorthand for {@link postResult} with `status: 'ok'`.
+ *
+ * @param runDir   - Absolute path to the run directory.
+ * @param effectId - The effect identifier to resolve.
+ * @param value    - The task's return value.
+ */
+export async function postOkResult(
+  runDir: string,
+  effectId: string,
+  value: unknown,
+): Promise<void> {
+  const okOutcome: PostResultOptions = { runDir, effectId, status: 'ok', value };
+  await postResult(okOutcome);
+}
+// ---------------------------------------------------------------------------
+// postErrorResult
+// ---------------------------------------------------------------------------
+/**
+ * Post a failed result for an effect, discarding the returned artifacts.
+ *
+ * Shorthand for {@link postResult} with `status: 'error'`.
+ *
+ * @param runDir   - Absolute path to the run directory.
+ * @param effectId - The effect identifier to resolve.
+ * @param error    - The error payload describing what went wrong.
+ */
+export async function postErrorResult(
+  runDir: string,
+  effectId: string,
+  error: unknown,
+): Promise<void> {
+  const failedOutcome: PostResultOptions = { runDir, effectId, status: 'error', error };
+  await postResult(failedOutcome);
+}

package/extensions/babysitter/sdk-bridge.ts ADDED Viewed

@@ -0,0 +1,243 @@
+/**
+ * Babysitter SDK bridge for the oh-my-pi extension.
+ *
+ * Replaces the former `cli-wrapper` module with direct SDK function calls.
+ * No child processes are spawned, no stdout is parsed, and no JSON is
+ * scraped from a subprocess pipe.  Instead we import the runtime and
+ * storage layers from `@a5c-ai/babysitter-sdk` and call them in-process.
+ *
+ * Every other module in this extension that needs to talk to babysitter
+ * should go through this bridge so there is exactly one place to handle
+ * option translation and error mapping.
+ *
+ * @module sdk-bridge
+ */
+import {
+  createRun,
+  orchestrateIteration,
+  commitEffectResult,
+  type CreateRunOptions,
+  type CreateRunResult,
+  type OrchestrateOptions,
+  type IterationResult,
+  type CommitEffectResultOptions,
+  type CommitEffectResultArtifacts,
+  type EffectAction,
+} from '@a5c-ai/babysitter-sdk';
+import {
+  loadJournal,
+  readRunMetadata,
+} from '@a5c-ai/babysitter-sdk';
+// ---------------------------------------------------------------------------
+// Error wrapper
+// ---------------------------------------------------------------------------
+/**
+ * Error type thrown by every function in the SDK bridge.
+ *
+ * Carries a bridge-level message plus, in `cause`, whatever the SDK
+ * originally threw (or `undefined` when nothing was supplied).
+ */
+export class SdkBridgeError extends Error {
+  /** The underlying value thrown by the SDK, if any. */
+  readonly cause: unknown;
+  /**
+   * @param message - Human-readable description of the failed operation.
+   * @param cause   - The original thrown value being wrapped.
+   */
+  constructor(message: string, cause?: unknown) {
+    super(message);
+    this.cause = cause;
+    this.name = 'SdkBridgeError';
+  }
+}
+// ---------------------------------------------------------------------------
+// createNewRun
+// ---------------------------------------------------------------------------
+/**
+ * Start a new babysitter run through the SDK.
+ *
+ * The flat option bag is regrouped into the SDK's {@link CreateRunOptions}
+ * shape (process details nested under `process`) and passed to `createRun`;
+ * its result is returned untouched.
+ *
+ * @param opts - Runs directory, process identity/import location, and
+ *               optional inputs and prompt.
+ * @returns The SDK's {@link CreateRunResult}.
+ * @throws {SdkBridgeError} Wrapping anything thrown while creating the run.
+ */
+export async function createNewRun(opts: {
+  runsDir: string;
+  processId: string;
+  importPath: string;
+  exportName?: string;
+  inputs?: unknown;
+  prompt?: string;
+}): Promise<CreateRunResult> {
+  try {
+    const { runsDir, processId, importPath, exportName, inputs, prompt } = opts;
+    const request: CreateRunOptions = {
+      runsDir,
+      process: { processId, importPath, exportName },
+      inputs,
+      prompt,
+    };
+    const created = await createRun(request);
+    return created;
+  } catch (cause) {
+    const message = `Failed to create run for process "${opts.processId}"`;
+    throw new SdkBridgeError(message, cause);
+  }
+}
+// ---------------------------------------------------------------------------
+// iterate
+// ---------------------------------------------------------------------------
+/**
+ * Execute one orchestration iteration for an existing run.
+ *
+ * The run is addressed by its directory rather than its id, so the caller
+ * decides exactly which on-disk run is advanced.
+ *
+ * @param runDir - Directory of the run to iterate.
+ * @param opts   - Optional `inputs` and `context` forwarded to the SDK.
+ * @returns The {@link IterationResult} produced by `orchestrateIteration`.
+ * @throws {SdkBridgeError} Wrapping anything thrown during the iteration.
+ */
+export async function iterate(
+  runDir: string,
+  opts?: {
+    inputs?: unknown;
+    context?: Record<string, unknown>;
+  },
+): Promise<IterationResult> {
+  try {
+    const request: OrchestrateOptions = {
+      runDir,
+      inputs: opts?.inputs,
+      context: opts?.context,
+    };
+    const outcome = await orchestrateIteration(request);
+    return outcome;
+  } catch (cause) {
+    const message = `Iteration failed for runDir "${runDir}"`;
+    throw new SdkBridgeError(message, cause);
+  }
+}
+// ---------------------------------------------------------------------------
+// postResult
+// ---------------------------------------------------------------------------
+/**
+ * Write an effect result (task completion) into a run's journal.
+ *
+ * This is the SDK equivalent of `babysitter task:post`.  Only the payload
+ * matching `status` is forwarded: `value` for `'ok'`, `error` for
+ * `'error'`; the other field is sent as `undefined`.
+ *
+ * @param opts - Run directory, effect id, status, and the matching payload.
+ * @returns The artifacts resolved by `commitEffectResult`.
+ * @throws {SdkBridgeError} Wrapping anything thrown while committing.
+ */
+export async function postResult(opts: {
+  runDir: string;
+  effectId: string;
+  status: 'ok' | 'error';
+  value?: unknown;
+  error?: unknown;
+}): Promise<CommitEffectResultArtifacts> {
+  try {
+    const { runDir, effectId, status } = opts;
+    const isOk = status === 'ok';
+    const isError = status === 'error';
+    const request: CommitEffectResultOptions = {
+      runDir,
+      effectId,
+      result: {
+        status,
+        value: isOk ? opts.value : undefined,
+        error: isError ? opts.error : undefined,
+      },
+    };
+    const artifacts = await commitEffectResult(request);
+    return artifacts;
+  } catch (cause) {
+    const message = `Failed to post result for effect "${opts.effectId}"`;
+    throw new SdkBridgeError(message, cause);
+  }
+}
+// ---------------------------------------------------------------------------
+// getRunStatus
+// ---------------------------------------------------------------------------
+/**
+ * Summarise a run by reading its metadata and replaying its journal.
+ *
+ * Status comes from the last terminal event found in the journal:
+ *   - `RUN_COMPLETED`  -> `"completed"`
+ *   - `RUN_FAILED`     -> `"failed"`
+ *   - none of either   -> `"running"`
+ *
+ * An effect counts as pending when the journal holds an `EFFECT_REQUESTED`
+ * entry for its id and no `EFFECT_RESOLVED` entry with the same id.
+ * Journal entries without a `data` payload are ignored for this purpose.
+ * Missing `invocationKey`, `kind`, and `taskDef` fields default to `''`,
+ * `'unknown'`, and `{}` respectively.
+ *
+ * @param runDir - Directory of the run to inspect.
+ * @returns The run id and process id from the metadata, the derived
+ *          status, and the pending effects in first-request order.
+ * @throws {SdkBridgeError} Wrapping anything thrown while reading the run.
+ */
+export async function getRunStatus(runDir: string): Promise<{
+  runId: string;
+  processId: string;
+  status: string;
+  pendingEffects: EffectAction[];
+}> {
+  try {
+    const metadata = await readRunMetadata(runDir);
+    const journal = await loadJournal(runDir);
+    let status = 'running';
+    const resolvedIds = new Set<string>();
+    const requestedById = new Map<string, EffectAction>();
+    for (const entry of journal) {
+      // The last terminal event wins; non-terminal events leave status alone.
+      switch (entry.type) {
+        case 'RUN_COMPLETED':
+          status = 'completed';
+          break;
+        case 'RUN_FAILED':
+          status = 'failed';
+          break;
+        default:
+          break;
+      }
+      // Effect bookkeeping needs the entry payload; skip entries without one.
+      const data = entry.data as Record<string, unknown> | undefined;
+      if (!data) continue;
+      if (entry.type === 'EFFECT_RESOLVED') {
+        resolvedIds.add(data.effectId as string);
+        continue;
+      }
+      if (entry.type === 'EFFECT_REQUESTED') {
+        const effectId = data.effectId as string;
+        const action: EffectAction = {
+          effectId,
+          invocationKey: (data.invocationKey as string) ?? '',
+          kind: (data.kind as string) ?? 'unknown',
+          label: data.label as string | undefined,
+          taskDef: (data.taskDef ?? {}) as EffectAction['taskDef'],
+        };
+        requestedById.set(effectId, action);
+      }
+    }
+    // Requested but never resolved, in Map insertion order.
+    const pendingEffects: EffectAction[] = [...requestedById]
+      .filter(([effectId]) => !resolvedIds.has(effectId))
+      .map(([, action]) => action);
+    return {
+      runId: metadata.runId,
+      processId: metadata.processId,
+      status,
+      pendingEffects,
+    };
+  } catch (cause) {
+    const message = `Failed to read run status for "${runDir}"`;
+    throw new SdkBridgeError(message, cause);
+  }
+}
+// ---------------------------------------------------------------------------
+// getPendingEffects
+// ---------------------------------------------------------------------------
+/**
+ * Return just the pending effects of a run.
+ *
+ * Thin wrapper over {@link getRunStatus} that discards everything except
+ * its `pendingEffects` field.
+ *
+ * @param runDir - Directory of the run to inspect.
+ * @returns The effects that have been requested but not yet resolved.
+ */
+export async function getPendingEffects(runDir: string): Promise<EffectAction[]> {
+  const snapshot = await getRunStatus(runDir);
+  return snapshot.pendingEffects;
+}