npm - @a5c-ai/babysitter-paperclip - Versions diffs - 0.0.2-staging.02a0ee21 - Mend

@a5c-ai/babysitter-paperclip 0.0.2-staging.02a0ee21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/BABYSITTER.md +46 -0
package/README.md +229 -0
package/esbuild.config.mjs +12 -0
package/package.json +29 -0
package/src/__tests__/delegating-adapter.test.ts +85 -0
package/src/__tests__/types.test.ts +29 -0
package/src/babysitter-bridge.ts +313 -0
package/src/delegating-adapter.ts +97 -0
package/src/harness-plugin-installer.ts +202 -0
package/src/manifest.ts +101 -0
package/src/types.ts +130 -0
package/src/ui/BabysitterDashboard.tsx +142 -0
package/src/ui/BabysitterSidebar.tsx +123 -0
package/src/ui/BreakpointApproval.tsx +212 -0
package/src/ui/RunDetailTab.tsx +169 -0
package/src/ui/index.tsx +9 -0
package/src/ui/styles.ts +75 -0
package/src/worker.ts +595 -0
package/tsconfig.json +19 -0
package/versions.json +3 -0

package/src/babysitter-bridge.ts ADDED Viewed

@@ -0,0 +1,313 @@
+/**
+ * CLI bridge to the babysitter SDK.
+ *
+ * Wraps babysitter CLI commands as typed async functions for use by the
+ * Paperclip plugin worker. All operations shell out to the `babysitter` CLI
+ * to maintain a clean process boundary.
+ */
+import { execFile } from "node:child_process";
+import { promisify } from "node:util";
+import type {
+  RunDetail,
+  RunEvent,
+  PendingEffect,
+  PendingBreakpoint,
+} from "./types";
+// Promise-based form of child_process.execFile; callers in this module await it and read `stdout`.
+const exec = promisify(execFile);
+// Executable name used for every CLI invocation made by this bridge.
+const CLI = "babysitter";
+/**
+ * Run one babysitter CLI command and decode its stdout as JSON.
+ *
+ * `--json` is appended after the caller's arguments and the child process is
+ * given a 60 second timeout. The decoded value is cast to `T`; it is not
+ * validated at runtime.
+ *
+ * @param args - CLI arguments, starting with the sub-command name.
+ * @param options - Optional working directory for the child process.
+ * @returns The JSON value printed by the CLI, typed as `T`.
+ * @throws If the child process fails or times out, or stdout is not valid JSON.
+ */
+async function runCli<T>(
+  args: string[],
+  options?: { cwd?: string }
+): Promise<T> {
+  const cliArgs = args.concat("--json");
+  const completed = await exec(CLI, cliArgs, {
+    cwd: options?.cwd,
+    timeout: 60_000,
+  });
+  const parsed = JSON.parse(completed.stdout) as T;
+  return parsed;
+}
+/**
+ * Create a new babysitter run via `run:create`.
+ *
+ * @param opts - Process id, entry point and inputs file for the run. `runsDir`
+ *   adds `--runs-dir` when it is a non-empty string; `cwd` is the working
+ *   directory of the CLI process.
+ * @returns The run id and run directory reported by the CLI.
+ */
+export async function createRun(opts: {
+  processId: string;
+  entry: string;
+  inputsFile: string;
+  runsDir?: string;
+  cwd?: string;
+}): Promise<{ runId: string; runDir: string }> {
+  const { processId, entry, inputsFile, runsDir, cwd } = opts;
+  // Only forward --runs-dir when the caller actually supplied a value.
+  const runsDirArgs = runsDir ? ["--runs-dir", runsDir] : [];
+  return runCli(
+    [
+      "run:create",
+      "--process-id",
+      processId,
+      "--entry",
+      entry,
+      "--inputs",
+      inputsFile,
+      ...runsDirArgs,
+    ],
+    { cwd }
+  );
+}
+/**
+ * Iterate a run until pending effects or completion.
+ *
+ * Invokes `run:iterate <runDir>` through `runCli`. The declared return type is
+ * the shape this bridge expects from the CLI's JSON output; it is not
+ * validated at runtime.
+ *
+ * @param runDir - Run directory passed to the CLI as a positional argument.
+ * @param options - Optional working directory for the CLI process.
+ * @returns The parsed `run:iterate` output.
+ */
+export async function iterateRun(
+  runDir: string,
+  options?: { cwd?: string }
+): Promise<{
+  status: string;
+  nextActions: Array<{
+    effectId: string;
+    kind: string;
+    label: string;
+    taskId: string;
+    taskDef?: Record<string, unknown>;
+  }>;
+  metadata: Record<string, unknown>;
+}> {
+  return runCli(["run:iterate", runDir], options);
+}
+/**
+ * Get run status.
+ *
+ * Invokes `run:status <runDir>` through `runCli`. `pendingByKind` is consumed
+ * by `hasOnlyBreakpointsPending` as a map from effect kind to a count.
+ *
+ * @param runDir - Run directory passed to the CLI as a positional argument.
+ * @param options - Optional working directory for the CLI process.
+ * @returns The parsed `run:status` output (not validated at runtime).
+ */
+export async function getRunStatus(
+  runDir: string,
+  options?: { cwd?: string }
+): Promise<{
+  state: string;
+  pendingByKind: Record<string, number>;
+  completionProof: string | null;
+}> {
+  return runCli(["run:status", runDir], options);
+}
+/**
+ * Fetch the events of a run via `run:events`.
+ *
+ * @param runDir - Run directory passed to the CLI as a positional argument.
+ * @param limit - When truthy, forwarded as `--limit <n>`; `undefined` and `0`
+ *   both leave the flag off.
+ * @param options - Optional working directory for the CLI process.
+ * @returns The parsed event list (not validated at runtime).
+ */
+export async function getRunEvents(
+  runDir: string,
+  limit?: number,
+  options?: { cwd?: string }
+): Promise<RunEvent[]> {
+  const limitArgs = limit ? ["--limit", String(limit)] : [];
+  return runCli(["run:events", runDir, ...limitArgs], options);
+}
+/**
+ * List pending tasks.
+ *
+ * Invokes `task:list <runDir> --pending` through `runCli`.
+ *
+ * @param runDir - Run directory passed to the CLI as a positional argument.
+ * @param options - Optional working directory for the CLI process.
+ * @returns The parsed list of pending effects (not validated at runtime).
+ */
+export async function listPendingTasks(
+  runDir: string,
+  options?: { cwd?: string }
+): Promise<PendingEffect[]> {
+  return runCli(["task:list", runDir, "--pending"], options);
+}
+/**
+ * Show a specific task.
+ *
+ * Invokes `task:show <runDir> <effectId>` through `runCli`.
+ *
+ * @param runDir - Run directory passed to the CLI as a positional argument.
+ * @param effectId - Identifier of the effect whose task should be shown.
+ * @param options - Optional working directory for the CLI process.
+ * @returns The effect plus its task record, or `task: null` when the CLI
+ *   reports none (shape not validated at runtime).
+ */
+export async function showTask(
+  runDir: string,
+  effectId: string,
+  options?: { cwd?: string }
+): Promise<{ effect: PendingEffect; task: Record<string, unknown> | null }> {
+  return runCli(["task:show", runDir, effectId], options);
+}
+/**
+ * Post the result of an effect via `task:post`.
+ *
+ * Used both for ordinary effect results and for breakpoint approvals and
+ * rejections. The value is serialised with `JSON.stringify` and passed inline
+ * through `--value-inline`.
+ *
+ * @param runDir - Run directory passed to the CLI as a positional argument.
+ * @param effectId - Identifier of the effect being resolved.
+ * @param result - Status flag and JSON-serialisable value to record.
+ * @param options - Optional working directory for the CLI process.
+ * @returns The parsed CLI response.
+ */
+export async function postTaskResult(
+  runDir: string,
+  effectId: string,
+  result: {
+    status: "ok" | "error";
+    value: Record<string, unknown>;
+  },
+  options?: { cwd?: string }
+): Promise<{ status: string }> {
+  const { status, value } = result;
+  const inlineValue = JSON.stringify(value);
+  const cliArgs = [
+    "task:post",
+    runDir,
+    effectId,
+    "--status",
+    status,
+    "--value-inline",
+    inlineValue,
+  ];
+  return runCli(cliArgs, options);
+}
+/**
+ * Approve a breakpoint by posting `{ approved: true, response }` with status "ok".
+ *
+ * @param runDir - Run directory of the run that owns the breakpoint.
+ * @param effectId - Identifier of the breakpoint effect.
+ * @param response - Optional approval text; a default message is used when it
+ *   is `undefined` or `null`.
+ * @param options - Optional working directory for the CLI process.
+ * @returns The parsed CLI response from `task:post`.
+ */
+export async function approveBreakpoint(
+  runDir: string,
+  effectId: string,
+  response?: string,
+  options?: { cwd?: string }
+): Promise<{ status: string }> {
+  const value = {
+    approved: true,
+    response: response ?? "Approved via Paperclip UI",
+  };
+  return postTaskResult(runDir, effectId, { status: "ok", value }, options);
+}
+/**
+ * Reject a breakpoint.
+ *
+ * The rejection is still posted with `--status ok`; the decision is carried by
+ * `approved: false` in the value, together with the reviewer's feedback.
+ *
+ * @param runDir - Run directory of the run that owns the breakpoint.
+ * @param effectId - Identifier of the breakpoint effect.
+ * @param feedback - Reviewer feedback recorded with the rejection.
+ * @param options - Optional working directory for the CLI process.
+ * @returns The parsed CLI response from `task:post`.
+ */
+export async function rejectBreakpoint(
+  runDir: string,
+  effectId: string,
+  feedback: string,
+  options?: { cwd?: string }
+): Promise<{ status: string }> {
+  const value = { approved: false, feedback };
+  return postTaskResult(runDir, effectId, { status: "ok", value }, options);
+}
+/**
+ * Best-effort read of `task.metadata.payload` for one effect.
+ * Returns undefined when the task, its metadata or its payload is missing, or
+ * when `task:show` fails for any reason.
+ */
+async function readBreakpointPayload(
+  runDir: string,
+  effectId: string,
+  options?: { cwd?: string }
+): Promise<Record<string, unknown> | undefined> {
+  try {
+    const { task } = await showTask(runDir, effectId, options);
+    if (!task) return undefined;
+    const metadata = task.metadata as Record<string, unknown> | undefined;
+    const payload = metadata?.payload as Record<string, unknown> | undefined;
+    return payload ? payload : undefined;
+  } catch {
+    // Task metadata unavailable - caller falls back to basic info
+    return undefined;
+  }
+}
+/**
+ * Collect the pending breakpoints of a run, enriched with task metadata.
+ *
+ * Pending tasks are listed, non-breakpoint effects are skipped, and for each
+ * breakpoint the task's `metadata.payload` is read (one `task:show` call per
+ * breakpoint, issued sequentially) to pick up question, options, expert
+ * routing, tags, strategy, previous feedback and attempt number. When the
+ * payload cannot be read those fields stay undefined and the effect's label is
+ * used as the title.
+ *
+ * The breakpoint lifecycle:
+ *   1. Process calls ctx.breakpoint(payload) in SDK
+ *   2. SDK writes task.json with kind:"breakpoint" + metadata.payload
+ *   3. run:iterate returns "waiting" with the breakpoint as a pending action
+ *   4. Underlying harness stop hook detects only-breakpoints-pending → allows exit
+ *   5. Paperclip polls run:status, sees pending breakpoint
+ *   6. Paperclip reads task.json metadata, surfaces in UI
+ *   7. User approves/rejects in Paperclip UI
+ *   8. Paperclip posts via task:post --status ok (ALWAYS ok, even for rejection)
+ *   9. Next run:iterate resolves the cached breakpoint result
+ *
+ * @param runDir - Run directory to inspect.
+ * @param options - Optional working directory for the CLI processes.
+ * @returns One entry per pending breakpoint, in the order the CLI listed them.
+ */
+export async function getPendingBreakpoints(
+  runDir: string,
+  options?: { cwd?: string }
+): Promise<PendingBreakpoint[]> {
+  const pendingTasks = await listPendingTasks(runDir, options);
+  const collected: PendingBreakpoint[] = [];
+  for (const pending of pendingTasks) {
+    if (pending.kind !== "breakpoint") continue;
+    const payload = await readBreakpointPayload(runDir, pending.effectId, options);
+    // `title` in the payload is accepted as a fallback for `question`.
+    const question = (payload?.question ?? payload?.title) as string | undefined;
+    collected.push({
+      effectId: pending.effectId,
+      title: question ?? pending.label,
+      question,
+      options: payload?.options as string[] | undefined,
+      expert: payload?.expert as string | string[] | undefined,
+      tags: payload?.tags as string[] | undefined,
+      strategy: payload?.strategy as PendingBreakpoint["strategy"],
+      previousFeedback: payload?.previousFeedback as string | undefined,
+      attempt: payload?.attempt as number | undefined,
+      requestedAt: pending.requestedAt,
+    });
+  }
+  return collected;
+}
+/**
+ * Report whether breakpoints are the ONLY kind of effect still pending.
+ *
+ * This mirrors the check in the Claude Code stop hook (claudeCode.ts:578-598):
+ * when only breakpoints are pending, the harness allows exit because human
+ * action is required. This is the signal that Paperclip should surface
+ * breakpoints in the UI.
+ *
+ * @param runDir - Run directory to inspect.
+ * @param options - Optional working directory for the CLI process.
+ * @returns `onlyBreakpoints` is true when at least one breakpoint is pending
+ *   and no other kind has a non-zero count; both tallies are returned as well.
+ */
+export async function hasOnlyBreakpointsPending(
+  runDir: string,
+  options?: { cwd?: string }
+): Promise<{ onlyBreakpoints: boolean; breakpointCount: number; otherCount: number }> {
+  const { pendingByKind } = await getRunStatus(runDir, options);
+  const breakpointCount = pendingByKind.breakpoint ?? 0;
+  // Sum the counts of every kind other than "breakpoint".
+  let otherCount = 0;
+  for (const [kind, count] of Object.entries(pendingByKind)) {
+    if (kind !== "breakpoint") otherCount = otherCount + count;
+  }
+  const onlyBreakpoints = breakpointCount > 0 && otherCount === 0;
+  return { onlyBreakpoints, breakpointCount, otherCount };
+}
+/**
+ * Install the babysitter plugin for one harness.
+ *
+ * Runs `babysitter harness:install-plugin <name> --json` with a 120 second
+ * timeout. Failures are reported in the return value rather than thrown.
+ *
+ * @param harnessName - Harness whose plugin should be installed.
+ * @param options - Optional working directory for the CLI process.
+ * @returns `success: true` with the raw stdout, or `success: false` with the
+ *   error message (or stringified error) of the failed invocation.
+ */
+export async function installHarnessPlugin(
+  harnessName: string,
+  options?: { cwd?: string }
+): Promise<{ success: boolean; output: string }> {
+  const installArgs = ["harness:install-plugin", harnessName, "--json"];
+  try {
+    const completed = await exec(CLI, installArgs, {
+      cwd: options?.cwd,
+      timeout: 120_000,
+    });
+    return { success: true, output: completed.stdout };
+  } catch (err) {
+    const output = err instanceof Error ? err.message : String(err);
+    return { success: false, output };
+  }
+}
+/**
+ * List the harnesses reported by `harness:discover` and their plugin status.
+ *
+ * Any failure of the CLI call (including unparsable output) is swallowed and
+ * reported as an empty list.
+ *
+ * @param options - Optional working directory for the CLI process.
+ * @returns The discovered harnesses, or `[]` when discovery fails.
+ */
+export async function discoverHarnesses(
+  options?: { cwd?: string }
+): Promise<Array<{ name: string; available: boolean; pluginInstalled?: boolean }>> {
+  type DiscoveredHarness = {
+    name: string;
+    available: boolean;
+    pluginInstalled?: boolean;
+  };
+  return runCli<DiscoveredHarness[]>(["harness:discover"], options).catch(
+    (): DiscoveredHarness[] => []
+  );
+}
+/**
+ * Join a runs directory and a run id into a run directory path.
+ *
+ * The two parts are concatenated with a single "/" and nothing else: no
+ * normalisation, and no de-duplication of an existing trailing slash.
+ *
+ * @param runsDir - Directory that holds all runs.
+ * @param runId - Identifier of the run.
+ * @returns `runsDir`, a "/", then `runId`.
+ */
+export function buildRunDir(runsDir: string, runId: string): string {
+  return runsDir + "/" + runId;
+}

package/src/delegating-adapter.ts ADDED Viewed

@@ -0,0 +1,97 @@
+/**
+ * Delegating adapter for Paperclip harness detection.
+ *
+ * Detects which underlying AI harness a Paperclip agent uses and delegates
+ * babysitter adapter operations to the appropriate harness adapter.
+ *
+ * Detection tiers (in priority order):
+ *   1. Agent metadata — read adapterType from agent config (e.g., claude_local -> claude-code)
+ *   2. Environment probing — check env vars for known harness signatures
+ *   3. Explicit config — fall back to plugin settings
+ */
+import { ADAPTER_TYPE_MAP, type HarnessDetectionResult } from "./types";
+/**
+ * Detect the underlying harness for a Paperclip agent.
+ *
+ * Sources are tried in order and the first hit wins:
+ *   1. `adapterType` looked up in ADAPTER_TYPE_MAP ("agent-metadata", high)
+ *   2. environment variable probing ("env-probe", medium)
+ *   3. `pluginConfig.defaultHarness` ("config", medium)
+ *   4. hard-coded "claude-code" ("fallback", low)
+ *
+ * @param adapterType - The agent's adapterType field (e.g., "claude_local")
+ * @param pluginConfig - Plugin settings for fallback detection
+ * @returns Detection result with harness name, the tier that produced it, and
+ *   a confidence level
+ */
+export function detectHarness(
+  adapterType?: string,
+  pluginConfig?: { defaultHarness?: string }
+): HarnessDetectionResult {
+  // Tier 1: Agent metadata inspection (highest confidence).
+  // Own-property check: the `in` operator would also accept inherited keys
+  // such as "constructor" or "toString" and return a non-string harness name.
+  if (
+    adapterType &&
+    Object.prototype.hasOwnProperty.call(ADAPTER_TYPE_MAP, adapterType)
+  ) {
+    return {
+      harnessName: ADAPTER_TYPE_MAP[adapterType],
+      detectionTier: "agent-metadata",
+      confidence: "high",
+    };
+  }
+  // Tier 2: Environment variable probing
+  const envHarness = detectFromEnvironment();
+  if (envHarness) {
+    return {
+      harnessName: envHarness,
+      detectionTier: "env-probe",
+      confidence: "medium",
+    };
+  }
+  // Tier 3: Explicit plugin configuration
+  if (pluginConfig?.defaultHarness) {
+    return {
+      harnessName: pluginConfig.defaultHarness,
+      detectionTier: "config",
+      confidence: "medium",
+    };
+  }
+  // Fallback: default to claude-code
+  return {
+    harnessName: "claude-code",
+    detectionTier: "fallback",
+    confidence: "low",
+  };
+}
+/**
+ * Guess the harness from marker environment variables.
+ *
+ * Harnesses are checked in a fixed order (claude-code, codex, gemini-cli,
+ * cursor); the first one with at least one non-empty marker variable wins.
+ *
+ * @returns The matching harness name, or undefined when no marker is set.
+ */
+function detectFromEnvironment(): string | undefined {
+  // Ordered table: harness name -> variables whose presence identifies it.
+  const signatures: Array<[string, string[]]> = [
+    ["claude-code", ["CLAUDE_CODE_SESSION", "CLAUDE_CODE_ENTRYPOINT"]],
+    ["codex", ["CODEX_SESSION", "CODEX_HOME"]],
+    ["gemini-cli", ["GEMINI_CLI_SESSION", "GOOGLE_GENAI_API_KEY"]],
+    ["cursor", ["CURSOR_SESSION"]],
+  ];
+  const env = process.env;
+  const hit = signatures.find(([, markers]) =>
+    markers.some((name) => Boolean(env[name]))
+  );
+  return hit?.[0];
+}
+/**
+ * Map a Paperclip adapter type string to a babysitter harness name.
+ *
+ * Only keys defined directly on ADAPTER_TYPE_MAP are considered, so inherited
+ * names such as "constructor" or "toString" yield undefined rather than an
+ * Object.prototype member.
+ *
+ * @param adapterType - The agent's adapterType field (e.g., "claude_local").
+ * @returns The mapped harness name, or undefined if no mapping exists.
+ */
+export function mapAdapterType(adapterType: string): string | undefined {
+  return Object.prototype.hasOwnProperty.call(ADAPTER_TYPE_MAP, adapterType)
+    ? ADAPTER_TYPE_MAP[adapterType]
+    : undefined;
+}

package/src/harness-plugin-installer.ts ADDED Viewed

@@ -0,0 +1,202 @@
+/**
+ * Harness plugin installer for the Paperclip babysitter integration.
+ *
+ * When the Paperclip plugin detects an underlying harness (e.g., claude_local),
+ * it needs the corresponding babysitter harness plugin installed for that
+ * harness to handle the stop-hook iteration loop and breakpoint presentation.
+ *
+ * This module checks whether the babysitter plugin is installed for a given
+ * harness and provides installation commands.
+ *
+ * The underlying harness plugin is what actually drives the orchestration loop:
+ *   - Claude Code: stop-hook pauses between iterations, allows exit when only
+ *     breakpoints are pending (user must approve externally)
+ *   - OpenClaw: agent_end hook fires async iteration, before_prompt_build
+ *     injects context
+ *
+ * The Paperclip plugin SUPPLEMENTS this by:
+ *   - Monitoring run state for pending breakpoints via run:status / task:list
+ *   - Surfacing breakpoints in the Paperclip dashboard UI
+ *   - Allowing approve/reject through Paperclip action handlers
+ *   - Posting results via task:post, which the underlying harness picks up
+ *     on next iteration
+ */
+import { execFile } from "node:child_process";
+import { promisify } from "node:util";
+// Promise-based form of child_process.execFile used for every CLI call in this module.
+const exec = promisify(execFile);
+/**
+ * Maps babysitter harness names to their plugin install commands.
+ *
+ * Each entry is an argv array whose first element is the executable:
+ *   - `install` is joined with spaces to build the suggested install command
+ *     and is executed as-is by `installHarnessPlugin`.
+ *   - `check` is identical for every harness and is not referenced elsewhere in
+ *     the visible part of this file (`checkHarnessPluginStatus` issues the
+ *     same `harness:discover --json` call directly).
+ */
+const HARNESS_INSTALL_COMMANDS: Record<string, { check: string[]; install: string[] }> = {
+  "claude-code": {
+    check: ["babysitter", "harness:discover", "--json"],
+    install: ["babysitter", "harness:install-plugin", "claude-code"],
+  },
+  codex: {
+    check: ["babysitter", "harness:discover", "--json"],
+    install: ["babysitter", "harness:install-plugin", "codex"],
+  },
+  openclaw: {
+    check: ["babysitter", "harness:discover", "--json"],
+    install: ["babysitter", "harness:install-plugin", "openclaw"],
+  },
+  "gemini-cli": {
+    check: ["babysitter", "harness:discover", "--json"],
+    install: ["babysitter", "harness:install-plugin", "gemini-cli"],
+  },
+  cursor: {
+    check: ["babysitter", "harness:discover", "--json"],
+    install: ["babysitter", "harness:install-plugin", "cursor"],
+  },
+  "github-copilot": {
+    check: ["babysitter", "harness:discover", "--json"],
+    install: ["babysitter", "harness:install-plugin", "github-copilot"],
+  },
+};
+/**
+ * Marketplace name for the a5c.ai babysitter plugins.
+ * NOTE(review): not referenced elsewhere in the visible part of this file.
+ */
+const MARKETPLACE_NAME = "a5c.ai";
+/** Git URL passed as `--marketplace-url` to `plugin:add-marketplace` before a plugin install. */
+const MARKETPLACE_URL = "https://github.com/a5c-ai/babysitter.git";
+/** Result of `checkHarnessPluginStatus` for a single harness. */
+export interface HarnessPluginStatus {
+  /** Harness name the status was requested for. */
+  harnessName: string;
+  /**
+   * False when the babysitter CLI itself is unavailable; otherwise true, or the
+   * `available` flag of the matching `harness:discover` entry when one is found.
+   */
+  cliAvailable: boolean;
+  /** `pluginInstalled` flag of the matching discovery entry; false when unknown. */
+  pluginInstalled: boolean;
+  /** Suggested shell command; set when the CLI is missing or the plugin is not installed. */
+  installCommand?: string;
+}
+/**
+ * Probe for the babysitter CLI by running `babysitter --version`.
+ *
+ * @returns true when the command completes successfully within 10 seconds,
+ *   false on any failure (missing binary, non-zero exit, timeout).
+ */
+export async function isBabysitterCliAvailable(): Promise<boolean> {
+  let available = true;
+  try {
+    await exec("babysitter", ["--version"], { timeout: 10_000 });
+  } catch {
+    available = false;
+  }
+  return available;
+}
+/**
+ * Check if a harness CLI is available and the babysitter plugin is installed.
+ *
+ * Steps:
+ *   1. If the babysitter CLI is missing, return early with the npm install
+ *      command for the SDK as `installCommand`.
+ *   2. Run `babysitter harness:discover --json` and look for an entry named
+ *      `harnessName`, or the same name with every "-" replaced by "_".
+ *   3. When the plugin is not reported as installed (including when discovery
+ *      fails or has no matching entry), set `installCommand` to the
+ *      `harness:install-plugin` command for this harness.
+ *
+ * @param harnessName - Babysitter harness name, e.g. "claude-code".
+ * @returns The status record; this function never rejects because of a
+ *   failed discovery call.
+ */
+export async function checkHarnessPluginStatus(
+  harnessName: string
+): Promise<HarnessPluginStatus> {
+  const result: HarnessPluginStatus = {
+    harnessName,
+    cliAvailable: false,
+    pluginInstalled: false,
+  };
+  // Check if babysitter CLI is available
+  if (!(await isBabysitterCliAvailable())) {
+    result.installCommand = "npm install -g @a5c-ai/babysitter-sdk";
+    return result;
+  }
+  result.cliAvailable = true;
+  // Underscore alias of the name. A global replace is required here: a string
+  // pattern passed to replace() would only convert the first hyphen.
+  const underscoreName = harnessName.replace(/-/g, "_");
+  // Check harness discovery to see if the harness CLI is available
+  try {
+    const { stdout } = await exec("babysitter", ["harness:discover", "--json"], {
+      timeout: 15_000,
+    });
+    const discovery = JSON.parse(stdout) as Array<{
+      name: string;
+      available: boolean;
+      pluginInstalled?: boolean;
+    }>;
+    const harness = discovery.find(
+      (h) => h.name === harnessName || h.name === underscoreName
+    );
+    if (harness) {
+      // From here on cliAvailable reflects the harness entry, not the babysitter CLI.
+      result.cliAvailable = harness.available;
+      result.pluginInstalled = harness.pluginInstalled ?? false;
+    }
+  } catch {
+    // Discovery failed - assume not installed
+  }
+  if (!result.pluginInstalled) {
+    const cmd = HARNESS_INSTALL_COMMANDS[harnessName];
+    result.installCommand = cmd
+      ? cmd.install.join(" ")
+      : `babysitter harness:install-plugin ${harnessName}`;
+  }
+  return result;
+}
+/**
+ * Try to install the babysitter plugin for a harness.
+ *
+ * The marketplace is registered first on a best-effort basis (a failure there
+ * is ignored), then the harness's install command from
+ * HARNESS_INSTALL_COMMANDS is executed with a 60 second timeout.
+ *
+ * @param harnessName - Babysitter harness name, e.g. "claude-code".
+ * @returns `success: true` with stdout (or stderr, or a default message), or
+ *   `success: false` with the error message. Unknown harness names fail
+ *   without running anything.
+ */
+export async function installHarnessPlugin(
+  harnessName: string
+): Promise<{ success: boolean; output: string }> {
+  const command = HARNESS_INSTALL_COMMANDS[harnessName];
+  if (!command) {
+    const output = `No install command known for harness: ${harnessName}`;
+    return { success: false, output };
+  }
+  try {
+    // Step 1: register the marketplace; it may already exist, so errors are ignored.
+    const marketplaceArgs = [
+      "plugin:add-marketplace",
+      "--marketplace-url",
+      MARKETPLACE_URL,
+      "--global",
+    ];
+    try {
+      await exec("babysitter", marketplaceArgs, { timeout: 30_000 });
+    } catch {
+      // Marketplace may already exist - continue
+    }
+    // Step 2: run the install command (first element is the executable).
+    const executable = command.install[0];
+    const installArgs = command.install.slice(1);
+    const completed = await exec(executable, installArgs, { timeout: 60_000 });
+    const output =
+      completed.stdout || completed.stderr || "Installed successfully";
+    return { success: true, output };
+  } catch (err) {
+    const output = err instanceof Error ? err.message : String(err);
+    return { success: false, output };
+  }
+}
+/**
+ * Ensure the babysitter SDK CLI is installed.
+ *
+ * Order of attempts:
+ *   1. If `babysitter --version` already works, report "global".
+ *   2. Otherwise run `npm install -g @a5c-ai/babysitter-sdk` and probe the CLI
+ *      again; "global" is reported only when that second probe succeeds. A
+ *      clean npm exit alone is not enough, because the global bin directory
+ *      may not be on PATH.
+ *   3. Otherwise check whether `npx -y @a5c-ai/babysitter-sdk --version` runs
+ *      and report "npx".
+ *   4. Otherwise report "missing".
+ *
+ * Note that step 2 has the side effect of attempting a global npm install.
+ *
+ * @returns Whether the CLI can be run and by which method.
+ */
+export async function ensureBabysitterCli(): Promise<{
+  available: boolean;
+  method: "global" | "npx" | "missing";
+}> {
+  if (await isBabysitterCliAvailable()) {
+    return { available: true, method: "global" };
+  }
+  // Try installing globally, then confirm the binary can actually be run.
+  try {
+    await exec("npm", ["install", "-g", "@a5c-ai/babysitter-sdk"], {
+      timeout: 60_000,
+    });
+    if (await isBabysitterCliAvailable()) {
+      return { available: true, method: "global" };
+    }
+  } catch {
+    // Global install failed - fall through to the npx check
+  }
+  // Check npx fallback
+  try {
+    await exec("npx", ["-y", "@a5c-ai/babysitter-sdk", "--version"], {
+      timeout: 30_000,
+    });
+    return { available: true, method: "npx" };
+  } catch {
+    return { available: false, method: "missing" };
+  }
+}

package/src/manifest.ts ADDED Viewed

@@ -0,0 +1,101 @@
+/**
+ * Babysitter Paperclip plugin manifest.
+ *
+ * Declares capabilities, event subscriptions, UI slots, and settings
+ * for the Babysitter orchestration integration with Paperclip.
+ *
+ * The object is frozen to literal types with `as const` and exported both by
+ * name and as the module's default export.
+ */
+export const manifest = {
+  id: "babysitter",
+  displayName: "Babysitter Orchestrator",
+  description:
+    "Deterministic, event-sourced orchestration for Paperclip agents via Babysitter.",
+  // NOTE(review): manifest version is hard-coded here; confirm whether it should track the package version.
+  version: "0.0.1",
+  // Paths of the built worker bundle and UI bundle directory.
+  entrypoints: {
+    worker: "dist/worker.js",
+    ui: "dist/ui",
+  },
+  // Capability identifiers requested by the plugin.
+  capabilities: [
+    "events.subscribe",
+    "events.emit",
+    "plugin.state.read",
+    "plugin.state.write",
+    "agents.read",
+    "agent.tools.register",
+    "ui.dashboardWidget.register",
+    "ui.detailTab.register",
+    "ui.action.register",
+    "ui.sidebar.register",
+  ],
+  events: {
+    // Agent run lifecycle events the plugin listens to.
+    subscribe: [
+      "agent.run.started",
+      "agent.run.finished",
+      "agent.run.failed",
+      "agent.run.cancelled",
+    ],
+    // Events published by the plugin, all under the "plugin.babysitter." prefix.
+    emit: [
+      "plugin.babysitter.run.created",
+      "plugin.babysitter.breakpoint.requested",
+      "plugin.babysitter.breakpoint.resolved",
+    ],
+  },
+  // Settings, each with a type, default, display name and description.
+  settings: {
+    runsDir: {
+      type: "string" as const,
+      default: ".a5c/runs",
+      displayName: "Runs Directory",
+      description: "Directory where babysitter run data is stored.",
+    },
+    autoIterate: {
+      type: "boolean" as const,
+      default: true,
+      displayName: "Auto-Iterate",
+      description:
+        "Automatically iterate runs when effects are resolved.",
+    },
+    maxIterations: {
+      type: "number" as const,
+      default: 256,
+      displayName: "Max Iterations",
+      description: "Maximum orchestration iterations per run.",
+    },
+    breakpointTimeout: {
+      type: "number" as const,
+      // 3600000 ms = 1 hour.
+      default: 3600000,
+      displayName: "Breakpoint Timeout (ms)",
+      description:
+        "Time to wait for breakpoint approval before timing out (default 1 hour).",
+    },
+  },
+  ui: {
+    // Each slot names the UI export (`exportName`) that renders it.
+    slots: [
+      {
+        type: "dashboardWidget",
+        id: "babysitter-dashboard",
+        displayName: "Babysitter Runs",
+        exportName: "BabysitterDashboard",
+      },
+      {
+        type: "detailTab",
+        id: "babysitter-run-detail",
+        displayName: "Babysitter Run",
+        exportName: "RunDetailTab",
+        // This tab is declared for "agent" entities only.
+        entityTypes: ["agent"],
+      },
+      {
+        type: "sidebarPanel",
+        id: "babysitter-sidebar",
+        displayName: "Babysitter",
+        exportName: "BabysitterSidebar",
+      },
+    ],
+  },
+} as const;
+export default manifest;