npm - @agentstep/agent-sdk - Versions diffs - 0.1.0 - Mend

@agentstep/agent-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (105) hide show

package/package.json +45 -0
package/src/auth/middleware.ts +38 -0
package/src/backends/claude/args.ts +88 -0
package/src/backends/claude/index.ts +193 -0
package/src/backends/claude/permission-hook.ts +152 -0
package/src/backends/claude/tool-bridge.ts +211 -0
package/src/backends/claude/translator.ts +209 -0
package/src/backends/claude/wrapper-script.ts +45 -0
package/src/backends/codex/args.ts +69 -0
package/src/backends/codex/auth.ts +35 -0
package/src/backends/codex/index.ts +57 -0
package/src/backends/codex/setup.ts +37 -0
package/src/backends/codex/translator.ts +223 -0
package/src/backends/codex/wrapper-script.ts +26 -0
package/src/backends/factory/args.ts +45 -0
package/src/backends/factory/auth.ts +30 -0
package/src/backends/factory/index.ts +56 -0
package/src/backends/factory/setup.ts +34 -0
package/src/backends/factory/translator.ts +139 -0
package/src/backends/factory/wrapper-script.ts +33 -0
package/src/backends/gemini/args.ts +44 -0
package/src/backends/gemini/auth.ts +30 -0
package/src/backends/gemini/index.ts +53 -0
package/src/backends/gemini/setup.ts +34 -0
package/src/backends/gemini/translator.ts +139 -0
package/src/backends/gemini/wrapper-script.ts +26 -0
package/src/backends/opencode/args.ts +53 -0
package/src/backends/opencode/auth.ts +53 -0
package/src/backends/opencode/index.ts +70 -0
package/src/backends/opencode/mcp.ts +67 -0
package/src/backends/opencode/setup.ts +54 -0
package/src/backends/opencode/translator.ts +168 -0
package/src/backends/opencode/wrapper-script.ts +46 -0
package/src/backends/registry.ts +38 -0
package/src/backends/shared/ndjson.ts +29 -0
package/src/backends/shared/translator-types.ts +69 -0
package/src/backends/shared/wrap-prompt.ts +17 -0
package/src/backends/types.ts +85 -0
package/src/config/index.ts +95 -0
package/src/db/agents.ts +185 -0
package/src/db/api_keys.ts +78 -0
package/src/db/batch.ts +142 -0
package/src/db/client.ts +81 -0
package/src/db/environments.ts +127 -0
package/src/db/events.ts +208 -0
package/src/db/memory.ts +143 -0
package/src/db/migrations.ts +295 -0
package/src/db/proxy.ts +37 -0
package/src/db/sessions.ts +295 -0
package/src/db/vaults.ts +110 -0
package/src/errors.ts +53 -0
package/src/handlers/agents.ts +194 -0
package/src/handlers/batch.ts +41 -0
package/src/handlers/docs.ts +87 -0
package/src/handlers/environments.ts +154 -0
package/src/handlers/events.ts +234 -0
package/src/handlers/index.ts +12 -0
package/src/handlers/memory.ts +141 -0
package/src/handlers/openapi.ts +14 -0
package/src/handlers/sessions.ts +223 -0
package/src/handlers/stream.ts +76 -0
package/src/handlers/threads.ts +26 -0
package/src/handlers/ui/app.js +984 -0
package/src/handlers/ui/index.html +112 -0
package/src/handlers/ui/style.css +164 -0
package/src/handlers/ui.ts +1281 -0
package/src/handlers/vaults.ts +99 -0
package/src/http.ts +35 -0
package/src/index.ts +104 -0
package/src/init.ts +227 -0
package/src/openapi/registry.ts +8 -0
package/src/openapi/schemas.ts +625 -0
package/src/openapi/spec.ts +691 -0
package/src/providers/apple.ts +220 -0
package/src/providers/daytona.ts +217 -0
package/src/providers/docker.ts +264 -0
package/src/providers/e2b.ts +203 -0
package/src/providers/fly.ts +276 -0
package/src/providers/modal.ts +222 -0
package/src/providers/podman.ts +206 -0
package/src/providers/registry.ts +28 -0
package/src/providers/shared.ts +11 -0
package/src/providers/sprites.ts +55 -0
package/src/providers/types.ts +73 -0
package/src/providers/vercel.ts +208 -0
package/src/proxy/forward.ts +111 -0
package/src/queue/index.ts +111 -0
package/src/sessions/actor.ts +53 -0
package/src/sessions/bus.ts +155 -0
package/src/sessions/driver.ts +818 -0
package/src/sessions/grader.ts +120 -0
package/src/sessions/interrupt.ts +14 -0
package/src/sessions/sweeper.ts +136 -0
package/src/sessions/threads.ts +126 -0
package/src/sessions/tools.ts +50 -0
package/src/shutdown.ts +78 -0
package/src/sprite/client.ts +294 -0
package/src/sprite/exec.ts +161 -0
package/src/sprite/lifecycle.ts +339 -0
package/src/sprite/pool.ts +65 -0
package/src/sprite/setup.ts +159 -0
package/src/state.ts +61 -0
package/src/types.ts +339 -0
package/src/util/clock.ts +7 -0
package/src/util/ids.ts +11 -0

package/src/sessions/grader.ts ADDED Viewed

@@ -0,0 +1,120 @@
+/**
+ * Outcome grader — evaluates agent output against a rubric.
+ *
+ * Runs as a direct Anthropic API call from the Node.js process (NOT
+ * `claude -p` on the container) to avoid corrupting Claude session state.
+ * Uses tool_use with a structured result schema for reliable parsing.
+ */
+import { getConfig } from "../config";
+/** Verdict the grader can return for an agent's output. */
+export type GraderResult = "satisfied" | "needs_revision" | "failed";
+/** Outcome of a single grading call. */
+export interface GraderEvaluation {
+  // Verdict for the graded output.
+  result: GraderResult;
+  // Explanation from the grader, or a note describing why grading fell back to a default.
+  feedback: string;
+  // Token counts taken from the API response's `usage` field; zeros when no usable response exists.
+  usage: { input_tokens: number; output_tokens: number };
+}
+// System prompt sent with every grading request; instructs the model to answer via the tool only.
+const GRADER_SYSTEM = `You are an evaluation grader. You evaluate whether an agent's work output satisfies a rubric.
+You MUST call the evaluate_outcome tool with your assessment. Do not respond with plain text.`;
+// Tool definition passed in the request's `tools` array. The request also forces this tool via
+// `tool_choice`, so the verdict comes back as structured `tool_use` input rather than free text.
+const EVALUATE_TOOL = {
+  name: "evaluate_outcome",
+  description: "Submit your evaluation of the agent's output against the rubric.",
+  input_schema: {
+    type: "object" as const,
+    properties: {
+      // Must stay in sync with the GraderResult union above.
+      result: {
+        type: "string" as const,
+        enum: ["satisfied", "needs_revision", "failed"],
+        description: "satisfied: output meets the rubric. needs_revision: output is close but needs specific changes. failed: output fundamentally fails to meet the rubric.",
+      },
+      feedback: {
+        type: "string" as const,
+        description: "Brief explanation of the evaluation. If needs_revision, include specific actionable feedback for the agent.",
+      },
+    },
+    required: ["result", "feedback"],
+  },
+};
+/**
+ * Call the Anthropic API directly to grade agent output against a rubric.
+ *
+ * Grading is best-effort: every failure mode resolves to a "satisfied"
+ * verdict with an explanatory `feedback` string instead of throwing. That
+ * covers a missing API key, a transport error (fetch rejects), a non-2xx
+ * response, a body that is not valid JSON, and a response without a usable
+ * `evaluate_outcome` tool call.
+ *
+ * @param rubric - Rubric text the output is judged against.
+ * @param agentOutput - The agent's output to evaluate.
+ * @param model - Model id to grade with; an empty string selects the default.
+ * @returns The verdict, the grader's feedback, and token usage (zeros when
+ *   the API returned no usage information).
+ */
+export async function runGraderEvaluation(
+  rubric: string,
+  agentOutput: string,
+  model: string,
+): Promise<GraderEvaluation> {
+  const cfg = getConfig();
+  const apiKey = cfg.anthropicApiKey;
+  if (!apiKey) {
+    // No API key — can't grade, default to satisfied
+    return {
+      result: "satisfied",
+      feedback: "Grader skipped: no ANTHROPIC_API_KEY configured for direct API evaluation.",
+      usage: { input_tokens: 0, output_tokens: 0 },
+    };
+  }
+  const userMessage = `## Rubric\n${rubric}\n\n## Agent Output\n${agentOutput}`;
+  let response: Response;
+  try {
+    response = await fetch("https://api.anthropic.com/v1/messages", {
+      method: "POST",
+      headers: {
+        "content-type": "application/json",
+        "x-api-key": apiKey,
+        "anthropic-version": "2023-06-01",
+      },
+      body: JSON.stringify({
+        model: model || "claude-sonnet-4-20250514",
+        max_tokens: 1024,
+        system: GRADER_SYSTEM,
+        tools: [EVALUATE_TOOL],
+        // Force the tool call so the verdict always arrives as structured input.
+        tool_choice: { type: "tool", name: "evaluate_outcome" },
+        messages: [{ role: "user", content: userMessage }],
+      }),
+    });
+  } catch (err) {
+    // fetch rejects on transport failures (DNS, connection reset, ...).
+    // Treat them like an HTTP error so a flaky network cannot fail the caller.
+    console.warn("[grader] API request failed:", err);
+    return {
+      result: "satisfied",
+      feedback: "Grader API request failed, defaulting to satisfied.",
+      usage: { input_tokens: 0, output_tokens: 0 },
+    };
+  }
+  if (!response.ok) {
+    const errText = await response.text().catch(() => "");
+    console.warn(`[grader] API call failed (${response.status}): ${errText}`);
+    return {
+      result: "satisfied",
+      feedback: `Grader API error (${response.status}), defaulting to satisfied.`,
+      usage: { input_tokens: 0, output_tokens: 0 },
+    };
+  }
+  let data: {
+    content?: Array<{ type: string; name?: string; input?: { result?: string; feedback?: string } }>;
+    usage?: { input_tokens: number; output_tokens: number };
+  } | null;
+  try {
+    data = (await response.json()) as typeof data;
+  } catch (err) {
+    // A 2xx response with an unparseable body carries no verdict.
+    console.warn("[grader] API returned an unparseable body:", err);
+    data = null;
+  }
+  const usage = data?.usage ?? { input_tokens: 0, output_tokens: 0 };
+  // Guard against a body without a `content` array before searching it.
+  const blocks = Array.isArray(data?.content) ? data.content : [];
+  // Extract the tool_use result
+  const toolUse = blocks.find(
+    (b) => b.type === "tool_use" && b.name === "evaluate_outcome",
+  );
+  if (!toolUse?.input?.result) {
+    return {
+      result: "satisfied",
+      feedback: "Grader returned no structured result, defaulting to satisfied.",
+      usage,
+    };
+  }
+  // The schema constrains `result`, but validate anyway; an unknown value
+  // degrades to "satisfied" like every other fallback here.
+  const validResults: GraderResult[] = ["satisfied", "needs_revision", "failed"];
+  const result = validResults.includes(toolUse.input.result as GraderResult)
+    ? (toolUse.input.result as GraderResult)
+    : "satisfied";
+  return {
+    result,
+    feedback: toolUse.input.feedback ?? "",
+    usage,
+  };
+}

package/src/sessions/interrupt.ts ADDED Viewed

@@ -0,0 +1,14 @@
+/**
+ * Interrupt handler.
+ *
+ * Must be called from inside the session's actor so the interrupt lands
+ * between two consecutive event appends, never in the middle of one.
+ */
+import { getRuntime } from "../state";
+/**
+ * Abort the in-flight run registered for `sessionId`, if there is one.
+ *
+ * @param sessionId - Session whose current run should be interrupted.
+ * @returns `true` when a run was found and its controller aborted,
+ *   `false` when the session has no in-flight run.
+ */
+export function interruptSession(sessionId: string): boolean {
+  const inFlight = getRuntime().inFlightRuns.get(sessionId);
+  if (inFlight) {
+    // The abort reason is an AbortError-named DOMException.
+    inFlight.controller.abort(new DOMException("interrupted", "AbortError"));
+    return true;
+  }
+  return false;
+}

package/src/sessions/sweeper.ts ADDED Viewed

@@ -0,0 +1,136 @@
+/**
+ * Periodic sweeper: idle eviction + orphan reconcile.
+ *
+ * One unified sweeper with one `setInterval`, installed in `src/init.ts`.
+ * One timer means one set of interleaving concerns.
+ *
+ * Park/restore as originally planned is infeasible (spike S2: sprites.dev
+ * checkpoints are per-sprite only; no stopSprite API). The correct M5
+ * model is: pin the sprite for the session's lifetime, and evict the
+ * session (release the sprite, terminate the row) after idle TTL expires.
+ *
+ * Re-entrancy: `sweeping` prevents overlapping ticks from stacking if a
+ * prior sweep runs longer than the interval. A stuck `releaseSession` can
+ * take up to `spriteTimeoutMs` (30s default) per candidate, so a bad run
+ * with 50 candidates could exceed the 60s interval.
+ *
+ * Shutdown cooperation: `stopping` is set by the shutdown handler so the
+ * sweep bails out cleanly between candidates if SIGTERM fires mid-run.
+ */
+import { getDb } from "../db/client";
+import { getConfig } from "../config";
+import { nowMs } from "../util/clock";
+import { getRuntime } from "../state";
+import { getActor, dropActor } from "./actor";
+import { appendEvent, dropEmitter } from "./bus";
+import {
+  archiveSession,
+  getSessionRow,
+  updateSessionStatus,
+} from "../db/sessions";
+import { releaseSession, reconcileOrphans, reconcileDockerOrphans } from "../sprite/lifecycle";
+/** True while a sweep is executing; `runSweep` refuses to start a second one. */
+let sweeping = false;
+/** Set once shutdown has begun; the sweep checks it before starting new work. */
+let stopping = false;
+/** Signal the sweeper that the process is shutting down. */
+export function markStopping(): void {
+  stopping = true;
+}
+/** Exposed for tests only: restore both module flags to their initial values. */
+export function __resetSweeperState(): void {
+  sweeping = stopping = false;
+}
+/**
+ * Run one sweep: idle-session eviction, then sprite orphan reconcile (only
+ * when a sprite token is configured), then Docker orphan reconcile.
+ *
+ * Does nothing if a sweep is already running or shutdown has begun. Each
+ * phase is isolated: a failure is logged and the next phase still runs.
+ */
+export async function runSweep(): Promise<void> {
+  if (sweeping || stopping) return;
+  sweeping = true;
+  // Run one phase and route any failure to its reporter instead of throwing.
+  const guarded = async (
+    phase: () => unknown,
+    report: (e: unknown) => void,
+  ): Promise<void> => {
+    try {
+      await phase();
+    } catch (e) {
+      report(e);
+    }
+  };
+  try {
+    await guarded(
+      () => evictIdleSessions(),
+      (e) => console.warn("[sweeper] evict failed:", e),
+    );
+    const cfg = getConfig();
+    if (cfg.spriteToken) {
+      await guarded(
+        () => reconcileOrphans(),
+        (e) => console.warn("[sweeper] reconcile sprites failed:", e),
+      );
+    }
+    await guarded(
+      () => reconcileDockerOrphans(),
+      (e) => {
+        // Docker not available — skip silently
+        const dockerMissing = e instanceof Error && e.message.includes("ENOENT");
+        if (!dockerMissing) {
+          console.warn("[sweeper] reconcile docker failed:", e);
+        }
+      },
+    );
+  } finally {
+    // Always clear the guard so the next tick can run.
+    sweeping = false;
+  }
+}
+/**
+ * Terminate sessions that have been idle for longer than
+ * `cfg.sessionMaxAgeMs`.
+ *
+ * Candidates come from one DB query (at most 50 per sweep). Each candidate
+ * is re-validated inside its session actor before anything is released,
+ * because its state may have changed since the query ran. The in-memory
+ * actor and emitter are discarded only when the session was actually
+ * evicted here, or is already gone, archived or terminated. A session that
+ * turned out to be active keeps both.
+ *
+ * Returns early between candidates once shutdown has been signalled.
+ * A failure on one candidate is logged and does not stop the others.
+ */
+async function evictIdleSessions(): Promise<void> {
+  if (stopping) return;
+  const cfg = getConfig();
+  const now = nowMs();
+  // COALESCE so sessions that never ran a turn (idle_since IS NULL) still
+  // age out from their created_at. LIMIT caps the worst case per sweep.
+  const rows = getDb()
+    .prepare(
+      `SELECT id FROM sessions
+       WHERE status = 'idle'
+         AND archived_at IS NULL
+         AND COALESCE(idle_since, created_at) + ? < ?
+       LIMIT ?`,
+    )
+    .all(cfg.sessionMaxAgeMs, now, 50) as { id: string }[];
+  if (rows.length === 0) return;
+  for (const { id: sessionId } of rows) {
+    if (stopping) return;
+    try {
+      // Set inside the actor callback. Stays false when the session turned
+      // out to be active, so a live session keeps its actor and emitter.
+      let dropInMemoryState = false;
+      await getActor(sessionId).enqueue(async () => {
+        // CRITICAL: runTurn executes OUTSIDE the actor lock (see
+        // app/v1/sessions/[id]/events/route.ts — enqueueTurn launches runTurn
+        // after releasing the actor). Checking session.status in the DB is
+        // not enough — it may still be "idle" for a few ms after a fresh
+        // POST /events fired off a new turn. The in-memory inFlightRuns map
+        // is the authoritative "turn in progress" signal.
+        const rt = getRuntime();
+        if (rt.inFlightRuns.has(sessionId)) return;
+        const row = getSessionRow(sessionId);
+        if (!row || row.archived_at != null || row.status === "terminated") {
+          // Nothing left to serve for this session; its in-memory state
+          // (possibly created by the getActor call above) can go.
+          dropInMemoryState = true;
+          return;
+        }
+        if (row.status !== "idle") return;
+        // Re-check the TTL inside the lock — if another code path already
+        // bumped idle_since forward (turn completed), bail.
+        const base = row.idle_since ?? row.created_at;
+        if (base + cfg.sessionMaxAgeMs >= now) return;
+        await releaseSession(sessionId);
+        appendEvent(sessionId, {
+          type: "session.status_terminated",
+          payload: { reason: "idle_ttl" },
+          origin: "server",
+          processedAt: nowMs(),
+        });
+        updateSessionStatus(sessionId, "terminated", "idle_ttl");
+        archiveSession(sessionId);
+        dropInMemoryState = true;
+      });
+      if (dropInMemoryState) {
+        dropActor(sessionId);
+        dropEmitter(sessionId);
+      }
+    } catch (err) {
+      // Per-candidate isolation: one stuck session must not block the rest
+      // of the sweep. `releaseSession` is already best-effort internally,
+      // so this catch mainly protects against appendEvent/DB failures.
+      console.warn(`[sweeper] evict ${sessionId} failed:`, err);
+    }
+  }
+}

package/src/sessions/threads.ts ADDED Viewed

@@ -0,0 +1,126 @@
+/**
+ * Multi-agent thread orchestrator.
+ *
+ * When a parent session's agent calls `spawn_agent`, the driver delegates
+ * to this module. It creates a child session, runs it to completion, and
+ * returns the child's final agent.message text as the tool result.
+ *
+ * Depth is capped at MAX_THREAD_DEPTH to prevent infinite recursion.
+ */
+import { createSession, getSessionRow } from "../db/sessions";
+import { getAgent } from "../db/agents";
+import { getSession } from "../db/sessions";
+import { listEvents } from "../db/events";
+import { appendEvent } from "./bus";
+import { getActor } from "./actor";
+import { runTurn } from "./driver";
+import { nowMs } from "../util/clock";
+import { ApiError } from "../errors";
+/** Maximum nesting depth of spawned threads. */
+const MAX_THREAD_DEPTH = 3;
+/**
+ * Return the concatenated text blocks of the most recent `agent.message`
+ * event with non-empty text among the session's latest 100 events, or ""
+ * when there is none.
+ */
+function readLastAgentText(sessionId: string): string {
+  const recent = listEvents(sessionId, { limit: 100, order: "desc" });
+  for (const evt of recent) {
+    if (evt.type !== "agent.message") continue;
+    const parsed = JSON.parse(evt.payload_json) as {
+      content?: Array<{ type: string; text?: string }>;
+    };
+    let joined = "";
+    for (const block of parsed.content ?? []) {
+      if (block.type === "text" && block.text) joined += block.text;
+    }
+    if (joined) return joined;
+  }
+  return "";
+}
+/**
+ * Spawn a child agent session, run it to completion, and return the
+ * child's final agent.message text.
+ *
+ * @param parentSessionId - Session whose agent requested the spawn.
+ * @param agentId - Agent to run in the child session.
+ * @param prompt - Text sent to the child as its only input.
+ * @param parentDepth - Thread depth of the parent; the child gets depth + 1.
+ * @returns The child's last non-empty agent message text, or
+ *   "(no response from sub-agent)" when none was found.
+ * @throws ApiError 400 when the depth limit is reached; 404 when the parent
+ *   session or the agent does not exist.
+ */
+export async function handleSpawnAgent(
+  parentSessionId: string,
+  agentId: string,
+  prompt: string,
+  parentDepth: number,
+): Promise<string> {
+  if (parentDepth >= MAX_THREAD_DEPTH) {
+    throw new ApiError(
+      400,
+      "invalid_request_error",
+      `thread depth limit reached (max ${MAX_THREAD_DEPTH})`,
+    );
+  }
+  const parent = getSession(parentSessionId);
+  if (!parent) {
+    throw new ApiError(404, "not_found_error", `parent session not found: ${parentSessionId}`);
+  }
+  const agent = getAgent(agentId);
+  if (!agent) {
+    throw new ApiError(404, "not_found_error", `agent not found: ${agentId}`);
+  }
+  // The child inherits the parent's environment and vaults and records its lineage.
+  const child = createSession({
+    agent_id: agent.id,
+    agent_version: agent.version,
+    environment_id: parent.environment_id,
+    title: `Thread from ${parentSessionId}`,
+    metadata: { parent_session_id: parentSessionId },
+    parent_session_id: parentSessionId,
+    thread_depth: parentDepth + 1,
+    vault_ids: parent.vault_ids,
+  });
+  const childId = child.id;
+  // Announce the new thread on the parent's event stream.
+  appendEvent(parentSessionId, {
+    type: "session.thread_started",
+    payload: { child_session_id: childId, agent_id: agentId },
+    origin: "server",
+    processedAt: nowMs(),
+  });
+  // Make sure the child has an actor before its first turn.
+  getActor(childId);
+  // Drive a single turn with the prompt as the only input.
+  const eventId = `thread_${childId}_${nowMs()}`;
+  await runTurn(childId, [{ kind: "text", eventId, text: prompt }]);
+  // Poll until the child leaves "running" or the wait budget is spent.
+  const waitBudgetMs = 300_000; // 5 minutes
+  const pollEveryMs = 500;
+  const pollStart = nowMs();
+  let childRow = getSessionRow(childId);
+  for (;;) {
+    if (!childRow || childRow.status !== "running") break;
+    if (nowMs() - pollStart >= waitBudgetMs) break;
+    await new Promise((resolve) => setTimeout(resolve, pollEveryMs));
+    childRow = getSessionRow(childId);
+  }
+  // Still running after the budget: interrupt the child.
+  if (childRow?.status === "running") {
+    const { interruptSession } = await import("./interrupt");
+    interruptSession(childId);
+  }
+  const finalText = readLastAgentText(childId) || "(no response from sub-agent)";
+  // Report the result on the parent's event stream.
+  appendEvent(parentSessionId, {
+    type: "session.thread_completed",
+    payload: {
+      child_session_id: childId,
+      result: finalText,
+    },
+    origin: "server",
+    processedAt: nowMs(),
+  });
+  return finalText;
+}

package/src/sessions/tools.ts ADDED Viewed

@@ -0,0 +1,50 @@
+/**
+ * Resolve an agent's `tools` array into claude --allowed-tools /
+ * --disallowed-tools args plus the set of custom tool names.
+ *
+ * Built-in set matches the claude CLI's tool names (case-sensitive).
+ */
+import { BUILT_IN_TOOL_NAMES, type BuiltInToolName, type ToolConfig } from "../types";
+/** Result of resolving an agent's `tools` array. */
+export interface ResolvedTools {
+  // Built-in tool names that ended up enabled.
+  allowedTools: string[];
+  // Built-in tool names that are not enabled (the complement of allowedTools).
+  disallowedTools: string[];
+  // Names of every tool declared with type "custom".
+  customToolNames: Set<string>;
+}
+/**
+ * Resolve a `tools` array into allowed/disallowed built-in tool names plus
+ * the set of custom tool names.
+ *
+ * Built-ins are enabled only if at least one `agent_toolset_20260401` entry
+ * is present. Such an entry starts from "all enabled" unless its
+ * `default_config.enabled` is false, then applies per-tool overrides from
+ * `configs`. Overrides for names that are not built-in tools are ignored.
+ *
+ * @param tools - The agent's tool configuration entries.
+ * @returns Allowed and disallowed built-in names and the custom tool names.
+ */
+export function resolveToolset(tools: ToolConfig[]): ResolvedTools {
+  const customToolNames = new Set<string>();
+  let enabled = new Set<BuiltInToolName>(BUILT_IN_TOOL_NAMES);
+  let sawToolset = false;
+  for (const entry of tools) {
+    if (entry.type === "custom") {
+      customToolNames.add(entry.name);
+      continue;
+    }
+    if (entry.type !== "agent_toolset_20260401") continue;
+    sawToolset = true;
+    // A toolset that is disabled by default wipes whatever was enabled so far.
+    if (!(entry.default_config?.enabled ?? true)) {
+      enabled = new Set();
+    }
+    for (const override of entry.configs ?? []) {
+      const toolName = override.name;
+      if (!BUILT_IN_TOOL_NAMES.includes(toolName as BuiltInToolName)) continue;
+      // Only explicit booleans count; an unset flag leaves the tool as is.
+      if (override.enabled === true) {
+        enabled.add(toolName as BuiltInToolName);
+      } else if (override.enabled === false) {
+        enabled.delete(toolName as BuiltInToolName);
+      }
+    }
+  }
+  // If no agent_toolset_20260401 was declared, start from empty built-ins.
+  const finalEnabled = sawToolset ? enabled : new Set<BuiltInToolName>();
+  return {
+    allowedTools: Array.from(finalEnabled),
+    disallowedTools: BUILT_IN_TOOL_NAMES.filter((n) => !finalEnabled.has(n)),
+    customToolNames,
+  };
+}

package/src/shutdown.ts ADDED Viewed

@@ -0,0 +1,78 @@
+/**
+ * Graceful shutdown on SIGTERM and SIGINT.
+ *
+ * Aborts all in-flight turn controllers, gives them up to 5s to emit their
+ * `session.status_idle{stop_reason:"interrupted"}` via the driver's normal
+ * abort path, then exits. Sessions that don't finish in time will be picked
+ * up by the next startup's stale-recovery path.
+ *
+ * Pattern from
+ */
+import { getRuntime } from "./state";
+import { markStopping } from "./sessions/sweeper";
+import { syncDb, closeDb } from "./db/client";
+// Shape of the shutdown-related state this module keeps on `globalThis`.
+type GlobalShutdown = typeof globalThis & {
+  // Set once the signal handlers are registered, so they are installed only once.
+  __caShutdownInstalled?: boolean;
+  // Interval handle of the sweeper; cleared during shutdown. Assigned outside this file.
+  __caSweeperHandle?: NodeJS.Timeout;
+};
+// Typed view of globalThis used by the functions below.
+const g = globalThis as GlobalShutdown;
+/**
+ * Register the SIGTERM and SIGINT handlers that trigger a graceful
+ * shutdown. Calling it again after the first time is a no-op.
+ */
+export function installShutdownHandlers(): void {
+  if (g.__caShutdownInstalled) return;
+  g.__caShutdownInstalled = true;
+  for (const signal of ["SIGTERM", "SIGINT"] as const) {
+    process.on(signal, () => {
+      // Fire and forget: shutdown ends the process itself.
+      void shutdown(signal);
+    });
+  }
+}
+/** True once the first shutdown signal has been handled. */
+let shuttingDown = false;
+/**
+ * Perform the graceful shutdown sequence and exit the process.
+ *
+ * Steps: stop the sweeper, abort every in-flight run, wait a fixed 5 s if
+ * any run was in flight, sync and close the database, then exit. A second
+ * signal while this is in progress forces an immediate exit with code 1.
+ *
+ * Each database cleanup step is guarded separately, so a failing sync can
+ * neither skip the close nor leave the process running. The exit code is 1
+ * when either step failed and 0 otherwise.
+ *
+ * @param signal - Name of the signal that triggered the shutdown.
+ */
+async function shutdown(signal: string): Promise<void> {
+  if (shuttingDown) {
+    // Second signal — force exit immediately
+    console.log(`[shutdown] forced exit`);
+    process.exit(1);
+  }
+  shuttingDown = true;
+  const rt = getRuntime();
+  const inflight = rt.inFlightRuns.size;
+  if (inflight > 0) {
+    console.log(`[shutdown] stopping ${inflight} in-flight turn${inflight > 1 ? "s" : ""}...`);
+  } else {
+    console.log(`[shutdown] shutting down...`);
+  }
+  // Tell the sweeper to stop starting new eviction work, then clear the
+  // interval. Any in-progress sweep finishes its current candidate and bails.
+  markStopping();
+  if (g.__caSweeperHandle) {
+    clearInterval(g.__caSweeperHandle);
+    g.__caSweeperHandle = undefined;
+  }
+  for (const run of rt.inFlightRuns.values()) {
+    try {
+      run.controller.abort(new DOMException("shutting down", "AbortError"));
+    } catch {
+      /* ignore */
+    }
+  }
+  // Give drivers a moment to append their idle-interrupted events
+  if (inflight > 0) {
+    await new Promise((r) => setTimeout(r, 5000));
+  }
+  // Sync embedded replica to Turso and close the DB cleanly. The steps are
+  // guarded independently so the close is still attempted when the sync
+  // throws, and the process exits either way.
+  let exitCode = 0;
+  try {
+    syncDb();
+  } catch (err) {
+    exitCode = 1;
+    console.warn("[shutdown] db sync failed:", err);
+  }
+  try {
+    closeDb();
+  } catch (err) {
+    exitCode = 1;
+    console.warn("[shutdown] db close failed:", err);
+  }
+  process.exit(exitCode);
+}