npm - @fusionkit/adapter-ai-sdk - Versions diffs - 0.1.0 - Mend

@fusionkit/adapter-ai-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/dist/index.d.ts +23 -0
package/dist/index.js +17 -0
package/dist/managed-server.d.ts +102 -0
package/dist/managed-server.js +348 -0
package/dist/mlx-env.d.ts +178 -0
package/dist/mlx-env.js +371 -0
package/dist/model.d.ts +88 -0
package/dist/model.js +149 -0
package/dist/remote-tools.d.ts +56 -0
package/dist/remote-tools.js +57 -0
package/dist/routed-model.d.ts +88 -0
package/dist/routed-model.js +218 -0
package/dist/swarm-tools.d.ts +149 -0
package/dist/swarm-tools.js +324 -0
package/dist/test/golden.test.d.ts +1 -0
package/dist/test/golden.test.js +129 -0
package/dist/test/managed-server.test.d.ts +1 -0
package/dist/test/managed-server.test.js +198 -0
package/dist/test/mlx-env.test.d.ts +1 -0
package/dist/test/mlx-env.test.js +351 -0
package/dist/test/model.test.d.ts +1 -0
package/dist/test/model.test.js +110 -0
package/dist/test/remote-tools.test.d.ts +1 -0
package/dist/test/remote-tools.test.js +151 -0
package/dist/test/routed-model.test.d.ts +1 -0
package/dist/test/routed-model.test.js +223 -0
package/dist/test/swarm-tools.test.d.ts +1 -0
package/dist/test/swarm-tools.test.js +157 -0
package/dist/worktree-agent.d.ts +53 -0
package/dist/worktree-agent.js +303 -0
package/package.json +39 -0

package/dist/remote-tools.js ADDED Viewed

@@ -0,0 +1,57 @@
+import { jsonSchema, tool } from "ai";
+import { createCommandContext, executeGovernedCommand, targets, toGovernedRunRecord } from "@fusionkit/handoff";
+/** Default per-call wait ceiling for governed tool runs. */
+const DEFAULT_REMOTE_TOOL_TIMEOUT_MS = 5 * 60 * 1000;
+/**
+ * App-owned loops, honestly labeled (spec §6.2): the model loop stays in the
+ * caller's process and carries no durability claim. What Warrant adds is the
+ * execution boundary — every tool call becomes a signed run contract executed
+ * in a governed session and returns alongside an offline-verifiable receipt.
+ *
+ * There is no `handoff-needed` stream event and no mid-generation
+ * continuation; those are deliberately out of scope.
+ *
+ * @param config Either carries a ready-made `context`, or is handed whole to
+ *   `createCommandContext`. Also read from it: `pool` (passed to
+ *   `targets.pool`), `pullResults` (defaults to true) and `timeoutMs`
+ *   (defaults to DEFAULT_REMOTE_TOOL_TIMEOUT_MS, i.e. five minutes).
+ * @returns `tools` (a single `shell` tool), `calls()` (a copy of the records
+ *   collected so far) and the `context` in use.
+ */
+export function remoteTools(config) {
+    const context = "context" in config ? config.context : createCommandContext(config); // reuse a supplied context, otherwise build one from the config
+    const target = targets.pool(config.pool);
+    const pullResults = config.pullResults ?? true;
+    const timeoutMs = config.timeoutMs ?? DEFAULT_REMOTE_TOOL_TIMEOUT_MS;
+    const records = []; // one entry per shell call that resolved, in completion order
+    const shell = tool({
+        description: "Run a shell command in a governed session on a customer-controlled runner. " +
+            "The session materializes the current workspace; changes are pulled back. " + // NOTE(review): stated unconditionally, even when config.pullResults is false
+            "Every call is recorded in a signed, offline-verifiable receipt.",
+        inputSchema: jsonSchema({
+            type: "object",
+            properties: {
+                command: {
+                    type: "string",
+                    description: "The shell command to execute in the governed session."
+                }
+            },
+            required: ["command"],
+            additionalProperties: false
+        }),
+        execute: async ({ command }) => {
+            const result = await executeGovernedCommand(context, {
+                command,
+                target,
+                reason: "app-owned loop tool call",
+                timeoutMs,
+                pullResults
+            });
+            records.push({ toolName: "shell", ...toGovernedRunRecord(command, result) }); // reached only when the await above resolved; a rejection propagates to the caller unrecorded
+            return {
+                runId: result.run.runId,
+                status: result.status,
+                exitCode: result.exitCode,
+                output: result.output
+            };
+        }
+    });
+    return {
+        tools: { shell },
+        calls: () => [...records], // shallow copy, so callers cannot mutate the internal list
+        context
+    };
+}

package/dist/routed-model.d.ts ADDED Viewed

@@ -0,0 +1,88 @@
+import type { LanguageModelV3, LanguageModelV3CallOptions, LanguageModelV3GenerateResult, LanguageModelV3StreamResult } from "@ai-sdk/provider";
+import { z } from "zod";
+import type { Handoff } from "@fusionkit/handoff";
+declare const routerCardSchema: z.ZodObject<{ // schema behind RouterCard; declared without `export`, only its inferred type is exported
+    version: z.ZodLiteral<"uniroute.router.v1">;
+    embedder: z.ZodObject<{
+        model: z.ZodString;
+        dims: z.ZodNumber;
+    }, z.core.$strip>;
+    lambda: z.ZodNumber;
+    assignment: z.ZodUnion<readonly [z.ZodObject<{ // two variants, told apart by the `type` literal
+        type: z.ZodLiteral<"centroids">;
+        centroids: z.ZodArray<z.ZodArray<z.ZodNumber>>;
+    }, z.core.$strip>, z.ZodObject<{
+        type: z.ZodLiteral<"softmax">;
+        theta: z.ZodArray<z.ZodArray<z.ZodNumber>>;
+    }, z.core.$strip>]>;
+    models: z.ZodArray<z.ZodObject<{
+        id: z.ZodString;
+        psi: z.ZodArray<z.ZodNumber>;
+        cost: z.ZodNumber;
+    }, z.core.$strip>>;
+}, z.core.$strip>;
+export type RouterCard = z.infer<typeof routerCardSchema>; // the router-card shape expected by RoutedModelConfig.card
+/**
+ * Parse and structurally validate a router card (e.g. from readFile + JSON.parse).
+ *
+ * @param data Deserialized, not yet validated card data.
+ * @returns The card, typed as RouterCard.
+ * @throws If the data does not match the card schema, or if the assignment
+ *   row widths or per-model psi lengths are inconsistent with the card.
+ */
+export declare function loadRouterCard(data: unknown): RouterCard;
+/**
+ * One routing decision. The observer receives one per attempted candidate: a
+ * failed attempt is reported too, followed by the attempt that replaces it.
+ */
+export type RouteDecision = {
+    /** Chosen model id (a card model id). */
+    model: string;
+    /** Predicted error probability gamma for the chosen model. */
+    predictedError: number;
+    /** The chosen model's per-prompt cost from the card. */
+    cost: number;
+    /** gamma + lambda * cost, the quantity that was minimised. */
+    score: number;
+    /** True for every attempt after the top-ranked one, i.e. once a better-ranked candidate has failed. */
+    fallback: boolean;
+    /** Human-readable explanation; starts with "call failed:" when the reported attempt itself failed. */
+    reason: string;
+};
+export type RoutedModelConfig = { // configuration accepted by RoutedModel and routedModel()
+    /** The fitted router (see loadRouterCard). */
+    card: RouterCard;
+    /**
+     * Card model id -> the model that serves it (e.g. an mlxServer instance).
+     * Every id in card.models needs an entry; the RoutedModel constructor
+     * throws otherwise.
+     */
+    candidates: Record<string, LanguageModelV3>;
+    /**
+     * Embed the prompt text in the card's embedding space. Must be the same
+     * embedder the card was fitted with (card.embedder.model); the vector
+     * length is checked against card.embedder.dims on every call.
+     */
+    embed: (text: string) => Promise<number[]>;
+    /** Override the card's default lambda (cost/quality trade-off). */
+    lambda?: number;
+    /**
+     * Try the next-best candidate when a call fails. Defaults to true; when
+     * false the first failure is rethrown as-is.
+     */
+    fallback?: boolean;
+    /** Observer for every routing decision (withRoutedModel wires h.trace). */
+    onDecision?: (decision: RouteDecision) => void;
+};
+export declare class RoutedModel implements LanguageModelV3 { // a LanguageModelV3 that forwards each call to one of config.candidates
+    readonly specificationVersion: "v3";
+    readonly provider = "warrant-uniroute";
+    readonly modelId: string; // "uniroute(...)" wrapping the card model ids joined by " | "
+    private readonly config;
+    constructor(config: RoutedModelConfig); // throws when a card model id has no entry in config.candidates
+    get supportedUrls(): LanguageModelV3["supportedUrls"]; // the implementation always returns an empty object
+    /** Candidates ordered by cost-adjusted predicted error (best first). */
+    private rank;
+    private note; // forwards one RouteDecision to config.onDecision, when set
+    private dispatch; // tries the ranked candidates in order until one call succeeds
+    doGenerate(options: LanguageModelV3CallOptions): Promise<LanguageModelV3GenerateResult>;
+    doStream(options: LanguageModelV3CallOptions): Promise<LanguageModelV3StreamResult>; // only a rejection of the candidate's doStream() call triggers fallback
+}
+/**
+ * Create a UniRoute-routed model over a candidate pool.
+ *
+ * Equivalent to `new RoutedModel(config)`, so it throws for the same
+ * configurations the constructor rejects.
+ */
+export declare function routedModel(config: RoutedModelConfig): RoutedModel;
+/**
+ * Attach a routed model to a continuation context as `h.model`, recording
+ * every routing decision as a `model.routed` trace event (the analog of
+ * withModel for pools). `localModels` lists candidate ids served locally;
+ * everything else is reported as a cloud route, and fallbacks surface as
+ * escalations so triggers.modelEscalated() fires.
+ *
+ * @param h Continuation context to extend.
+ * @param config RoutedModelConfig without `onDecision` (this function
+ *   installs its own observer), plus the optional `localModels` id list.
+ * @returns The context, typed with the attached RoutedModel as `model`.
+ */
+export declare function withRoutedModel<H extends Handoff>(h: H, config: Omit<RoutedModelConfig, "onDecision"> & {
+    localModels?: string[];
+}): H & {
+    model: RoutedModel;
+};
+export {};

package/dist/routed-model.js ADDED Viewed

@@ -0,0 +1,218 @@
+import { z } from "zod";
+import { attachModel } from "./model.js";
+/**
+ * UniRoute routing over a pool of models (arXiv:2502.08773).
+ *
+ * The router is a *router card* — a portable `uniroute.router.v1` JSON
+ * artifact produced offline by the Python `uniroute-mlx` package (see
+ * python/uniroute-mlx): a prompt-cluster assignment map plus, per candidate
+ * model, its per-cluster error vector Psi and per-prompt cost. Routing a
+ * call is: embed the prompt, derive cluster weights, and pick
+ * argmin_m [ Phi(x) . Psi(m) + lambda * cost(m) ].
+ *
+ * All fitting and evaluation lives in Python; this file only ports that
+ * one online rule. Candidates are ordinary LanguageModelV3 instances —
+ * typically `mlxServer(...)` managed processes plus a cloud model — so the
+ * existing lifecycle (lazy start, scale-to-zero, crash recovery) is reused
+ * untouched.
+ *
+ * Honest semantics, mirroring HandoffModel: a failed call on the chosen
+ * model falls back to the next-best candidate *between* calls; once a
+ * stream has started emitting, it belongs to the model that produced it.
+ */
+const cardModelSchema = z.object({ // one candidate-model entry of the card
+    id: z.string().min(1),
+    psi: z.array(z.number()).min(1), // per-cluster error vector; its length is checked against the cluster count in loadRouterCard
+    cost: z.number().nonnegative() // per-prompt cost
+});
+const routerCardSchema = z.object({ // shape-only validation; the cross-field size checks live in loadRouterCard
+    version: z.literal("uniroute.router.v1"),
+    embedder: z.object({ model: z.string().min(1), dims: z.number().int().positive() }),
+    lambda: z.number().nonnegative(), // default trade-off weight; a RoutedModel config may override it
+    assignment: z.union([
+        z.object({ type: z.literal("centroids"), centroids: z.array(z.array(z.number())).min(1) }), // one row per cluster
+        z.object({ type: z.literal("softmax"), theta: z.array(z.array(z.number())).min(1) }) // one row per cluster, each with a trailing bias column
+    ]),
+    models: z.array(cardModelSchema).min(1)
+});
+/** Parse and structurally validate a router card (e.g. from readFile + JSON.parse). */
+export function loadRouterCard(data) {
+    const card = routerCardSchema.parse(data);
+    const rows = card.assignment.type === "centroids"
+        ? card.assignment.centroids
+        : card.assignment.theta;
+    // softmax theta carries a trailing bias column on top of the embedding dims.
+    const expectedWidth = card.assignment.type === "centroids" ? card.embedder.dims : card.embedder.dims + 1;
+    for (const row of rows) {
+        if (row.length !== expectedWidth) {
+            throw new Error(`router card assignment width ${row.length} does not match embedder dims ${card.embedder.dims}`);
+        }
+    }
+    const clusters = rows.length;
+    for (const model of card.models) {
+        if (model.psi.length !== clusters) {
+            throw new Error(`router card model ${model.id} has psi length ${model.psi.length}, expected ${clusters}`);
+        }
+    }
+    return card;
+}
+/**
+ * Cluster weights Phi(x): one-hot nearest centroid, or the learned softmax.
+ *
+ * @param card Router card; only `embedder.dims` and `assignment` are read.
+ * @param embedding Prompt embedding; its length must equal card.embedder.dims.
+ * @returns One weight per cluster (assignment row).
+ * @throws If the embedding length is wrong or the assignment type is unknown.
+ */
+function clusterWeights(card, embedding) {
+    if (embedding.length !== card.embedder.dims) {
+        throw new Error(`embedding has ${embedding.length} dims, the router card expects ${card.embedder.dims}`);
+    }
+    const assignment = card.assignment;
+    switch (assignment.type) {
+        case "centroids": {
+            let best = 0; // index of the nearest centroid so far; the strict `<` below keeps the earliest index on ties
+            let bestDistance = Infinity;
+            for (let k = 0; k < assignment.centroids.length; k++) {
+                const centroid = assignment.centroids[k] ?? [];
+                let distance = 0;
+                for (let d = 0; d < centroid.length; d++) {
+                    const diff = (embedding[d] ?? 0) - (centroid[d] ?? 0);
+                    distance += diff * diff; // squared Euclidean distance; the ordering is the same without the square root
+                }
+                if (distance < bestDistance) {
+                    bestDistance = distance;
+                    best = k;
+                }
+            }
+            return assignment.centroids.map((_, k) => (k === best ? 1 : 0)); // one-hot vector over the clusters
+        }
+        case "softmax": {
+            const features = [...embedding, 1]; // the constant 1 lines up with theta's trailing bias column
+            const logits = assignment.theta.map((row) => row.reduce((sum, weight, d) => sum + weight * (features[d] ?? 0), 0));
+            const max = Math.max(...logits); // subtracted before exponentiating, so the largest exponent is 0
+            const exps = logits.map((logit) => Math.exp(logit - max));
+            const total = exps.reduce((sum, value) => sum + value, 0);
+            return exps.map((value) => value / total);
+        }
+        default: { // not reachable for a card that passed routerCardSchema
+            const exhausted = assignment;
+            throw new Error(`unknown assignment type: ${JSON.stringify(exhausted)}`);
+        }
+    }
+}
+/**
+ * Serialize the prompt's user-visible text for embedding.
+ *
+ * String contents and `text` pieces of every message are collected in prompt
+ * order, whatever the message role; pieces of any other type are skipped.
+ * The collected parts are joined with a newline.
+ *
+ * @param options Call options; only `options.prompt` is read.
+ * @returns The joined text ("" when the prompt holds no text).
+ */
+function promptText(options) {
+    const parts = [];
+    for (const message of options.prompt) {
+        if (typeof message.content === "string") {
+            parts.push(message.content);
+            continue;
+        }
+        for (const piece of message.content) {
+            if (piece.type === "text") // non-text pieces contribute nothing to the embedding input
+                parts.push(piece.text);
+        }
+    }
+    return parts.join("\n");
+}
+export class RoutedModel {
+    specificationVersion = "v3";
+    provider = "warrant-uniroute";
+    modelId;
+    config;
+    constructor(config) {
+        const cardIds = new Set(config.card.models.map((model) => model.id));
+        const missing = [...cardIds].filter((id) => !(id in config.candidates));
+        if (missing.length > 0) {
+            throw new Error(`router card models without candidates: ${missing.join(", ")}`);
+        }
+        this.config = config;
+        this.modelId = `uniroute(${config.card.models.map((model) => model.id).join(" | ")})`;
+    }
+    get supportedUrls() {
+        return {};
+    }
+    /** Candidates ordered by cost-adjusted predicted error (best first). */
+    async rank(options) {
+        const { card } = this.config;
+        const lambda = this.config.lambda ?? card.lambda;
+        const embedding = await this.config.embed(promptText(options));
+        const weights = clusterWeights(card, embedding);
+        const ranked = card.models.map((model) => {
+            const gamma = model.psi.reduce((sum, error, k) => sum + error * (weights[k] ?? 0), 0);
+            return {
+                id: model.id,
+                model: this.config.candidates[model.id],
+                gamma,
+                cost: model.cost,
+                score: gamma + lambda * model.cost
+            };
+        });
+        // Ties break toward the cheaper model, matching the Python rule.
+        return ranked.sort((a, b) => a.score - b.score || a.cost - b.cost);
+    }
+    note(candidate, fallback, reason) {
+        this.config.onDecision?.({
+            model: candidate.id,
+            predictedError: candidate.gamma,
+            cost: candidate.cost,
+            score: candidate.score,
+            fallback,
+            reason
+        });
+    }
+    async dispatch(options, call) {
+        const ranked = await this.rank(options);
+        const allowFallback = this.config.fallback ?? true;
+        let lastError;
+        for (let i = 0; i < ranked.length; i++) {
+            const candidate = ranked[i];
+            const fallback = i > 0;
+            const reason = fallback
+                ? `fallback: ${lastError?.message ?? String(lastError)}`
+                : `lowest cost-adjusted predicted error (gamma=${candidate.gamma.toFixed(4)}, cost=${candidate.cost.toFixed(4)})`;
+            try {
+                const result = await call(candidate.model);
+                this.note(candidate, fallback, reason);
+                return result;
+            }
+            catch (error) {
+                lastError = error;
+                this.note(candidate, fallback, `call failed: ${error instanceof Error ? error.message : String(error)}`);
+                if (!allowFallback)
+                    throw error;
+            }
+        }
+        throw lastError instanceof Error
+            ? lastError
+            : new Error(`every candidate failed: ${String(lastError)}`);
+    }
+    doGenerate(options) {
+        return this.dispatch(options, (model) => model.doGenerate(options));
+    }
+    doStream(options) {
+        // A stream that fails to *start* falls back; one that dies mid-flight
+        // belongs to the model that produced it (same semantics as HandoffModel).
+        return this.dispatch(options, (model) => model.doStream(options));
+    }
+}
+/**
+ * Create a UniRoute-routed model over a candidate pool.
+ *
+ * Thin wrapper around `new RoutedModel(config)`; it throws whenever the
+ * constructor does.
+ */
+export function routedModel(config) {
+    return new RoutedModel(config);
+}
+/**
+ * Attach a routed model to a continuation context as `h.model`, recording
+ * every routing decision as a `model.routed` trace event (the analog of
+ * withModel for pools). `localModels` lists candidate ids served locally;
+ * everything else is reported as a cloud route, and fallbacks surface as
+ * escalations so triggers.modelEscalated() fires.
+ *
+ * @param h Continuation context; its `noteModelDecision` is called for every
+ *   reported decision.
+ * @param config Routed-model config plus the optional `localModels` id list.
+ * @returns Whatever `attachModel` returns for `h` and the new routed model.
+ */
+export function withRoutedModel(h, config) {
+    const { localModels, ...rest } = config; // localModels is consumed here and not passed on to routedModel
+    const local = new Set(localModels ?? []);
+    return attachModel(h, routedModel({
+        ...rest,
+        onDecision: (decision) => { // listed after the spread, so it replaces any onDecision present in rest
+            h.noteModelDecision({
+                model: decision.model,
+                route: local.has(decision.model) ? "local" : "cloud",
+                escalated: decision.fallback, // the fallback flag doubles as the escalation flag
+                reason: decision.reason
+            });
+        }
+    }));
+}

package/dist/swarm-tools.d.ts ADDED Viewed

@@ -0,0 +1,149 @@
+import type { Tool } from "ai";
+import { Handoff } from "@fusionkit/handoff";
+import type { ContinuationPolicy, Scorecard } from "@fusionkit/handoff";
+import type { ActorRef, AgentSpec, RunStatus, SessionIsolation } from "@fusionkit/protocol";
+import { PlaneClient } from "@fusionkit/sdk";
+/**
+ * `swarmTools()` gives a *cloud orchestrator harness* (Claude Code dynamic
+ * workflows, Codex goals — anything run through `HarnessAgent`) the governed
+ * dispatch surface it lacks: fan a goal out across cheap local Pi workers,
+ * inspect them, compose their disjoint results, and escalate the rest to a
+ * cloud target. The orchestration *loop* stays the harness's own; Warrant
+ * contributes only the execution boundary, exactly as `remoteTools()` does
+ * for app-owned loops.
+ *
+ * Every tool is host-executed (the harness calls it; this process runs it),
+ * each dispatch and escalation is a signed governed run with an offline-
+ * verifiable receipt, and the only writes that reach the workspace of record
+ * are pulls of those governed runs. The orchestrator's own sandbox is never
+ * mirrored back. Judgment is the orchestrator's; the *evidence* it judges on
+ * — the deterministic `Scorecard` and the receipt — is Warrant's.
+ *
+ * Structural invariant: only the orchestrator receives these tools. Workers
+ * are plain `pi` runs and cannot dispatch, so fan-out depth is one.
+ */
+export type SwarmPlane = PlaneClient | { // either a ready PlaneClient or a url + admin token (presumably used to build one; confirm in swarm-tools.js)
+    url: string;
+    adminToken: string;
+};
+export type SwarmToolsConfig = { // standalone wiring for swarmTools(); SwarmToolsContextConfig is the context-based alternative
+    /** Local git workspace whose state every governed run materializes. */
+    workspace: string;
+    plane: SwarmPlane; // a client instance, or url + admin token
+    /** Pool of runners with a pi harness backend: the cheap local workers. */
+    workerPool: string;
+    /** Pool that runs escalations (a real-OS tier for the cloud agent). */
+    cloudPool: string;
+    actor?: ActorRef; // NOTE(review): undocumented; presumably the identity runs are attributed to, confirm
+    secrets?: string[]; // NOTE(review): undocumented; presumably names of secrets exposed to runs, confirm
+    allowHosts?: string[]; // NOTE(review): undocumented; presumably a network allow-list for runs, confirm
+    allowUntracked?: string[]; // NOTE(review): undocumented; presumably untracked paths admitted into the materialized workspace, confirm
+    /** Client-side continuation policy (fan-out ceiling, allowed pools). Defaults to localFirst(). */
+    policy?: ContinuationPolicy;
+    /** Per-run wait ceiling. Defaults to 10 minutes. */
+    timeoutMs?: number;
+    /** Agent for workers. Defaults to pi (the local-swarm harness). */
+    workerAgent?: AgentSpec;
+    /** Session tier for workers. Defaults to "hermetic" (just-bash + pi). */
+    workerSession?: SessionIsolation;
+    /** Session tier for escalations. Defaults to "process". */
+    cloudSession?: SessionIsolation;
+    /** Agent for escalations. Defaults to claude-code. */
+    cloudAgent?: AgentSpec;
+    /** Cap on cloud escalations for the lifetime of this toolset. Defaults to the fan-out ceiling. */
+    maxEscalations?: number;
+    /** Max bytes of each pulled diff returned to the orchestrator. Defaults to 4 KiB. */
+    diffExcerptBytes?: number;
+};
+/**
+ * Alternative wiring: attach to an existing pi-default continuation context.
+ *
+ * The fields omitted from SwarmToolsConfig (workspace, plane, secrets,
+ * allowHosts, allowUntracked, actor, policy) are not accepted here,
+ * presumably because the supplied context already carries them (confirm).
+ */
+export type SwarmToolsContextConfig = Omit<SwarmToolsConfig, "workspace" | "plane" | "secrets" | "allowHosts" | "allowUntracked" | "actor" | "policy"> & {
+    context: Handoff;
+};
+export type WorkerTaskInput = { // one worker task inside a DispatchInput
+    prompt: string;
+    /** Files this worker is meant to touch. Surfaced in the prompt; verified from evidence. */
+    fileScope?: string[];
+};
+export type DispatchInput = { // input of the dispatch_workers tool (see SwarmToolSet)
+    tasks: WorkerTaskInput[];
+};
+export type DispatchOutput = { // output of the dispatch_workers tool
+    dispatched: { // one entry per dispatched task: its run id and prompt
+        runId: string;
+        prompt: string;
+    }[];
+    /** True when the requested fan-out exceeded the continuation policy ceiling. */
+    budgetExceeded: boolean;
+    reason: string;
+};
+export type StatusInput = { // input of the worker_status tool (see SwarmToolSet)
+    runIds: string[];
+};
+export type StatusOutput = { // output of the worker_status tool
+    statuses: {
+        runId: string;
+        status: RunStatus;
+        known: boolean; // NOTE(review): presumably false for a run id this toolset has no record of, confirm
+    }[];
+};
+export type PullInput = { // input of the pull_worker tool (see SwarmToolSet)
+    runId: string;
+};
+export type PullOutput = { // output of the pull_worker tool
+    runId: string;
+    status: RunStatus;
+    /** "accepted": pulled onto the workspace; "escalate": failed or overlapping. */
+    verdict: "accepted" | "escalate";
+    reason: string;
+    filesChanged: string[];
+    /** Paths that collided with already-pulled work (verdict "escalate"). */
+    conflictingPaths?: string[];
+    scorecard?: Scorecard;
+    diffExcerpt?: string; // length is capped by SwarmToolsConfig.diffExcerptBytes, per that field's doc
+    receipt?: { // same shape as EscalateOutput.receipt
+        contractHash: string;
+        eventsHead: string;
+        verified: boolean;
+    };
+};
+export type EscalateInput = { // input of the escalate_task tool (see SwarmToolSet)
+    task: string;
+    reason?: string;
+};
+export type EscalateOutput = { // output of the escalate_task tool
+    runId?: string; // optional: NOTE(review) presumably absent when no run was started (e.g. budget exhausted), confirm
+    status?: RunStatus;
+    /** True when the escalation budget for this toolset is exhausted. */
+    budgetExceeded: boolean;
+    reason: string;
+    filesChanged?: string[];
+    receipt?: { // same shape as PullOutput.receipt
+        contractHash: string;
+        eventsHead: string;
+        verified: boolean;
+    };
+};
+export type SwarmToolSet = { // the four tools, keyed by tool name, each typed with its input and output
+    dispatch_workers: Tool<DispatchInput, DispatchOutput>;
+    worker_status: Tool<StatusInput, StatusOutput>;
+    pull_worker: Tool<PullInput, PullOutput>;
+    escalate_task: Tool<EscalateInput, EscalateOutput>;
+};
+/**
+ * One evidence record per governed run the orchestrator drove through these tools.
+ *
+ * `tool` names the tool that produced the record; `worker_status` is not
+ * among the possible values.
+ */
+export type SwarmRunRecord = {
+    tool: "dispatch_workers" | "pull_worker" | "escalate_task";
+    runId: string;
+    status: RunStatus;
+    verdict?: "accepted" | "escalate"; // same value set as PullOutput.verdict
+    contractHash?: string;
+    receiptVerified?: boolean;
+};
+export type SwarmTools = { // return value of swarmTools()
+    /** AI SDK-compatible tools; pass as `HarnessAgent`'s `tools`. */
+    tools: SwarmToolSet;
+    /** One record per governed run driven through these tools. */
+    calls(): SwarmRunRecord[];
+    /** The underlying pi-default continuation context (trace, summary, …). */
+    context: Handoff;
+};
+export declare function swarmTools(config: SwarmToolsConfig | SwarmToolsContextConfig): SwarmTools; // accepts either the standalone config or the variant carrying an existing `context`