@fusionkit/adapter-ai-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,57 @@
1
+ import { jsonSchema, tool } from "ai";
2
+ import { createCommandContext, executeGovernedCommand, targets, toGovernedRunRecord } from "@fusionkit/handoff";
3
/** Upper bound, in milliseconds, on how long one governed tool run is awaited (five minutes). */
const DEFAULT_REMOTE_TOOL_TIMEOUT_MS = 5 * 60 * 1000;
/**
 * Build the governed `shell` tool for an app-owned model loop (spec §6.2).
 *
 * The loop itself keeps running in the caller's process and makes no
 * durability claim. What this adds is the execution boundary: each tool call
 * is executed through `executeGovernedCommand` against the configured pool
 * and its outcome is kept as a run record.
 *
 * No `handoff-needed` stream event and no mid-generation continuation are
 * provided; both are deliberately out of scope.
 *
 * @param config Either carries a ready-made `context`, or is handed to
 *   `createCommandContext` to build one. Also read: `pool`, `pullResults`
 *   (defaults to true) and `timeoutMs` (defaults to
 *   DEFAULT_REMOTE_TOOL_TIMEOUT_MS).
 * @returns `tools` (the `shell` tool), `calls()` (a copy of the records
 *   collected so far) and the `context` in use.
 */
export function remoteTools(config) {
    let context;
    if ("context" in config) {
        context = config.context;
    }
    else {
        context = createCommandContext(config);
    }
    const target = targets.pool(config.pool);
    const pullResults = config.pullResults ?? true;
    const timeoutMs = config.timeoutMs ?? DEFAULT_REMOTE_TOOL_TIMEOUT_MS;
    const records = [];
    const description = [
        "Run a shell command in a governed session on a customer-controlled runner. ",
        "The session materializes the current workspace; changes are pulled back. ",
        "Every call is recorded in a signed, offline-verifiable receipt."
    ].join("");
    const inputSchema = jsonSchema({
        type: "object",
        properties: {
            command: {
                type: "string",
                description: "The shell command to execute in the governed session."
            }
        },
        required: ["command"],
        additionalProperties: false
    });
    // Runs one command through the governed boundary and remembers its record.
    // A rejected run propagates to the caller and leaves no record behind.
    const runShell = async ({ command }) => {
        const outcome = await executeGovernedCommand(context, {
            command,
            target,
            reason: "app-owned loop tool call",
            timeoutMs,
            pullResults
        });
        const record = { toolName: "shell", ...toGovernedRunRecord(command, outcome) };
        records.push(record);
        return {
            runId: outcome.run.runId,
            status: outcome.status,
            exitCode: outcome.exitCode,
            output: outcome.output
        };
    };
    const shell = tool({ description, inputSchema, execute: runShell });
    return {
        tools: { shell },
        // Hand out a copy so callers cannot mutate the internal log.
        calls: () => records.slice(),
        context
    };
}
@@ -0,0 +1,88 @@
1
+ import type { LanguageModelV3, LanguageModelV3CallOptions, LanguageModelV3GenerateResult, LanguageModelV3StreamResult } from "@ai-sdk/provider";
2
+ import { z } from "zod";
3
+ import type { Handoff } from "@fusionkit/handoff";
4
+ declare const routerCardSchema: z.ZodObject<{
5
+ version: z.ZodLiteral<"uniroute.router.v1">;
6
+ embedder: z.ZodObject<{
7
+ model: z.ZodString;
8
+ dims: z.ZodNumber;
9
+ }, z.core.$strip>;
10
+ lambda: z.ZodNumber;
11
+ assignment: z.ZodUnion<readonly [z.ZodObject<{
12
+ type: z.ZodLiteral<"centroids">;
13
+ centroids: z.ZodArray<z.ZodArray<z.ZodNumber>>;
14
+ }, z.core.$strip>, z.ZodObject<{
15
+ type: z.ZodLiteral<"softmax">;
16
+ theta: z.ZodArray<z.ZodArray<z.ZodNumber>>;
17
+ }, z.core.$strip>]>;
18
+ models: z.ZodArray<z.ZodObject<{
19
+ id: z.ZodString;
20
+ psi: z.ZodArray<z.ZodNumber>;
21
+ cost: z.ZodNumber;
22
+ }, z.core.$strip>>;
23
+ }, z.core.$strip>;
24
+ export type RouterCard = z.infer<typeof routerCardSchema>;
25
+ /** Parse and structurally validate a router card (e.g. from readFile + JSON.parse). */
26
+ export declare function loadRouterCard(data: unknown): RouterCard;
27
+ /** One routing decision, reported once per attempt: for the attempt that succeeds and for every attempt that failed before it. */
28
+ export type RouteDecision = {
29
+ /** Id of the model this attempt was sent to (a card model id). */
30
+ model: string;
31
+ /** Predicted error probability gamma for the chosen model. */
32
+ predictedError: number;
33
+ /** The chosen model's per-prompt cost from the card. */
34
+ cost: number;
35
+ /** gamma + lambda * cost, the quantity that was minimised. */
36
+ score: number;
37
+ /** True for every attempt after the best-ranked candidate, i.e. whenever a better-ranked candidate has already failed. */
38
+ fallback: boolean;
39
+ /** Human-readable explanation; for a failed attempt it is "call failed: " followed by the error message. */
40
+ reason: string;
41
+ };
42
+ export type RoutedModelConfig = {
43
+ /** The fitted router (see loadRouterCard). */
44
+ card: RouterCard;
45
+ /** Card model id -> the model that serves it (e.g. an mlxServer instance). Every model id in the card needs an entry; the RoutedModel constructor throws otherwise. */
46
+ candidates: Record<string, LanguageModelV3>;
47
+ /**
48
+ * Embed the prompt text in the card's embedding space. Must be the same
49
+ * embedder the card was fitted with (card.embedder.model); the vector
50
+ * length is checked against card.embedder.dims on every call. The text passed in is the prompt's string contents and text parts joined with newlines.
51
+ */
52
+ embed: (text: string) => Promise<number[]>;
53
+ /** Override the card's default lambda (cost/quality trade-off). When omitted, card.lambda is used. */
54
+ lambda?: number;
55
+ /** Try the next-best candidate when a call fails. Defaults to true. When false, the first failure is rethrown. */
56
+ fallback?: boolean;
57
+ /** Observer invoked for every attempt, failed ones included (withRoutedModel installs its own observer that forwards to h.noteModelDecision). */
58
+ onDecision?: (decision: RouteDecision) => void;
59
+ };
60
+ export declare class RoutedModel implements LanguageModelV3 {
61
+ readonly specificationVersion: "v3";
62
+ readonly provider = "warrant-uniroute";
63
+ readonly modelId: string;
64
+ private readonly config;
65
+ constructor(config: RoutedModelConfig);
66
+ get supportedUrls(): LanguageModelV3["supportedUrls"];
67
+ /** Candidates ordered by cost-adjusted predicted error (best first). */
68
+ private rank;
69
+ private note;
70
+ private dispatch;
71
+ doGenerate(options: LanguageModelV3CallOptions): Promise<LanguageModelV3GenerateResult>;
72
+ doStream(options: LanguageModelV3CallOptions): Promise<LanguageModelV3StreamResult>;
73
+ }
74
+ /** Create a UniRoute-routed model over a candidate pool. */
75
+ export declare function routedModel(config: RoutedModelConfig): RoutedModel;
76
+ /**
77
+ * Attach a routed model to a continuation context as `h.model`, recording
78
+ * every routing decision as a `model.routed` trace event (the analog of
79
+ * withModel for pools). `localModels` lists candidate ids served locally;
80
+ * everything else is reported as a cloud route, and fallbacks surface as
81
+ * escalations so triggers.modelEscalated() fires.
82
+ */
83
+ export declare function withRoutedModel<H extends Handoff>(h: H, config: Omit<RoutedModelConfig, "onDecision"> & {
84
+ localModels?: string[];
85
+ }): H & {
86
+ model: RoutedModel;
87
+ };
88
+ export {};
@@ -0,0 +1,218 @@
1
+ import { z } from "zod";
2
+ import { attachModel } from "./model.js";
3
+ /**
4
+ * UniRoute routing over a pool of models (arXiv:2502.08773).
5
+ *
6
+ * The router is a *router card* — a portable `uniroute.router.v1` JSON
7
+ * artifact produced offline by the Python `uniroute-mlx` package (see
8
+ * python/uniroute-mlx): a prompt-cluster assignment map plus, per candidate
9
+ * model, its per-cluster error vector Psi and per-prompt cost. Routing a
10
+ * call is: embed the prompt, derive cluster weights, and pick
11
+ * argmin_m [ Phi(x) . Psi(m) + lambda * cost(m) ].
12
+ *
13
+ * All fitting and evaluation lives in Python; this file only ports that
14
+ * one online rule. Candidates are ordinary LanguageModelV3 instances —
15
+ * typically `mlxServer(...)` managed processes plus a cloud model — so the
16
+ * existing lifecycle (lazy start, scale-to-zero, crash recovery) is reused
17
+ * untouched.
18
+ *
19
+ * Honest semantics, mirroring HandoffModel: a failed call on the chosen
20
+ * model falls back to the next-best candidate *between* calls; once a
21
+ * stream has started emitting, it belongs to the model that produced it.
22
+ */
23
/** A list of numbers: one error vector, centroid, or theta row. */
const numberVectorSchema = z.array(z.number());
/** A non-empty list of number rows (centroids or softmax theta). */
const numberMatrixSchema = z.array(numberVectorSchema).min(1);
/** One candidate model in a router card: id, per-cluster error vector Psi, per-prompt cost. */
const cardModelSchema = z.object({
    id: z.string().min(1),
    psi: numberVectorSchema.min(1),
    cost: z.number().nonnegative()
});
/** The embedder a card was fitted with: model name and embedding width. */
const embedderSchema = z.object({
    model: z.string().min(1),
    dims: z.number().int().positive()
});
/** Prompt-to-cluster assignment: nearest centroid, or a learned softmax. */
const assignmentSchema = z.union([
    z.object({ type: z.literal("centroids"), centroids: numberMatrixSchema }),
    z.object({ type: z.literal("softmax"), theta: numberMatrixSchema })
]);
/**
 * Structural schema of a `uniroute.router.v1` router card. Cross-field
 * consistency (row widths, psi lengths) is checked separately in
 * loadRouterCard.
 */
const routerCardSchema = z.object({
    version: z.literal("uniroute.router.v1"),
    embedder: embedderSchema,
    lambda: z.number().nonnegative(),
    assignment: assignmentSchema,
    models: z.array(cardModelSchema).min(1)
});
38
/**
 * Parse and structurally validate a router card (e.g. from readFile + JSON.parse).
 *
 * Beyond the schema, two cross-field invariants are enforced:
 *  - every assignment row has the expected width: `embedder.dims` for
 *    centroids, `embedder.dims + 1` for softmax (theta carries a trailing
 *    bias column on top of the embedding dims);
 *  - every model's `psi` has one entry per cluster (one per assignment row).
 *
 * @param data Untrusted card data.
 * @returns The validated card.
 * @throws When schema parsing fails (error raised by the schema) or when a
 *   row width / psi length does not match (plain Error).
 */
export function loadRouterCard(data) {
    const card = routerCardSchema.parse(data);
    const { assignment, embedder, models } = card;
    const isSoftmax = assignment.type !== "centroids";
    const rows = isSoftmax ? assignment.theta : assignment.centroids;
    const expectedWidth = isSoftmax ? embedder.dims + 1 : embedder.dims;
    // Name the width that was actually expected: for softmax it is dims + 1,
    // so reporting bare `dims` would produce a self-contradicting message
    // such as "width 4 does not match embedder dims 4".
    const expectedLabel = isSoftmax
        ? `${embedder.dims} + 1 (bias column)`
        : `${embedder.dims}`;
    for (const row of rows) {
        if (row.length !== expectedWidth) {
            throw new Error(`router card assignment width ${row.length} does not match embedder dims ${expectedLabel}`);
        }
    }
    const clusters = rows.length;
    for (const model of models) {
        if (model.psi.length !== clusters) {
            throw new Error(`router card model ${model.id} has psi length ${model.psi.length}, expected ${clusters}`);
        }
    }
    return card;
}
59
/**
 * Cluster weights Phi(x): one-hot nearest centroid, or the learned softmax.
 *
 * @param card Router card; only `embedder.dims` and `assignment` are read.
 * @param embedding Prompt embedding; must have exactly `card.embedder.dims`
 *   finite numbers.
 * @returns One weight per cluster. For centroids, a one-hot vector marking
 *   the centroid with the smallest squared Euclidean distance (the first one
 *   wins ties). For softmax, the softmax of `theta . [embedding, 1]`.
 * @throws If the embedding has the wrong length, contains a non-finite
 *   value, or the assignment type is unknown.
 */
function clusterWeights(card, embedding) {
    if (embedding.length !== card.embedder.dims) {
        throw new Error(`embedding has ${embedding.length} dims, the router card expects ${card.embedder.dims}`);
    }
    // A NaN/Infinity component would otherwise be accepted silently: every
    // centroid distance becomes NaN (so cluster 0 always wins) and softmax
    // weights become NaN, which makes the downstream ranking meaningless.
    const badIndex = embedding.findIndex((value) => !Number.isFinite(value));
    if (badIndex !== -1) {
        throw new Error(`embedding has a non-finite value at index ${badIndex}`);
    }
    const assignment = card.assignment;
    switch (assignment.type) {
        case "centroids": {
            let best = 0;
            let bestDistance = Infinity;
            for (let k = 0; k < assignment.centroids.length; k++) {
                const centroid = assignment.centroids[k] ?? [];
                let distance = 0;
                for (let d = 0; d < centroid.length; d++) {
                    const diff = (embedding[d] ?? 0) - (centroid[d] ?? 0);
                    distance += diff * diff;
                }
                // Strict comparison keeps the earliest centroid on ties.
                if (distance < bestDistance) {
                    bestDistance = distance;
                    best = k;
                }
            }
            return assignment.centroids.map((_, k) => (k === best ? 1 : 0));
        }
        case "softmax": {
            // The trailing 1 multiplies theta's bias column.
            const features = [...embedding, 1];
            const logits = assignment.theta.map((row) => row.reduce((sum, weight, d) => sum + weight * (features[d] ?? 0), 0));
            // Subtract the largest logit before exponentiating for numerical
            // stability; a fold avoids spreading a large array into arguments.
            const max = logits.reduce((highest, logit) => (logit > highest ? logit : highest), -Infinity);
            const exps = logits.map((logit) => Math.exp(logit - max));
            const total = exps.reduce((sum, value) => sum + value, 0);
            return exps.map((value) => value / total);
        }
        default: {
            const exhausted = assignment;
            throw new Error(`unknown assignment type: ${JSON.stringify(exhausted)}`);
        }
    }
}
97
/**
 * Flatten a prompt into the single string that gets embedded.
 *
 * Messages whose `content` is a plain string contribute that string; messages
 * whose `content` is a list contribute the `text` of each part with
 * `type === "text"` and nothing for any other part. Pieces keep prompt order
 * and are joined with newlines.
 *
 * @param options Call options; only `options.prompt` is read.
 * @returns The joined text, or "" when the prompt holds no text.
 */
function promptText(options) {
    const pieces = options.prompt.flatMap((message) => {
        const { content } = message;
        if (typeof content === "string") {
            return [content];
        }
        return content
            .filter((part) => part.type === "text")
            .map((part) => part.text);
    });
    return pieces.join("\n");
}
112
/**
 * A LanguageModelV3 that routes each call across a pool of candidate models
 * using a router card: the prompt text is embedded, turned into cluster
 * weights, and candidates are tried in order of `gamma + lambda * cost`.
 *
 * A call that fails on one candidate falls back to the next-ranked one
 * (unless `config.fallback` is false). Every attempt, successful or not, is
 * reported to `config.onDecision`.
 */
export class RoutedModel {
    specificationVersion = "v3";
    provider = "warrant-uniroute";
    modelId;
    config;
    /**
     * @param config Router card, candidates, embedder and options.
     * @throws If any model id in the card has no entry in `config.candidates`.
     */
    constructor(config) {
        const ids = config.card.models.map((model) => model.id);
        // Own-property lookup: the `in` operator would also accept inherited
        // keys, so a card id such as "toString" or "constructor" would pass
        // the check without any candidate actually being registered.
        const missing = [...new Set(ids)].filter((id) => !Object.prototype.hasOwnProperty.call(config.candidates, id));
        if (missing.length > 0) {
            throw new Error(`router card models without candidates: ${missing.join(", ")}`);
        }
        this.config = config;
        this.modelId = `uniroute(${ids.join(" | ")})`;
    }
    /** No URLs are declared as natively supported. */
    get supportedUrls() {
        return {};
    }
    /**
     * Candidates ordered by cost-adjusted predicted error (best first).
     *
     * @param options Call options; the prompt text is embedded via `config.embed`.
     * @returns One entry per card model: id, candidate model, gamma
     *   (Phi(x) . Psi(m)), cost and score (gamma + lambda * cost).
     */
    async rank(options) {
        const { card } = this.config;
        const lambda = this.config.lambda ?? card.lambda;
        const embedding = await this.config.embed(promptText(options));
        const weights = clusterWeights(card, embedding);
        const ranked = card.models.map((model) => {
            const gamma = model.psi.reduce((sum, error, k) => sum + error * (weights[k] ?? 0), 0);
            return {
                id: model.id,
                model: this.config.candidates[model.id],
                gamma,
                cost: model.cost,
                score: gamma + lambda * model.cost
            };
        });
        // Ties break toward the cheaper model, matching the Python rule.
        return ranked.sort((a, b) => a.score - b.score || a.cost - b.cost);
    }
    /** Report one attempt to the optional `onDecision` observer. */
    note(candidate, fallback, reason) {
        this.config.onDecision?.({
            model: candidate.id,
            predictedError: candidate.gamma,
            cost: candidate.cost,
            score: candidate.score,
            fallback,
            reason
        });
    }
    /**
     * Run `call` against the ranked candidates until one succeeds.
     *
     * @param options Call options (used for ranking and for the abort signal).
     * @param call Invokes the operation on one candidate model.
     * @returns The first successful result.
     * @throws The failure itself when fallback is disabled or the caller's
     *   abort signal has fired; otherwise the last failure once every
     *   candidate has been tried.
     */
    async dispatch(options, call) {
        const ranked = await this.rank(options);
        const allowFallback = this.config.fallback ?? true;
        let lastError;
        for (let i = 0; i < ranked.length; i++) {
            const candidate = ranked[i];
            const fallback = i > 0;
            let result;
            try {
                result = await call(candidate.model);
            }
            catch (error) {
                lastError = error;
                this.note(candidate, fallback, `call failed: ${error instanceof Error ? error.message : String(error)}`);
                // Once the caller has aborted, trying further candidates only
                // issues requests nobody is waiting for.
                if (!allowFallback || options.abortSignal?.aborted)
                    throw error;
                continue;
            }
            // Reported outside the try block so that an exception thrown by
            // the observer is not mistaken for a model failure (which would
            // discard a successful result and call another candidate).
            const reason = fallback
                ? `fallback: ${lastError?.message ?? String(lastError)}`
                : `lowest cost-adjusted predicted error (gamma=${candidate.gamma.toFixed(4)}, cost=${candidate.cost.toFixed(4)})`;
            this.note(candidate, fallback, reason);
            return result;
        }
        throw lastError instanceof Error
            ? lastError
            : new Error(`every candidate failed: ${String(lastError)}`);
    }
    /** Route a non-streaming generation call. */
    doGenerate(options) {
        return this.dispatch(options, (model) => model.doGenerate(options));
    }
    /** Route a streaming call. */
    doStream(options) {
        // A stream that fails to *start* falls back; one that dies mid-flight
        // belongs to the model that produced it (same semantics as HandoffModel).
        return this.dispatch(options, (model) => model.doStream(options));
    }
}
193
/**
 * Create a UniRoute-routed model over a candidate pool.
 *
 * @param config Passed unchanged to the RoutedModel constructor.
 * @returns A new RoutedModel.
 */
export function routedModel(config) {
    return new RoutedModel(config);
}
/**
 * Attach a routed model to a continuation context via `attachModel`, and
 * forward every routing decision to `h.noteModelDecision` (the analog of
 * withModel for pools).
 *
 * Ids listed in `localModels` are reported with route "local"; every other
 * id is reported as "cloud". A decision's `fallback` flag is forwarded as
 * `escalated`, so that fallbacks surface as escalations.
 *
 * @param h Continuation context to attach to.
 * @param config Routed-model config plus optional `localModels`; any
 *   `onDecision` it carries is replaced by the forwarding observer.
 * @returns Whatever `attachModel` returns for `h` and the new routed model.
 */
export function withRoutedModel(h, config) {
    const { localModels, ...routerConfig } = config;
    const localIds = new Set(localModels ?? []);
    const onDecision = (decision) => {
        const route = localIds.has(decision.model) ? "local" : "cloud";
        h.noteModelDecision({
            model: decision.model,
            route,
            escalated: decision.fallback,
            reason: decision.reason
        });
    };
    const model = routedModel({ ...routerConfig, onDecision });
    return attachModel(h, model);
}
@@ -0,0 +1,149 @@
1
+ import type { Tool } from "ai";
2
+ import { Handoff } from "@fusionkit/handoff";
3
+ import type { ContinuationPolicy, Scorecard } from "@fusionkit/handoff";
4
+ import type { ActorRef, AgentSpec, RunStatus, SessionIsolation } from "@fusionkit/protocol";
5
+ import { PlaneClient } from "@fusionkit/sdk";
6
+ /**
7
+ * `swarmTools()` gives a *cloud orchestrator harness* (Claude Code dynamic
8
+ * workflows, Codex goals — anything run through `HarnessAgent`) the governed
9
+ * dispatch surface it lacks: fan a goal out across cheap local Pi workers,
10
+ * inspect them, compose their disjoint results, and escalate the rest to a
11
+ * cloud target. The orchestration *loop* stays the harness's own; Warrant
12
+ * contributes only the execution boundary, exactly as `remoteTools()` does
13
+ * for app-owned loops.
14
+ *
15
+ * Every tool is host-executed (the harness calls it; this process runs it),
16
+ * each dispatch and escalation is a signed governed run with an offline-
17
+ * verifiable receipt, and the only writes that reach the workspace of record
18
+ * are pulls of those governed runs. The orchestrator's own sandbox is never
19
+ * mirrored back. Judgment is the orchestrator's; the *evidence* it judges on
20
+ * — the deterministic `Scorecard` and the receipt — is Warrant's.
21
+ *
22
+ * Structural invariant: only the orchestrator receives these tools. Workers
23
+ * are plain `pi` runs and cannot dispatch, so fan-out depth is one.
24
+ */
25
+ export type SwarmPlane = PlaneClient | {
26
+ url: string;
27
+ adminToken: string;
28
+ };
29
+ export type SwarmToolsConfig = {
30
+ /** Local git workspace whose state every governed run materializes. */
31
+ workspace: string;
32
+ plane: SwarmPlane;
33
+ /** Pool of runners with a pi harness backend: the cheap local workers. */
34
+ workerPool: string;
35
+ /** Pool that runs escalations (a real-OS tier for the cloud agent). */
36
+ cloudPool: string;
37
+ actor?: ActorRef;
38
+ secrets?: string[];
39
+ allowHosts?: string[];
40
+ allowUntracked?: string[];
41
+ /** Client-side continuation policy (fan-out ceiling, allowed pools). Defaults to localFirst(). */
42
+ policy?: ContinuationPolicy;
43
+ /** Per-run wait ceiling. Defaults to 10 minutes. */
44
+ timeoutMs?: number;
45
+ /** Agent for workers. Defaults to pi (the local-swarm harness). */
46
+ workerAgent?: AgentSpec;
47
+ /** Session tier for workers. Defaults to "hermetic" (just-bash + pi). */
48
+ workerSession?: SessionIsolation;
49
+ /** Session tier for escalations. Defaults to "process". */
50
+ cloudSession?: SessionIsolation;
51
+ /** Agent for escalations. Defaults to claude-code. */
52
+ cloudAgent?: AgentSpec;
53
+ /** Cap on cloud escalations for the lifetime of this toolset. Defaults to the fan-out ceiling. */
54
+ maxEscalations?: number;
55
+ /** Max bytes of each pulled diff returned to the orchestrator. Defaults to 4 KiB. */
56
+ diffExcerptBytes?: number;
57
+ };
58
+ /** Alternative wiring: attach to an existing pi-default continuation context. */
59
+ export type SwarmToolsContextConfig = Omit<SwarmToolsConfig, "workspace" | "plane" | "secrets" | "allowHosts" | "allowUntracked" | "actor" | "policy"> & {
60
+ context: Handoff;
61
+ };
62
+ export type WorkerTaskInput = {
63
+ prompt: string;
64
+ /** Files this worker is meant to touch. Surfaced in the prompt; verified from evidence. */
65
+ fileScope?: string[];
66
+ };
67
+ export type DispatchInput = {
68
+ tasks: WorkerTaskInput[];
69
+ };
70
+ export type DispatchOutput = {
71
+ dispatched: {
72
+ runId: string;
73
+ prompt: string;
74
+ }[];
75
+ /** True when the requested fan-out exceeded the continuation policy ceiling. */
76
+ budgetExceeded: boolean;
77
+ reason: string;
78
+ };
79
+ export type StatusInput = {
80
+ runIds: string[];
81
+ };
82
+ export type StatusOutput = {
83
+ statuses: {
84
+ runId: string;
85
+ status: RunStatus;
86
+ known: boolean;
87
+ }[];
88
+ };
89
+ export type PullInput = {
90
+ runId: string;
91
+ };
92
+ export type PullOutput = {
93
+ runId: string;
94
+ status: RunStatus;
95
+ /** "accepted": pulled onto the workspace; "escalate": failed or overlapping. */
96
+ verdict: "accepted" | "escalate";
97
+ reason: string;
98
+ filesChanged: string[];
99
+ /** Paths that collided with already-pulled work (verdict "escalate"). */
100
+ conflictingPaths?: string[];
101
+ scorecard?: Scorecard;
102
+ diffExcerpt?: string;
103
+ receipt?: {
104
+ contractHash: string;
105
+ eventsHead: string;
106
+ verified: boolean;
107
+ };
108
+ };
109
+ export type EscalateInput = {
110
+ task: string;
111
+ reason?: string;
112
+ };
113
+ export type EscalateOutput = {
114
+ runId?: string;
115
+ status?: RunStatus;
116
+ /** True when the escalation budget for this toolset is exhausted. */
117
+ budgetExceeded: boolean;
118
+ reason: string;
119
+ filesChanged?: string[];
120
+ receipt?: {
121
+ contractHash: string;
122
+ eventsHead: string;
123
+ verified: boolean;
124
+ };
125
+ };
126
+ export type SwarmToolSet = {
127
+ dispatch_workers: Tool<DispatchInput, DispatchOutput>;
128
+ worker_status: Tool<StatusInput, StatusOutput>;
129
+ pull_worker: Tool<PullInput, PullOutput>;
130
+ escalate_task: Tool<EscalateInput, EscalateOutput>;
131
+ };
132
+ /** One evidence record per governed run the orchestrator drove through these tools. */
133
+ export type SwarmRunRecord = {
134
+ tool: "dispatch_workers" | "pull_worker" | "escalate_task";
135
+ runId: string;
136
+ status: RunStatus;
137
+ verdict?: "accepted" | "escalate";
138
+ contractHash?: string;
139
+ receiptVerified?: boolean;
140
+ };
141
+ export type SwarmTools = {
142
+ /** AI SDK-compatible tools; pass as `HarnessAgent`'s `tools`. */
143
+ tools: SwarmToolSet;
144
+ /** One record per governed run driven through these tools. */
145
+ calls(): SwarmRunRecord[];
146
+ /** The underlying pi-default continuation context (trace, summary, …). */
147
+ context: Handoff;
148
+ };
149
+ export declare function swarmTools(config: SwarmToolsConfig | SwarmToolsContextConfig): SwarmTools;