npm - @pugi/sdk - Versions diffs - 0.1.0-alpha.3 - Mend

@pugi/sdk 0.1.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/LICENSE +21 -0
package/README.md +29 -0
package/dist/agent-contracts.d.ts +311 -0
package/dist/agent-contracts.js +67 -0
package/dist/audit-trace.d.ts +387 -0
package/dist/audit-trace.js +44 -0
package/dist/device-flow.d.ts +98 -0
package/dist/device-flow.js +55 -0
package/dist/engine-adapter.d.ts +376 -0
package/dist/engine-adapter.js +47 -0
package/dist/engine-loop.d.ts +457 -0
package/dist/engine-loop.js +342 -0
package/dist/handoff.d.ts +605 -0
package/dist/handoff.js +76 -0
package/dist/index.d.ts +10 -0
package/dist/index.js +10 -0
package/dist/mcp-schemas.d.ts +27 -0
package/dist/mcp-schemas.js +11 -0
package/dist/permission-rules.d.ts +65 -0
package/dist/permission-rules.js +35 -0
package/dist/transport.d.ts +559 -0
package/dist/transport.js +482 -0
package/package.json +47 -0

package/dist/engine-loop.d.ts ADDED Viewed

@@ -0,0 +1,457 @@
+/**
+ * Engine loop protocol — the Pugi CLI's tool-use loop driver.
+ *
+ * The CLI's `NativePugiEngineAdapter` runs a structured tool-use loop against
+ * Anvil. Each turn the CLI sends the conversation transcript + a tools schema
+ * to the runtime; the runtime returns either a final text answer or a list of
+ * tool calls. The CLI executes the calls locally (read/write/edit/grep/glob/
+ * bash) against the workspace and feeds the results back in the next turn.
+ *
+ * This module defines the contracts shared by:
+ *   - CLI side: the loop driver (`runEngineLoop`) + budget enforcement.
+ *   - Runtime side: a thin proxy in front of `AnvilBridgeService.askPersona`.
+ *   - Tests: a fixture-based `EngineLoopClient` that returns canned responses
+ *     so the loop can be exercised without network.
+ *
+ * Local-first contract (ADR-0037):
+ *   - The CLI is the only side that touches the filesystem. The runtime
+ *     never sees raw file bytes — only the tool results that the local
+ *     loop chooses to surface back into the transcript.
+ *   - Budgets (`maxToolCalls`, `maxTokens`) are enforced client-side so a
+ *     runaway model cannot rack up Anvil cost without the operator noticing.
+ *   - The loop refuses to write/edit/bash when the command kind is `plan`.
+ *
+ * Why OpenAI-compatible shape (instead of Anthropic's tool_use blocks):
+ *   - Anvil's chat-completions endpoint is OpenAI-compatible; coercing to
+ *     OpenAI-style `tools` + `tool_calls` matches the upstream wire format
+ *     exactly. Providers that natively speak Anthropic (Claude) are wrapped
+ *     by Anvil's bridge layer — that translation is not the CLI's concern.
+ */
+import { z } from 'zod';
+/**
+ * Command surface that the CLI invokes. The runtime uses this to select a
+ * system prompt and persona behaviour:
+ *   - `code` — general edit+create. Budget: 20 tool calls / 50k tokens.
+ *   - `explain` — read-only walkthrough. Budget: 5 / 20k.
+ *   - `fix`  — bug investigation + targeted patch. Budget: 20 / 50k.
+ *   - `plan` — produce a plan artifact, no mutations. Budget: 8 / 30k
+ *               (raised from 3; see the dogfood note on
+ *               `defaultEngineBudgets`). Mutating tools refused even if
+ *               the model requests them.
+ *   - `build` — multi-file scaffolding. Budget: 30 / 80k.
+ */
+export declare const engineCommandKindSchema: z.ZodEnum<["code", "explain", "fix", "plan", "build"]>;
+export type EngineCommandKind = z.infer<typeof engineCommandKindSchema>;
+/**
+ * Per-command budget envelope. Hard caps enforced inside `runEngineLoop`:
+ *   - `maxToolCalls` — total executed tool calls across all turns.
+ *   - `maxTokens` — total tokens accumulated (prompt + completion) across
+ *     turns. Counted via `usage.totalTokens` reported by the runtime; when
+ *     the runtime reports `tokensUsed === 0` we fall back to a
+ *     `transcript-chars / 4` heuristic so a runtime that omits usage
+ *     accounting (older Anvil builds, fixture clients, providers that
+ *     return null usage on tool_use responses) still trips the budget
+ *     instead of looping forever. Code Reviewer P2 retro 2026-05-23.
+ *
+ * The loop terminates with `status: 'budget_exhausted'` when either cap is
+ * exceeded. The caller decides whether that is a failure or a normal stop.
+ *
+ * NOTE(review): both fields are declared as plain `z.ZodNumber`; whether
+ * the runtime schema additionally requires positive integers is not
+ * visible from this declaration — confirm against `engine-loop.js`.
+ */
+export declare const engineBudgetSchema: z.ZodObject<{
+    maxToolCalls: z.ZodNumber;
+    maxTokens: z.ZodNumber;
+}, "strip", z.ZodTypeAny, {
+    maxToolCalls: number;
+    maxTokens: number;
+}, {
+    maxToolCalls: number;
+    maxTokens: number;
+}>;
+export type EngineBudget = z.infer<typeof engineBudgetSchema>;
+/**
+ * Canonical per-command budgets. Tuned to keep Anvil cost predictable while
+ * still giving `build` enough headroom to scaffold a small feature.
+ * Typed as a `Record` keyed by `EngineCommandKind`, so every command kind
+ * has an `EngineBudget` entry.
+ *
+ *   code/fix → 20 calls / 50k tokens
+ *   explain  → 5 calls  / 20k tokens
+ *   plan     → 8 calls  / 30k tokens   (read-only)
+ *   build    → 30 calls / 80k tokens
+ *
+ * Dogfood note 2026-05-24: `plan` was originally budgeted at 3 tool calls
+ * on the assumption that the model would issue 1-2 read calls + emit the
+ * plan. Real-world traces show 3-4 glob/grep calls disappear into repo
+ * surveying alone — the model produces zero plan output and the artifact
+ * file says `[budget_exhausted]`. Bumping to 8 buys breathing room for
+ * decently-sized repos while still bounding cost. plan stays read-only at
+ * the sentinel level — the call-count change does not weaken safety.
+ */
+export declare const defaultEngineBudgets: Record<EngineCommandKind, EngineBudget>;
+/**
+ * Transcript message shape — mirrors OpenAI's chat-completions schema with
+ * a `tool` role for tool result frames. Pugi's runtime proxy maps these to
+ * AnvilBridgeMessage (which has the same shape modulo `name` carrying the
+ * tool_call_id for tool frames).
+ *
+ * `role` and `content` are always required; `toolCalls`, `toolCallId` and
+ * `toolName` are optional. Each tool call carries its `arguments` as a
+ * string (presumably JSON-encoded, as in OpenAI-style tool calls — TODO
+ * confirm).
+ */
+export declare const engineLoopMessageSchema: z.ZodObject<{
+    role: z.ZodEnum<["system", "user", "assistant", "tool"]>;
+    content: z.ZodString;
+    /** Optional model-emitted tool calls when `role === 'assistant'`. */
+    toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
+        id: z.ZodString;
+        name: z.ZodString;
+        arguments: z.ZodString;
+    }, "strip", z.ZodTypeAny, {
+        id: string;
+        name: string;
+        arguments: string;
+    }, {
+        id: string;
+        name: string;
+        arguments: string;
+    }>, "many">>;
+    /** Tool call id this `tool` frame is responding to. */
+    toolCallId: z.ZodOptional<z.ZodString>;
+    /** Tool name this `tool` frame is responding to. */
+    toolName: z.ZodOptional<z.ZodString>;
+}, "strip", z.ZodTypeAny, {
+    role: "system" | "user" | "assistant" | "tool";
+    content: string;
+    toolCalls?: {
+        id: string;
+        name: string;
+        arguments: string;
+    }[] | undefined;
+    toolCallId?: string | undefined;
+    toolName?: string | undefined;
+}, {
+    role: "system" | "user" | "assistant" | "tool";
+    content: string;
+    toolCalls?: {
+        id: string;
+        name: string;
+        arguments: string;
+    }[] | undefined;
+    toolCallId?: string | undefined;
+    toolName?: string | undefined;
+}>;
+export type EngineLoopMessage = z.infer<typeof engineLoopMessageSchema>;
+/**
+ * OpenAI-compatible tool definition. The CLI builds this from
+ * `toolRegistry`. `parameters` is a JSON Schema object — we keep it as
+ * `unknown` here so the SDK stays JSON-Schema-version-agnostic.
+ *
+ * Because `parameters` is declared as `z.ZodUnknown`, the inferred type
+ * makes it optional (`parameters?: unknown`), so the type alone does not
+ * force callers to supply it. `name` and `description` are required.
+ */
+export declare const engineLoopToolSchema: z.ZodObject<{
+    name: z.ZodString;
+    description: z.ZodString;
+    parameters: z.ZodUnknown;
+}, "strip", z.ZodTypeAny, {
+    name: string;
+    description: string;
+    parameters?: unknown;
+}, {
+    name: string;
+    description: string;
+    parameters?: unknown;
+}>;
+export type EngineLoopTool = z.infer<typeof engineLoopToolSchema>;
+/**
+ * Response shape returned by `EngineLoopClient.send`. Exactly one of three
+ * outcomes is returned per turn, discriminated by `stop`:
+ *   - `tool_use`: model emitted N tool calls; loop must execute and feed
+ *     results back. Not terminal — the loop continues, budget permitting.
+ *   - `text`: model emitted a final text answer; loop stops.
+ *   - `error`: runtime call failed (auth, network, schema). Loop stops
+ *     with `status: 'failed'` so the caller surfaces the cause.
+ *
+ * Only the `tool_use` and `text` variants carry `assistantMessage` and
+ * `tokensUsed`; the `error` variant carries a `code` and a `message`
+ * instead.
+ */
+export type EngineLoopResponse = {
+    stop: 'tool_use';
+    assistantMessage: EngineLoopMessage;
+    tokensUsed: number;
+} | {
+    stop: 'text';
+    assistantMessage: EngineLoopMessage;
+    content: string;
+    tokensUsed: number;
+} | {
+    stop: 'error';
+    code: 'auth_missing' | 'endpoint_missing' | 'rate_limited' | 'failed';
+    message: string;
+};
+/**
+ * Pluggable transport. Production binds this to an Anvil-backed HTTP
+ * client; tests bind a fixture client. The interface is intentionally
+ * narrow — just `send(messages, tools, options)` — so the loop driver
+ * does not depend on whether the runtime is HTTP, gRPC, or in-process.
+ *
+ * `send` takes the transcript so far, the tool definitions offered for the
+ * turn, and per-call options: `personaSlug` is required; `maxTokens`,
+ * `temperature` and `signal` are optional. Runtime failures have a
+ * dedicated `stop: 'error'` variant in `EngineLoopResponse`.
+ *
+ * NOTE(review): whether implementations may also reject the promise
+ * (rather than resolve with `stop: 'error'`) is not visible here — confirm.
+ */
+export interface EngineLoopClient {
+    send(messages: EngineLoopMessage[], tools: EngineLoopTool[], options: {
+        personaSlug: string;
+        maxTokens?: number;
+        temperature?: number;
+        signal?: AbortSignal;
+    }): Promise<EngineLoopResponse>;
+}
+/**
+ * Tool execution callback. Returns the textual result to feed back into
+ * the next turn's `tool` message. Throws (i.e. the returned promise
+ * rejects) → execution failed (the CLI captures the error string and
+ * feeds it back so the model can recover).
+ *
+ * The input carries the tool `name`, its `arguments` as a raw string
+ * (presumably JSON-encoded, as in OpenAI-style tool calls — TODO confirm),
+ * and the `callId` of the tool call being executed.
+ */
+export type EngineLoopToolExecutor = (input: {
+    name: string;
+    arguments: string;
+    callId: string;
+}) => Promise<string>;
+/**
+ * Lifecycle hooks. The CLI side wires these to `session.ts`
+ * recordToolCall/recordToolResult so the existing event log + index
+ * reflects every loop iteration.
+ *
+ * Every hook is optional and returns `void`. `onToolResult` receives the
+ * originating call's `id` and `name` plus a result discriminated on `ok`:
+ * `{ ok: true, content }` on success, `{ ok: false, error }` on failure.
+ */
+export interface EngineLoopHooks {
+    onTurnStart?(turnIndex: number, messageCount: number): void;
+    onTurnComplete?(turnIndex: number, response: EngineLoopResponse): void;
+    onToolCall?(call: {
+        id: string;
+        name: string;
+        arguments: string;
+    }): void;
+    onToolResult?(call: {
+        id: string;
+        name: string;
+    }, result: {
+        ok: true;
+        content: string;
+    } | {
+        ok: false;
+        error: string;
+    }): void;
+}
+/**
+ * Terminal status of the loop driver, reported to callers as
+ * `EngineLoopOutcome.status`. The CLI maps these to the existing
+ * `EngineResult.status` ('done' | 'blocked' | 'failed'):
+ *   - completed         → done
+ *   - budget_exhausted  → blocked (operator chose the budget)
+ *   - tool_refused      → blocked (plan mode + write requested)
+ *   - failed            → failed (runtime/transport problem)
+ */
+export type EngineLoopStatus = 'completed' | 'budget_exhausted' | 'tool_refused' | 'failed';
+export interface EngineLoopOutcome {
+    status: EngineLoopStatus;
+    /** Final assistant text (empty string on failure). */
+    finalText: string;
+    /** Number of tool calls actually executed (the quantity capped by `EngineBudget.maxToolCalls`). */
+    toolCallCount: number;
+    /** Cumulative tokens reported by the runtime. NOTE(review): may include the chars/4 estimate used when the runtime reports 0 — confirm. */
+    tokensUsed: number;
+    /** Number of LLM turns (network round-trips) used. */
+    turnsUsed: number;
+    /** Reason the loop did not complete, when `status !== 'completed'` (covers blocked statuses as well as `failed`). */
+    reason?: string;
+}
+/**
+ * Core driver. Pure transport-agnostic loop:
+ *
+ *   1. Seed the transcript with the system + user messages.
+ *   2. Call `client.send(transcript, tools)`.
+ *   3. If response is `text` → return completed.
+ *   4. If response is `error` → return failed (see `EngineLoopResponse`).
+ *   5. If response is `tool_use` → execute each call via `executor`,
+ *      append the assistant + tool frames to the transcript, increment
+ *      counters, loop.
+ *   6. After every turn check budgets; bail if exceeded.
+ *
+ * No filesystem access lives here — the CLI's `engine-tools.ts` is the
+ * sole place that touches disk. Keeping the loop pure makes it trivial
+ * to unit-test with a fixture client.
+ *
+ * Resolves to an `EngineLoopOutcome` describing how the loop ended.
+ * `hooks`, `temperature` and `signal` are optional; every other input
+ * field is required.
+ *
+ * NOTE(review): the input carries no command kind, so the `plan`-mode
+ * refusal (`tool_refused`) presumably depends on the supplied `tools` /
+ * `executor` rather than on this signature — confirm against the
+ * implementation.
+ */
+export declare function runEngineLoop(input: {
+    client: EngineLoopClient;
+    executor: EngineLoopToolExecutor;
+    systemPrompt: string;
+    userPrompt: string;
+    tools: EngineLoopTool[];
+    budget: EngineBudget;
+    personaSlug: string;
+    hooks?: EngineLoopHooks;
+    temperature?: number;
+    signal?: AbortSignal;
+}): Promise<EngineLoopOutcome>;
+/**
+ * Server wire request — what `AnvilEngineLoopClient` POSTs to
+ * `POST /api/pugi/engine` on every turn. The Sprint 2E proxy endpoint
+ * mirrors this Zod schema on the admin-api side so the contract has a
+ * single source of truth.
+ *
+ * Required fields:
+ *   - `messages` — transcript so far (system + user + assistant + tool).
+ *     Entries have the same shape as `engineLoopMessageSchema`.
+ *   - `tools` — tool registry the runtime is allowed to invoke for this
+ *     turn. Entries have the same shape as `engineLoopToolSchema`. The
+ *     CLI strips mutating tools when `command === 'plan'`; the server
+ *     defends against forged bodies via a second-layer check.
+ *   - `personaSlug` — persona to invoke; the server uses this for
+ *     persona system-prompt injection + consensus-tier resolution.
+ *
+ * Optional fields (the CLI only supplies a subset today; the schema
+ * accepts every documented knob so Sprint 3+ tooling can opt in
+ * without a contract change):
+ *   - `command` — engine command kind. When present the server picks a
+ *     per-command model from `PUGI_ENGINE_MODEL_<COMMAND>` env or
+ *     hardcoded default. When absent the server falls back to the
+ *     persona's `defaultModel`.
+ *   - `model` — explicit model override. Wins over `command` resolution.
+ *     Useful for tier-aware operators who want to pin a model.
+ *   - `maxTokens` — upper bound on completion size for this turn.
+ *   - `temperature` — sampling temperature for this turn.
+ */
+export declare const engineLoopServerRequestSchema: z.ZodObject<{
+    personaSlug: z.ZodString;
+    messages: z.ZodArray<z.ZodObject<{
+        role: z.ZodEnum<["system", "user", "assistant", "tool"]>;
+        content: z.ZodString;
+        /** Optional model-emitted tool calls when `role === 'assistant'`. */
+        toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
+            id: z.ZodString;
+            name: z.ZodString;
+            arguments: z.ZodString;
+        }, "strip", z.ZodTypeAny, {
+            id: string;
+            name: string;
+            arguments: string;
+        }, {
+            id: string;
+            name: string;
+            arguments: string;
+        }>, "many">>;
+        /** Tool call id this `tool` frame is responding to. */
+        toolCallId: z.ZodOptional<z.ZodString>;
+        /** Tool name this `tool` frame is responding to. */
+        toolName: z.ZodOptional<z.ZodString>;
+    }, "strip", z.ZodTypeAny, {
+        role: "system" | "user" | "assistant" | "tool";
+        content: string;
+        toolCalls?: {
+            id: string;
+            name: string;
+            arguments: string;
+        }[] | undefined;
+        toolCallId?: string | undefined;
+        toolName?: string | undefined;
+    }, {
+        role: "system" | "user" | "assistant" | "tool";
+        content: string;
+        toolCalls?: {
+            id: string;
+            name: string;
+            arguments: string;
+        }[] | undefined;
+        toolCallId?: string | undefined;
+        toolName?: string | undefined;
+    }>, "many">;
+    tools: z.ZodArray<z.ZodObject<{
+        name: z.ZodString;
+        description: z.ZodString;
+        parameters: z.ZodUnknown;
+    }, "strip", z.ZodTypeAny, {
+        name: string;
+        description: string;
+        parameters?: unknown;
+    }, {
+        name: string;
+        description: string;
+        parameters?: unknown;
+    }>, "many">;
+    command: z.ZodOptional<z.ZodEnum<["code", "explain", "fix", "plan", "build"]>>;
+    model: z.ZodOptional<z.ZodString>;
+    maxTokens: z.ZodOptional<z.ZodNumber>;
+    temperature: z.ZodOptional<z.ZodNumber>;
+}, "strip", z.ZodTypeAny, {
+    personaSlug: string;
+    messages: {
+        role: "system" | "user" | "assistant" | "tool";
+        content: string;
+        toolCalls?: {
+            id: string;
+            name: string;
+            arguments: string;
+        }[] | undefined;
+        toolCallId?: string | undefined;
+        toolName?: string | undefined;
+    }[];
+    tools: {
+        name: string;
+        description: string;
+        parameters?: unknown;
+    }[];
+    maxTokens?: number | undefined;
+    command?: "code" | "explain" | "fix" | "plan" | "build" | undefined;
+    model?: string | undefined;
+    temperature?: number | undefined;
+}, {
+    personaSlug: string;
+    messages: {
+        role: "system" | "user" | "assistant" | "tool";
+        content: string;
+        toolCalls?: {
+            id: string;
+            name: string;
+            arguments: string;
+        }[] | undefined;
+        toolCallId?: string | undefined;
+        toolName?: string | undefined;
+    }[];
+    tools: {
+        name: string;
+        description: string;
+        parameters?: unknown;
+    }[];
+    maxTokens?: number | undefined;
+    command?: "code" | "explain" | "fix" | "plan" | "build" | undefined;
+    model?: string | undefined;
+    temperature?: number | undefined;
+}>;
+export type EngineLoopServerRequest = z.infer<typeof engineLoopServerRequestSchema>;
+/**
+ * Server wire response — what the admin-api Sprint 2E endpoint returns
+ * for every turn. The shape matches what `AnvilEngineLoopClient` parses:
+ *
+ *   - `stop === 'text'` — model produced a final answer, loop terminates.
+ *   - `stop === 'tool_use'` — model emitted `toolCalls`, CLI executes
+ *     them locally and feeds results back next turn.
+ *   - `stop === 'length'` — completion truncated by `maxTokens`. The
+ *     CLI treats this as final text and stops; surface partial content.
+ *
+ * `content`, `tokensUsed` and `model` are required for every `stop` value;
+ * `toolCalls` is optional.
+ *
+ * NOTE(review): unlike `EngineLoopResponse`, this schema has no `error`
+ * variant — failures are presumably signalled outside the body (e.g. via
+ * HTTP status) and mapped by the client; confirm.
+ */
+export declare const engineLoopServerResponseSchema: z.ZodObject<{
+    stop: z.ZodEnum<["text", "tool_use", "length"]>;
+    content: z.ZodString;
+    toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
+        id: z.ZodString;
+        name: z.ZodString;
+        arguments: z.ZodString;
+    }, "strip", z.ZodTypeAny, {
+        id: string;
+        name: string;
+        arguments: string;
+    }, {
+        id: string;
+        name: string;
+        arguments: string;
+    }>, "many">>;
+    tokensUsed: z.ZodNumber;
+    model: z.ZodString;
+}, "strip", z.ZodTypeAny, {
+    content: string;
+    model: string;
+    stop: "length" | "text" | "tool_use";
+    tokensUsed: number;
+    toolCalls?: {
+        id: string;
+        name: string;
+        arguments: string;
+    }[] | undefined;
+}, {
+    content: string;
+    model: string;
+    stop: "length" | "text" | "tool_use";
+    tokensUsed: number;
+    toolCalls?: {
+        id: string;
+        name: string;
+        arguments: string;
+    }[] | undefined;
+}>;
+export type EngineLoopServerResponse = z.infer<typeof engineLoopServerResponseSchema>;
+//# sourceMappingURL=engine-loop.d.ts.map