npm - @loreai/gateway - Versions diffs - 0.14.0 → 0.14.1 - Mend

@loreai/gateway 0.14.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

package/dist/bin.cjs +27 -0
package/dist/index.cjs +1042 -0
package/dist/index.d.cts +21 -0
package/package.json +10 -10
package/dist/index.js +0 -50087
package/src/auth.ts +0 -133
package/src/batch-queue.ts +0 -575
package/src/cache-analytics.ts +0 -344
package/src/cli/agents.ts +0 -107
package/src/cli/bin.ts +0 -11
package/src/cli/help.ts +0 -55
package/src/cli/lib/binary.ts +0 -353
package/src/cli/lib/bspatch.ts +0 -306
package/src/cli/lib/delta-upgrade.ts +0 -790
package/src/cli/lib/errors.ts +0 -48
package/src/cli/lib/ghcr.ts +0 -389
package/src/cli/lib/patch-cache.ts +0 -342
package/src/cli/lib/upgrade.ts +0 -454
package/src/cli/lib/version-check.ts +0 -385
package/src/cli/main.ts +0 -152
package/src/cli/run.ts +0 -181
package/src/cli/start.ts +0 -82
package/src/cli/upgrade.ts +0 -311
package/src/cli/version.ts +0 -22
package/src/compaction.ts +0 -195
package/src/config.ts +0 -199
package/src/idle.ts +0 -240
package/src/index.ts +0 -41
package/src/llm-adapter.ts +0 -182
package/src/pipeline.ts +0 -1681
package/src/recall.ts +0 -433
package/src/recorder.ts +0 -192
package/src/server.ts +0 -250
package/src/session.ts +0 -207
package/src/stream/anthropic.ts +0 -708
package/src/temporal-adapter.ts +0 -310
package/src/translate/anthropic.ts +0 -469
package/src/translate/openai.ts +0 -536
package/src/translate/types.ts +0 -222
package/src/worker-model.ts +0 -408

package/src/translate/anthropic.ts DELETED Viewed

@@ -1,469 +0,0 @@
-/**
- * Anthropic ↔ Gateway translation layer.
- *
- * Converts between Anthropic's `/v1/messages` API format and the gateway's
- * internal `GatewayRequest`/`GatewayResponse` types. The parser is lenient —
- * unknown fields pass through in `metadata` rather than causing errors.
- */
-import type {
-  GatewayContentBlock,
-  GatewayMessage,
-  GatewayRequest,
-  GatewayResponse,
-  GatewayTool,
-} from "./types";
-import { extractAuth, authHeaders } from "../auth";
-// ---------------------------------------------------------------------------
-// Anthropic API version — value of the `anthropic-version` request header
-// ---------------------------------------------------------------------------
-const ANTHROPIC_VERSION = "2023-06-01";
-// ---------------------------------------------------------------------------
-// Body fields the gateway models explicitly — the rest is kept in `metadata`
-// ---------------------------------------------------------------------------
-/**
- * Top-level request-body keys that are lifted into dedicated
- * `GatewayRequest` properties. Any key not listed here is carried through
- * untouched in `metadata`.
- */
-const KNOWN_BODY_FIELDS = new Set<string>(
-  ["model", "system", "messages", "tools", "max_tokens", "stream"],
-);
-// ---------------------------------------------------------------------------
-// Helpers — content block translation
-// ---------------------------------------------------------------------------
-/**
- * Normalize one entry of an Anthropic message `content` array into a
- * `GatewayContentBlock`.
- *
- *  - `text`, `thinking` and `tool_use` map field-for-field; missing string
- *    fields become `""`.
- *  - `tool_result` content is flattened to a string: a string is used as-is,
- *    an array contributes only its `text` entries joined with `\n` (entries
- *    of other types are not carried over), anything else yields `""`.
- *  - Any other block type is preserved as a text block holding its JSON dump
- *    so no information is lost.
- *
- * Malformed entries (`null`, `undefined`, primitives) are tolerated rather
- * than throwing: they are JSON-dumped into a text block as well, with
- * `null`/`undefined` rendered as `"null"`.
- *
- * @param block Raw content-array entry taken from the request body.
- * @returns The equivalent gateway block.
- */
-function toGatewayBlock(block: Record<string, unknown>): GatewayContentBlock {
-  // The body is client-controlled: a `null`/`undefined` entry would otherwise
-  // throw on the `block.type` access below.
-  if (block == null || typeof block !== "object") {
-    return { type: "text", text: JSON.stringify(block ?? null) };
-  }
-  switch (block.type) {
-    case "text":
-      return { type: "text", text: String(block.text ?? "") };
-    case "thinking":
-      return {
-        type: "thinking",
-        thinking: String(block.thinking ?? ""),
-        // Only emit `signature` when the client actually sent one.
-        ...(block.signature != null
-          ? { signature: String(block.signature) }
-          : undefined),
-      };
-    case "tool_use":
-      return {
-        type: "tool_use",
-        id: String(block.id ?? ""),
-        name: String(block.name ?? ""),
-        input: block.input,
-      };
-    case "tool_result": {
-      // Anthropic `tool_result` content can be a string or array of blocks.
-      let content = "";
-      if (typeof block.content === "string") {
-        content = block.content;
-      } else if (Array.isArray(block.content)) {
-        content = (block.content as unknown[])
-          .filter(
-            // Skip `null` and primitive entries instead of dereferencing them.
-            (part): part is Record<string, unknown> =>
-              typeof part === "object" &&
-              part !== null &&
-              (part as Record<string, unknown>).type === "text",
-          )
-          .map((part) => String(part.text ?? ""))
-          .join("\n");
-      }
-      return {
-        type: "tool_result",
-        toolUseId: String(block.tool_use_id ?? ""),
-        content,
-        ...(block.is_error ? { isError: true } : undefined),
-      };
-    }
-    default:
-      // Unknown block type — preserve as text so nothing is silently dropped
-      return { type: "text", text: JSON.stringify(block) };
-  }
-}
-/**
- * Coerce an Anthropic message `content` value into a
- * `GatewayContentBlock[]`.
- *
- * An array is translated entry by entry, a bare string becomes a single
- * text block, and any other value (null, undefined, unexpected types)
- * produces an empty list.
- */
-function normalizeContent(content: unknown): GatewayContentBlock[] {
-  if (Array.isArray(content)) {
-    return content.map((entry: unknown) =>
-      toGatewayBlock(entry as Record<string, unknown>),
-    );
-  }
-  return typeof content === "string" ? [{ type: "text", text: content }] : [];
-}
-/**
- * Normalize Anthropic's `system` field into a single string. Can be:
- *  - `undefined` / `null`  → `""`
- *  - a plain string         → used directly
- *  - an array of content blocks (e.g. with `cache_control`) → the `text`
- *    blocks joined with `\n`; malformed entries (`null`, primitives) and
- *    non-text blocks are skipped
- *  - any other object       → its JSON dump (instead of `"[object Object]"`)
- *  - any other primitive    → `String(value)`
- *
- * @param system Raw `system` value from the request body.
- * @returns The system prompt as plain text.
- */
-function normalizeSystem(system: unknown): string {
-  if (system == null) return "";
-  if (typeof system === "string") return system;
-  if (Array.isArray(system)) {
-    return (system as unknown[])
-      .filter(
-        // Guard against `null`/primitive entries before reading `.type`.
-        (block): block is Record<string, unknown> =>
-          typeof block === "object" &&
-          block !== null &&
-          (block as Record<string, unknown>).type === "text",
-      )
-      .map((block) => String(block.text ?? ""))
-      .join("\n");
-  }
-  // `String(obj)` would collapse to "[object Object]" and lose the payload.
-  if (typeof system === "object") return JSON.stringify(system);
-  return String(system);
-}
-// ---------------------------------------------------------------------------
-// Reverse helpers — gateway blocks → Anthropic format
-// ---------------------------------------------------------------------------
-/**
- * Serialize a `GatewayContentBlock` into the shape Anthropic expects on
- * the wire. Camel-cased gateway fields (`toolUseId`, `isError`) are mapped
- * back to their snake-cased names; optional fields are emitted only when
- * set.
- */
-function toAnthropicBlock(
-  block: GatewayContentBlock,
-): Record<string, unknown> {
-  switch (block.type) {
-    case "text": {
-      const { text } = block;
-      return { type: "text", text };
-    }
-    case "thinking": {
-      const wire: Record<string, unknown> = {
-        type: "thinking",
-        thinking: block.thinking,
-      };
-      if (block.signature != null) wire.signature = block.signature;
-      return wire;
-    }
-    case "tool_use": {
-      const { id, name, input } = block;
-      return { type: "tool_use", id, name, input };
-    }
-    case "tool_result":
-      return {
-        type: "tool_result",
-        tool_use_id: block.toolUseId,
-        content: block.content,
-        ...(block.isError ? { is_error: true } : undefined),
-      };
-  }
-}
-// ---------------------------------------------------------------------------
-// parseAnthropicRequest
-// ---------------------------------------------------------------------------
-/**
- * Turn a raw Anthropic `/v1/messages` request body into a `GatewayRequest`.
- *
- * The parser never rejects input: missing or mistyped known fields fall
- * back to defaults (`model` → `""`, `max_tokens` → 4096, `stream` → false
- * unless literally `true`, non-array `messages`/`tools` → empty), any role
- * other than `"assistant"` is treated as `"user"`, and every top-level key
- * outside `KNOWN_BODY_FIELDS` is copied verbatim into `metadata` so it can
- * be forwarded upstream unchanged.
- *
- * @param body    Decoded JSON request body (may be null/undefined).
- * @param headers Incoming request headers, stored as `rawHeaders`.
- */
-export function parseAnthropicRequest(
-  body: unknown,
-  headers: Record<string, string>,
-): GatewayRequest {
-  const raw = (body ?? {}) as Record<string, unknown>;
-  const {
-    max_tokens: requestedMax,
-    messages: messageList,
-    tools: toolList,
-  } = raw;
-  // System prompt is flattened to plain text first.
-  const system = normalizeSystem(raw.system);
-  // Conversation turns, with content normalized to block arrays.
-  const messages: GatewayMessage[] = (
-    Array.isArray(messageList) ? messageList : []
-  ).map((entry: Record<string, unknown>) => ({
-    role: entry.role === "assistant" ? "assistant" : "user",
-    content: normalizeContent(entry.content),
-  }));
-  // Tool definitions, renamed to the gateway's camelCase shape.
-  const tools: GatewayTool[] = (
-    Array.isArray(toolList) ? toolList : []
-  ).map((def: Record<string, unknown>) => ({
-    name: String(def.name ?? ""),
-    description: String(def.description ?? ""),
-    inputSchema: (def.input_schema as Record<string, unknown>) ?? {},
-  }));
-  // Pass-through bucket for everything the gateway does not model itself.
-  const metadata: Record<string, unknown> = {};
-  for (const key of Object.keys(raw)) {
-    if (KNOWN_BODY_FIELDS.has(key)) continue;
-    metadata[key] = raw[key];
-  }
-  return {
-    protocol: "anthropic",
-    model: String(raw.model ?? ""),
-    system,
-    messages,
-    tools,
-    stream: raw.stream === true,
-    maxTokens: typeof requestedMax === "number" ? requestedMax : 4096,
-    metadata,
-    rawHeaders: headers,
-  };
-}
-// ---------------------------------------------------------------------------
-// Caching options
-// ---------------------------------------------------------------------------
-/**
- * Options controlling Anthropic prompt caching behavior.
- *
- * Independent mechanisms, each switched on by its own field:
- *  1. **System prompt caching** (`systemTTL`, optionally with `ltmSystem`):
- *     sends `system` as a block array with an explicit `cache_control`
- *     breakpoint. This is the highest-stability cache slot — the system
- *     prompt rarely changes within a session.
- *  2. **Tool caching** (`cacheTools`): places a breakpoint on the last tool
- *     definition.
- *  3. **Conversation caching** (`cacheConversation`): places an explicit
- *     `cache_control` breakpoint on the last message block, enabling
- *     Anthropic to cache the conversation prefix up to that point. Between
- *     consecutive stable turns (same gradient layer, no distillation
- *     arrival, no window eviction), the prefix is byte-identical → cache
- *     reads at 0.1× base cost vs 1× uncached.
- *
- * Title/summary passthrough requests should NEVER enable caching — their
- * content varies every call, producing 1.25× write cost with zero reads.
- */
-export type AnthropicCacheOptions = {
-  /**
-   * Cache the system prompt with an explicit breakpoint.
-   * - `"5m"` — default 5-minute TTL, sent as `{ type: "ephemeral" }`
-   *   (conversation turns, frequent enough for 5m refresh)
-   * - `"1h"` — extended 1-hour TTL, sent as
-   *   `{ type: "ephemeral", ttl: "1h" }` (worker calls that come in bursts
-   *   separated by minutes of user thinking)
-   * - `false` (or omitted) — no system caching; `system` is sent as a
-   *   plain string
-   */
-  systemTTL?: "5m" | "1h" | false;
-  /**
-   * LTM knowledge text to inject after the host prompt. Keeping it in a
-   * separate block means the host prompt gets its own cache breakpoint and
-   * LTM changes don't bust the host prefix.
-   *
-   * When provided AND systemTTL is set, the system becomes a 2-block array:
-   *   system[0]: host prompt  — cache_control with the TTL from systemTTL
-   *   system[1]: LTM content  — no cache_control (benefits from prefix)
-   *
-   * When systemTTL is not set, the LTM text is appended to the host prompt
-   * as a single string, separated by a blank line.
-   */
-  ltmSystem?: string;
-  /**
-   * Cache the last tool definition with an explicit 1h breakpoint
-   * (`{ type: "ephemeral", ttl: "1h" }`).
-   * Tool definitions (including our injected recall tool) are stable
-   * across turns — caching them avoids re-processing on every request.
-   */
-  cacheTools?: boolean;
-  /**
-   * Place an explicit `cache_control` breakpoint on the last block of the
-   * last message, enabling Anthropic to cache the conversation prefix.
-   *
-   * When `true`, the gateway adds `cache_control: { type: "ephemeral" }`
-   * to the final content block. On the next turn, Anthropic's lookback
-   * window finds the prior breakpoint, reads the cached prefix (0.1×
-   * cost), and writes only the new tail (1.25×).
-   */
-  cacheConversation?: boolean;
-};
-// ---------------------------------------------------------------------------
-// buildAnthropicRequest
-// ---------------------------------------------------------------------------
-/**
- * Convert a `GatewayRequest` back to Anthropic API format for upstream
- * forwarding.
- *
- * Returns the relative path, headers, and JSON body. The caller prepends
- * the upstream base URL.
- *
- * Headers: `content-type`, `anthropic-version`, the credentials extracted
- * from the original request, and `anthropic-beta` when the client sent it
- * (matched case-insensitively).
- *
- * Body: `model`, `max_tokens`, `stream`, `messages`, plus `system` and
- * `tools` when non-empty. Every `metadata` entry is written last, so a
- * metadata key that matches a built field replaces it.
- *
- * @param req   The normalized gateway request
- * @param cache Optional caching configuration. When omitted, no
- *              `cache_control` annotations are added (passthrough behavior).
- * @returns Relative URL, outgoing headers and the JSON-serializable body.
- */
-export function buildAnthropicRequest(
-  req: GatewayRequest,
-  cache?: AnthropicCacheOptions,
-): {
-  url: string;
-  headers: Record<string, string>;
-  body: unknown;
-} {
-  // --- Headers ---
-  const headers: Record<string, string> = {
-    "content-type": "application/json",
-    "anthropic-version": ANTHROPIC_VERSION,
-  };
-  // Forward auth from the original request (API key or OAuth Bearer)
-  const cred = extractAuth(req.rawHeaders);
-  if (cred) {
-    Object.assign(headers, authHeaders(cred));
-  }
-  // Forward anthropic-beta if present (enables features like extended
-  // thinking). HTTP header names are case-insensitive, so after the exact
-  // lowercase lookup fall back to scanning for any other casing.
-  let beta = req.rawHeaders["anthropic-beta"] || "";
-  if (!beta) {
-    for (const [name, value] of Object.entries(req.rawHeaders)) {
-      if (value && name.toLowerCase() === "anthropic-beta") {
-        beta = value;
-        break;
-      }
-    }
-  }
-  if (beta) {
-    headers["anthropic-beta"] = beta;
-  }
-  // --- Body ---
-  const body: Record<string, unknown> = {
-    model: req.model,
-    max_tokens: req.maxTokens,
-    stream: req.stream,
-  };
-  // System — only include if there is something to send
-  const systemTTL = cache?.systemTTL;
-  const ltmText = cache?.ltmSystem;
-  if (req.system) {
-    if (systemTTL) {
-      // Send as block array with explicit cache_control breakpoint on the
-      // host prompt. The host prompt is the most stable part (changes only
-      // when the host mutates AGENTS.md, memory, etc.); its TTL is whatever
-      // the caller selected via `systemTTL`.
-      const cacheControl: Record<string, string> =
-        systemTTL === "1h"
-          ? { type: "ephemeral", ttl: "1h" }
-          : { type: "ephemeral" };
-      const blocks: Record<string, unknown>[] = [
-        { type: "text", text: req.system, cache_control: cacheControl },
-      ];
-      // LTM knowledge as a separate block — no cache_control of its own,
-      // but benefits from the host prompt prefix cache. When LTM changes,
-      // only this block and everything after it is re-processed; the host
-      // prompt prefix is still a cache read.
-      if (ltmText) {
-        blocks.push({ type: "text", text: ltmText });
-      }
-      body.system = blocks;
-    } else {
-      // No caching — concatenate LTM into a single string.
-      body.system = `${req.system}${ltmText ? `\n\n${ltmText}` : ""}`;
-    }
-  } else if (ltmText) {
-    // No host prompt, but LTM was supplied: send it as the whole system
-    // prompt instead of silently dropping it. It carries no breakpoint,
-    // matching the LTM block in the cached layout above.
-    body.system = ltmText;
-  }
-  // Messages
-  const messages = req.messages.map((msg) => ({
-    role: msg.role,
-    content: msg.content.map(toAnthropicBlock),
-  }));
-  // Conversation caching: place a breakpoint on the final content block of
-  // the last message. Anthropic's 20-block lookback finds the prior turn's
-  // breakpoint, reads the cached prefix, and writes only the new tail.
-  // `messages` holds freshly built objects, so `req` itself is not mutated.
-  if (cache?.cacheConversation && messages.length > 0) {
-    const lastMsg = messages[messages.length - 1]!;
-    if (lastMsg.content.length > 0) {
-      const lastBlock = lastMsg.content[lastMsg.content.length - 1]!;
-      (lastBlock as Record<string, unknown>).cache_control = {
-        type: "ephemeral",
-      };
-    }
-  }
-  body.messages = messages;
-  // Tools — only include if present
-  if (req.tools.length > 0) {
-    const tools = req.tools.map((t) => ({
-      name: t.name,
-      description: t.description,
-      input_schema: t.inputSchema,
-    }));
-    // Tool caching: place a 1h breakpoint on the last tool definition.
-    // Tool definitions (including our recall tool) are stable across turns.
-    if (cache?.cacheTools && tools.length > 0) {
-      const lastTool = tools[tools.length - 1]!;
-      (lastTool as Record<string, unknown>).cache_control = {
-        type: "ephemeral",
-        ttl: "1h",
-      };
-    }
-    body.tools = tools;
-  }
-  // Restore all metadata params (temperature, top_p, stop_sequences, etc.)
-  for (const [key, value] of Object.entries(req.metadata)) {
-    body[key] = value;
-  }
-  return {
-    url: "/v1/messages",
-    headers,
-    body,
-  };
-}
-// ---------------------------------------------------------------------------
-// buildAnthropicNonStreamResponse
-// ---------------------------------------------------------------------------
-/**
- * Render a `GatewayResponse` as a non-streaming Anthropic `/v1/messages`
- * response object.
- *
- * The result always has `type: "message"`, `role: "assistant"` and
- * `stop_sequence: null`. Usage always reports input and output tokens;
- * the two cache counters are included only when the gateway response
- * carries them.
- */
-export function buildAnthropicNonStreamResponse(
-  resp: GatewayResponse,
-): unknown {
-  const {
-    inputTokens,
-    outputTokens,
-    cacheReadInputTokens,
-    cacheCreationInputTokens,
-  } = resp.usage;
-  const usage: Record<string, number> = {
-    input_tokens: inputTokens,
-    output_tokens: outputTokens,
-    ...(cacheReadInputTokens != null
-      ? { cache_read_input_tokens: cacheReadInputTokens }
-      : undefined),
-    ...(cacheCreationInputTokens != null
-      ? { cache_creation_input_tokens: cacheCreationInputTokens }
-      : undefined),
-  };
-  const content = resp.content.map(toAnthropicBlock);
-  return {
-    id: resp.id,
-    type: "message",
-    role: "assistant",
-    model: resp.model,
-    content,
-    stop_reason: resp.stopReason,
-    stop_sequence: null,
-    usage,
-  };
-}