npm - @loreai/gateway - Versions diffs - 0.14.0 → 0.14.1 - Mend

@loreai/gateway 0.14.0 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

package/dist/bin.cjs +27 -0
package/dist/index.cjs +1042 -0
package/dist/index.d.cts +21 -0
package/package.json +10 -10
package/dist/index.js +0 -50087
package/src/auth.ts +0 -133
package/src/batch-queue.ts +0 -575
package/src/cache-analytics.ts +0 -344
package/src/cli/agents.ts +0 -107
package/src/cli/bin.ts +0 -11
package/src/cli/help.ts +0 -55
package/src/cli/lib/binary.ts +0 -353
package/src/cli/lib/bspatch.ts +0 -306
package/src/cli/lib/delta-upgrade.ts +0 -790
package/src/cli/lib/errors.ts +0 -48
package/src/cli/lib/ghcr.ts +0 -389
package/src/cli/lib/patch-cache.ts +0 -342
package/src/cli/lib/upgrade.ts +0 -454
package/src/cli/lib/version-check.ts +0 -385
package/src/cli/main.ts +0 -152
package/src/cli/run.ts +0 -181
package/src/cli/start.ts +0 -82
package/src/cli/upgrade.ts +0 -311
package/src/cli/version.ts +0 -22
package/src/compaction.ts +0 -195
package/src/config.ts +0 -199
package/src/idle.ts +0 -240
package/src/index.ts +0 -41
package/src/llm-adapter.ts +0 -182
package/src/pipeline.ts +0 -1681
package/src/recall.ts +0 -433
package/src/recorder.ts +0 -192
package/src/server.ts +0 -250
package/src/session.ts +0 -207
package/src/stream/anthropic.ts +0 -708
package/src/temporal-adapter.ts +0 -310
package/src/translate/anthropic.ts +0 -469
package/src/translate/openai.ts +0 -536
package/src/translate/types.ts +0 -222
package/src/worker-model.ts +0 -408

package/src/translate/types.ts DELETED Viewed

@@ -1,222 +0,0 @@
-/**
- * Internal representation types for the Lore gateway.
- *
- * The gateway accepts both Anthropic (`/v1/messages`) and OpenAI
- * (`/v1/chat/completions`) protocol requests, normalizes them into these
- * types for Lore pipeline processing, then translates back to the original
- * protocol for the upstream response.
- *
- * Design: types are intentionally minimal — only fields that Lore's context
- * management (gradient, LTM, distillation) actually reads/writes. Protocol-
- * specific fields the gateway doesn't process live in `metadata`.
- */
-// ---------------------------------------------------------------------------
-// Content blocks — discriminated union on `type`
-// ---------------------------------------------------------------------------
-export type GatewayTextBlock = { // plain text content
-  type: "text"; // discriminant
-  text: string;
-};
-export type GatewayThinkingBlock = { // model "thinking" content
-  type: "thinking"; // discriminant
-  thinking: string;
-  /** Anthropic extended thinking signature, opaque bytes. */
-  signature?: string;
-};
-export type GatewayToolUseBlock = { // a tool call: which tool, with what input
-  type: "tool_use"; // discriminant
-  /** Provider-assigned tool call ID (e.g. `toolu_…` for Anthropic). */
-  id: string;
-  name: string;
-  input: unknown; // left untyped here; consumers must narrow before use
-};
-export type GatewayToolResultBlock = { // the result paired with an earlier tool_use block
-  type: "tool_result"; // discriminant
-  /** ID of the tool_use block this result corresponds to. */
-  toolUseId: string;
-  content: string;
-  isError?: boolean; // NOTE(review): presumably set when the tool call failed — confirm in the translators
-};
-export type GatewayContentBlock = // any one of the four block variants; narrow on `type`
-  | GatewayTextBlock
-  | GatewayThinkingBlock
-  | GatewayToolUseBlock
-  | GatewayToolResultBlock;
-// ---------------------------------------------------------------------------
-// Messages
-// ---------------------------------------------------------------------------
-/**
- * Normalized message — system messages are extracted to `GatewayRequest.system`.
- *
- * `role` therefore only admits "user" and "assistant", and `content` is always
- * an array of content blocks rather than a bare string.
- */
-export type GatewayMessage = {
-  role: "user" | "assistant";
-  content: GatewayContentBlock[];
-};
-// ---------------------------------------------------------------------------
-// Tools
-// ---------------------------------------------------------------------------
-/**
- * Normalized tool definition. Both protocols use JSON Schema for input.
- *
- * `inputSchema` carries that schema as an untyped record; nothing in this
- * type checks that it is a well-formed JSON Schema.
- */
-export type GatewayTool = {
-  name: string;
-  description: string;
-  inputSchema: Record<string, unknown>;
-};
-// ---------------------------------------------------------------------------
-// Request — the normalized form after ingress translation
-// ---------------------------------------------------------------------------
-export type GatewayProtocol = "anthropic" | "openai"; // the two wire protocols the gateway accepts
-/**
- * Normalized request after ingress translation from either protocol.
- *
- * Every field except `extras` is required, so an ingress translator has to
- * supply a value for each of them (including `system`, `tools` and
- * `maxTokens`) even when the incoming request omitted the counterpart.
- */
-export type GatewayRequest = {
-  /** Which protocol the request arrived as — determines egress translation. */
-  protocol: GatewayProtocol;
-  /** Model identifier (e.g. `claude-sonnet-4-20250514`, `gpt-4o`). */
-  model: string;
-  /**
-   * Extracted system prompt.
-   * - Anthropic: top-level `system` field.
-   * - OpenAI: first message with `role: "system"`, removed from messages.
-   */
-  system: string;
-  messages: GatewayMessage[];
-  tools: GatewayTool[];
-  stream: boolean;
-  maxTokens: number;
-  /**
-   * Protocol-specific parameters the gateway doesn't process but must
-   * forward to the upstream provider (e.g. `temperature`, `top_p`,
-   * `stop_sequences`, `tool_choice`).
-   */
-  metadata: Record<string, unknown>;
-  /** Original request headers — passed through for auth, tracing, etc. */
-  rawHeaders: Record<string, string>;
-  /**
-   * Additional OpenAI-compatible parameters preserved for upstream forwarding.
-   * Populated by `parseOpenAIRequest`.
-   *
-   * NOTE(review): `temperature` and `top_p` are also named as examples of
-   * `metadata` contents above. Which of the two fields is authoritative for
-   * an OpenAI request is not visible in this file — confirm in
-   * `parseOpenAIRequest` and the egress translator.
-   */
-  extras?: {
-    temperature?: number;
-    top_p?: number;
-    frequency_penalty?: number;
-    presence_penalty?: number;
-    user?: string;
-    logprobs?: boolean;
-    top_logprobs?: number;
-  };
-};
-// ---------------------------------------------------------------------------
-// Response — accumulated from upstream streaming/non-streaming response
-// ---------------------------------------------------------------------------
-export type GatewayUsage = { // token accounting reported with an upstream response
-  inputTokens: number; // NOTE(review): whether cached tokens are included here is not visible in this file
-  outputTokens: number;
-  /** Anthropic prompt caching — present when cache hits occur. */
-  cacheReadInputTokens?: number;
-  /** Anthropic prompt caching — tokens written to cache on this request. */
-  cacheCreationInputTokens?: number;
-};
-/**
- * Accumulated response from the upstream provider.
- *
- * Uses the same content-block union as request messages, so a response can
- * carry text, thinking, tool_use and tool_result blocks alike.
- */
-export type GatewayResponse = {
-  id: string;
-  model: string;
-  content: GatewayContentBlock[];
-  /** Provider stop reason (e.g. `end_turn`, `stop`, `tool_use`, `length`). */
-  stopReason: string; // kept as a free-form string, not a closed union
-  usage: GatewayUsage;
-};
-// ---------------------------------------------------------------------------
-// Recall store (cross-request, gateway recall interception)
-// ---------------------------------------------------------------------------
-/**
- * Stored recall result for marker-based round-trip expansion.
- *
- * Keeps the pieces of one recall exchange together: the tool_use ID, the
- * input the recall was called with, where the tool_use block sat in the
- * assistant message, and the formatted result.
- */
-export type StoredRecall = {
-  /** The tool_use ID to reconstruct in the upstream request. */
-  toolUseId: string;
-  /** Original recall input (query + scope). */
-  input: { query: string; scope?: string };
-  /** Position (content block index) in the original assistant message. */
-  position: number;
-  /** Executed recall result (formatted markdown). */
-  result: string;
-};
-/**
- * Map from marker key (`${scope}:${query}`) → stored recall data.
- *
- * NOTE(review): `StoredRecall.input.scope` is optional; how the key is built
- * when `scope` is absent is not visible in this file — confirm at the call
- * site that writes to the store.
- */
-export type RecallStore = Map<string, StoredRecall>;
-// ---------------------------------------------------------------------------
-// Session state — per-session tracking for Lore pipeline integration
-// ---------------------------------------------------------------------------
-/**
- * Per-turn cache analysis emitted as structured log data.
- *
- * Fields fall into the two groups marked by the inline separators below:
- * counters taken from the API response, and values derived by comparing this
- * turn's serialized request body with the previous turn's.
- */
-export type CacheTurnAnalysis = {
-  /** Turn number within this session. */
-  turn: number;
-  // --- Ground truth from API response ---
-  /** Tokens served from prompt cache (hit). */
-  cacheRead: number;
-  /** Tokens written to prompt cache (miss / new). */
-  cacheCreation: number;
-  /** Uncached input tokens. */
-  inputTokens: number;
-  /** cacheRead / total input — 0..1. */
-  cacheHitRate: number;
-  // --- Request body prefix comparison ---
-  /** Bytes matching from start of serialized request body vs previous turn. */
-  prefixMatchBytes: number;
-  /** prefixMatchBytes / min(prev, current) body length — 0..1. */
-  prefixMatchPercent: number; // despite the name, documented as a 0..1 fraction
-  /** Semantic location of the first divergence (e.g. "messages[3].content[1]"). */
-  divergencePoint: string;
-  /** Human-readable reason (e.g. "system prompt changed", "new message appended"). */
-  divergenceReason: string;
-};
-/**
- * Per-session cache analytics state.
- *
- * Carries the previous turn's request body and cache counters forward so the
- * next turn can be compared against them, plus running totals for the session.
- */
-export type CacheAnalytics = {
-  /** Deflate-compressed serialized request body from the last turn. */
-  lastRequestBody: Uint8Array | null; // NOTE(review): null presumably means "no previous turn yet" — confirm at the initialiser
-  /** Uncompressed byte length of lastRequestBody (for prefix match %). */
-  lastRequestBodyLength: number;
-  /** cache_read_input_tokens from last API response. */
-  lastCacheRead: number;
-  /** cache_creation_input_tokens from last API response. */
-  lastCacheCreation: number;
-  /** Total turns observed. */
-  turnCount: number;
-  /** Confirmed busts (API returned cacheRead=0 with cacheCreation>0). */
-  bustCount: number;
-};
-/**
- * Per-session state tracked by the gateway for Lore pipeline decisions.
- *
- * Combines identity (`sessionID`, `projectPath`, `fingerprint`), activity
- * counters, the recall store and the cache analytics state in one record.
- */
-export type SessionState = {
-  sessionID: string;
-  projectPath: string;
-  /** SHA-256 fingerprint of the first user message — used for session correlation. */
-  fingerprint: string;
-  /** Unix timestamp (ms) of the last request in this session. */
-  lastRequestTime: number;
-  /** Total user+assistant messages seen in this session. */
-  messageCount: number;
-  /** Turns since last curation run — triggers background curation. */
-  turnsSinceCuration: number;
-  /** Stored recall results for marker-based round-trip expansion. */
-  recallStore: RecallStore;
-  /** Cache analytics — request body prefix comparison + API cache fields. */
-  cacheAnalytics: CacheAnalytics;
-};

package/src/worker-model.ts DELETED Viewed

@@ -1,408 +0,0 @@
-/**
- * Gateway worker model discovery and resolution.
- *
- * Discovers available models from the upstream Anthropic `/v1/models` API,
- * fetches per-model pricing from models.dev (open-source model database),
- * and integrates with core's worker model validation/resolution pipeline.
- *
- * This replaces the OpenCode adapter's `getProviderModels()` +
- * `maybeValidateWorkerModel()` — the gateway is the universal path and
- * doesn't depend on the OpenCode SDK's model listing (which can report
- * deprecated models as "active").
- */
-import {
-  workerModel,
-  temporal,
-  distillation as distillationMod,
-  config as loreConfig,
-  log,
-} from "@loreai/core";
-import type { LLMClient } from "@loreai/core";
-import type { AuthCredential } from "./auth";
-import { authHeaders } from "./auth";
-// ---------------------------------------------------------------------------
-// Cost lookup — models.dev with hardcoded fallback
-// ---------------------------------------------------------------------------
-/**
- * models.dev JSON API endpoint — returns all providers/models with pricing.
- *
- * Single request replaces N individual TOML fetches. Response shape:
- *   { anthropic: { models: { "claude-sonnet-4-20250514": { cost: { input: 3 }, ... }, ... } } }
- * Cost values are per-million-token USD.
- */
-const MODELS_DEV_API = "https://models.dev/api.json";
-/**
- * Cached models.dev cost data: modelID → per-million-token input cost.
- *
- * `null` until a fetch succeeds; `clearCostCache()` sets it back to `null`.
- */
-let cachedCostMap: Map<string, number> | null = null;
-let cachedCostMapAt = 0; // Date.now() of the last successful fetch; 0 = never fetched
-const COST_CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
-/**
- * Hardcoded fallback costs (per-input-token, USD) used when models.dev
- * API is unreachable. Prefix-matched against model IDs; the first matching
- * entry wins, so a more specific prefix must be listed before a shorter one
- * it extends.
- *
- * Claude 4+ IDs put the family first (`claude-haiku-4-…`), whereas Claude 3.x
- * IDs put the version first (`claude-3-5-haiku-…`, `claude-3-7-sonnet-…`).
- * Both layouts are listed. The family-first 3.5 prefixes are kept for
- * backward compatibility even though they do not correspond to the
- * version-first ID layout.
- *
- * These only serve as a safety net — runtime pricing from models.dev is
- * preferred and fetched on every discovery cycle (cached 1h).
- */
-const FALLBACK_COSTS: Array<{ prefix: string; inputCostPerToken: number }> = [
-  { prefix: "claude-opus-4", inputCostPerToken: 15 / 1_000_000 },
-  { prefix: "claude-sonnet-4", inputCostPerToken: 3 / 1_000_000 },
-  { prefix: "claude-haiku-4", inputCostPerToken: 1 / 1_000_000 },
-  // Version-first Claude 3.x IDs. None of these is a prefix of another, and
-  // "claude-3-haiku" / "claude-3-sonnet" below do not match "claude-3-5-…".
-  { prefix: "claude-3-7-sonnet", inputCostPerToken: 3 / 1_000_000 },
-  { prefix: "claude-3-5-sonnet", inputCostPerToken: 3 / 1_000_000 },
-  { prefix: "claude-3-5-haiku", inputCostPerToken: 0.8 / 1_000_000 },
-  // Family-first spellings retained from the previous table.
-  { prefix: "claude-haiku-3-5", inputCostPerToken: 0.8 / 1_000_000 },
-  { prefix: "claude-sonnet-3-5", inputCostPerToken: 3 / 1_000_000 },
-  { prefix: "claude-3-haiku", inputCostPerToken: 0.25 / 1_000_000 },
-  { prefix: "claude-3-sonnet", inputCostPerToken: 3 / 1_000_000 },
-  { prefix: "claude-3-opus", inputCostPerToken: 15 / 1_000_000 },
-];
-/**
- * Look up the hardcoded per-input-token cost for a model ID.
- *
- * @param modelID Model identifier to match against the prefix table.
- * @returns The cost of the first entry whose prefix the ID starts with, or a
- *   deliberately high cost (100 USD per million tokens) for unknown models.
- */
-function fallbackCost(modelID: string): number {
-  const match = FALLBACK_COSTS.find(({ prefix }) => modelID.startsWith(prefix));
-  // Unknown model — assume expensive so it doesn't get picked as a worker
-  return match?.inputCostPerToken ?? (100 / 1_000_000);
-}
-/**
- * Shape of a model entry in the models.dev JSON API.
- *
- * Only `cost.input` is read by this module; both `cost` and `cost.input`
- * may be missing from an entry.
- */
-type ModelsDevEntry = {
-  id: string;
-  cost?: { input?: number };
-};
-/**
- * Shape of the models.dev JSON API response (subset we care about).
- *
- * Top-level keys are provider IDs; each provider may carry a `models` map
- * keyed by model ID. The parsed response is cast to this type, not validated
- * at runtime.
- */
-type ModelsDevResponse = {
-  [provider: string]: {
-    models?: { [modelId: string]: ModelsDevEntry };
-  };
-};
-/**
- * Fetch the models.dev cost map for Anthropic models.
- *
- * Single HTTP request to the JSON API, cached for 1 hour.
- *
- * Never throws: on a non-OK status, a missing `anthropic` provider, a
- * timeout or any other error it logs a warning and returns the previous
- * cache when there is one, otherwise an empty map. Failures are not cached,
- * so the next call retries.
- *
- * @returns Map of modelID → per-million-token input cost (USD).
- */
-export async function fetchCostMap(): Promise<Map<string, number>> {
-  // Return cache if fresh
-  if (cachedCostMap && Date.now() - cachedCostMapAt < COST_CACHE_TTL_MS) {
-    return cachedCostMap;
-  }
-  const controller = new AbortController();
-  const timeout = setTimeout(() => controller.abort(), 10_000);
-  try {
-    const response = await fetch(MODELS_DEV_API, { signal: controller.signal });
-    if (!response.ok) {
-      log.warn(`models.dev API failed: ${response.status} ${response.statusText}`);
-      return cachedCostMap ?? new Map();
-    }
-    // The timer stays armed while the body is read, so a stalled body is
-    // bounded by the same 10 s budget as the headers.
-    const data = (await response.json()) as ModelsDevResponse;
-    const anthropic = data.anthropic?.models;
-    if (!anthropic) {
-      log.warn("models.dev API: no anthropic provider found");
-      return cachedCostMap ?? new Map();
-    }
-    const costMap = new Map<string, number>();
-    for (const [modelId, entry] of Object.entries(anthropic)) {
-      // The response is only cast, not validated: keep real finite numbers
-      // and skip anything else so the map's value type holds at runtime.
-      const inputCost = entry?.cost?.input;
-      if (typeof inputCost === "number" && Number.isFinite(inputCost)) {
-        costMap.set(modelId, inputCost);
-      }
-    }
-    cachedCostMap = costMap;
-    cachedCostMapAt = Date.now();
-    log.info(`models.dev: loaded costs for ${costMap.size} anthropic models`);
-    return costMap;
-  } catch (e) {
-    log.warn("models.dev API error:", e);
-    return cachedCostMap ?? new Map();
-  } finally {
-    // Runs on every exit path, including a rejected fetch, so the timer
-    // never outlives the call.
-    clearTimeout(timeout);
-  }
-}
-/**
- * Drop the cached models.dev cost map and its timestamp so the next
- * `fetchCostMap()` call goes back to the network (for testing).
- */
-export function clearCostCache(): void {
-  cachedCostMapAt = 0;
-  cachedCostMap = null;
-}
-/**
- * Resolve a per-token input cost for each requested model ID.
- *
- * Loads the models.dev cost map once via `fetchCostMap()`, converts listed
- * per-million prices to per-token prices, and uses `fallbackCost()` for any
- * ID the map does not contain.
- *
- * @param modelIDs Model identifiers to price.
- * @returns Map of modelID → per-token input cost, one entry per distinct ID.
- */
-export async function fetchModelCosts(
-  modelIDs: string[],
-): Promise<Map<string, number>> {
-  const perMillion = await fetchCostMap();
-  return new Map(
-    modelIDs.map((modelID): [string, number] => {
-      const listed = perMillion.get(modelID);
-      return [
-        modelID,
-        listed == null ? fallbackCost(modelID) : listed / 1_000_000,
-      ];
-    }),
-  );
-}
-// ---------------------------------------------------------------------------
-// Anthropic /v1/models API types (subset we care about)
-// ---------------------------------------------------------------------------
-type AnthropicModelEntry = { // one element of the `/v1/models` `data` array
-  id: string;
-  display_name: string;
-  created_at: string;
-  capabilities?: {
-    thinking?: { supported: boolean }; // read by discoverModels to fill `capabilities.reasoning`
-  };
-};
-type AnthropicModelsResponse = { // one page of the `/v1/models` listing
-  data: AnthropicModelEntry[];
-  has_more: boolean; // when true, discoverModels requests another page
-  last_id?: string; // sent back as the `after_id` query parameter for the next page
-};
-// ---------------------------------------------------------------------------
-// Model discovery — fetch from upstream /v1/models
-// ---------------------------------------------------------------------------
-/**
- * Cached model list with TTL.
- *
- * `null` until a discovery run succeeds; `clearModelCache()` sets it back
- * to `null`.
- */
-let cachedModels: workerModel.ModelInfo[] | null = null;
-let cachedModelsAt = 0; // Date.now() of the last successful discovery; 0 = never
-const MODEL_CACHE_TTL_MS = 60 * 60 * 1000; // 1 hour
-/**
- * Fetch available Anthropic models from the upstream API.
- *
- * Results are cached for 1 hour — model listings change rarely and we
- * don't want to hit the API on every idle cycle.
- *
- * Unlike the OpenCode SDK's `provider.list()`, the Anthropic `/v1/models`
- * API only returns models that actually exist — deprecated models are
- * removed, so we never get stale entries like `claude-3-haiku-20240307`.
- *
- * Never throws: a non-OK response, a timeout or any other error is logged
- * and the previous cache (or an empty list) is returned instead.
- *
- * @param upstreamUrl Anthropic API base URL; trailing slashes are tolerated.
- * @param cred        Credential turned into request headers by `authHeaders`.
- * @returns The discovered models with per-token input cost attached.
- */
-export async function discoverModels(
-  upstreamUrl: string,
-  cred: AuthCredential,
-): Promise<workerModel.ModelInfo[]> {
-  // Return cache if fresh
-  if (cachedModels && Date.now() - cachedModelsAt < MODEL_CACHE_TTL_MS) {
-    return cachedModels;
-  }
-  try {
-    // "https://host/" would otherwise produce "https://host//v1/models".
-    const baseUrl = upstreamUrl.replace(/\/+$/, "");
-    const entries: AnthropicModelEntry[] = [];
-    let afterId: string | undefined;
-    // Paginate through all models
-    do {
-      const url = new URL(`${baseUrl}/v1/models`);
-      url.searchParams.set("limit", "1000");
-      if (afterId) url.searchParams.set("after_id", afterId);
-      // Bound each page (headers and body) the same way fetchCostMap bounds
-      // its request, so an unresponsive upstream cannot hang the idle cycle.
-      const controller = new AbortController();
-      const timeout = setTimeout(() => controller.abort(), 10_000);
-      let data: AnthropicModelsResponse;
-      try {
-        const response = await fetch(url.toString(), {
-          headers: {
-            "content-type": "application/json",
-            "anthropic-version": "2023-06-01",
-            ...authHeaders(cred),
-          },
-          signal: controller.signal,
-        });
-        if (!response.ok) {
-          const text = await response.text().catch(() => "(no body)");
-          log.warn(
-            `model discovery failed: ${response.status} ${response.statusText} — ${text}`,
-          );
-          return cachedModels ?? [];
-        }
-        data = (await response.json()) as AnthropicModelsResponse;
-      } finally {
-        clearTimeout(timeout);
-      }
-      for (const entry of data.data) {
-        entries.push(entry);
-      }
-      const nextId = data.has_more ? data.last_id : undefined;
-      // A cursor that does not advance would repeat the same page forever.
-      afterId = nextId !== afterId ? nextId : undefined;
-    } while (afterId);
-    // Price the listed models via models.dev (with fallback to hardcoded)
-    const modelIDs = entries.map((e) => e.id);
-    const costs = await fetchModelCosts(modelIDs);
-    const models: workerModel.ModelInfo[] = entries.map((entry) => ({
-      id: entry.id,
-      providerID: "anthropic",
-      cost: { input: costs.get(entry.id) ?? fallbackCost(entry.id) },
-      status: "active", // Only active models are returned by the API
-      capabilities: {
-        input: { text: true }, // All Anthropic models accept text
-        reasoning: entry.capabilities?.thinking?.supported ?? false,
-      },
-    }));
-    cachedModels = models;
-    cachedModelsAt = Date.now();
-    log.info(
-      `model discovery: found ${models.length} models (${models.map((m) => m.id).join(", ")})`,
-    );
-    return models;
-  } catch (e) {
-    log.warn("model discovery error:", e);
-    return cachedModels ?? [];
-  }
-}
-/**
- * Drop the cached model list and its timestamp so the next
- * `discoverModels()` call queries the upstream API again (for testing).
- */
-export function clearModelCache(): void {
-  cachedModelsAt = 0;
-  cachedModels = null;
-}
-// ---------------------------------------------------------------------------
-// Worker model validation — gateway version of maybeValidateWorkerModel
-// ---------------------------------------------------------------------------
-/** Guard against concurrent validation runs. */
-let validating = false;
-/**
- * Run worker model validation if needed.
- *
- * Called on session idle — discovers available models, selects candidates,
- * checks if the stored validation is stale, and runs the two-phase
- * comparison (structural check + LLM judge) if needed.
- *
- * Only one run is in flight at a time: the guard is taken before the first
- * `await`, so a second call that arrives while model discovery is still
- * pending returns immediately instead of starting a duplicate validation.
- *
- * Errors thrown by `runValidation` are logged and swallowed; errors from the
- * preparation steps propagate to the caller. The guard is released either way.
- *
- * @param sessionModel  The model ID being used for conversation (frontier)
- * @param upstreamUrl   Anthropic API base URL
- * @param cred          Auth credential for API calls
- * @param llm           LLM client for validation prompts
- * @param projectPath   Project directory path
- * @param sessionID     Session ID for loading reference distillation data
- */
-export async function maybeValidateWorkerModel(
-  sessionModel: string,
-  upstreamUrl: string,
-  cred: AuthCredential,
-  llm: LLMClient,
-  projectPath: string,
-  sessionID: string,
-): Promise<void> {
-  if (validating) return;
-  const cfg = loreConfig();
-  if (cfg.workerModel) return; // explicit override — skip auto-selection
-  // Claim the guard synchronously. Setting it only after `discoverModels`
-  // resolved would let overlapping calls all pass the check above.
-  validating = true;
-  try {
-    const models = await discoverModels(upstreamUrl, cred);
-    if (models.length === 0) return;
-    // Build the session model info for candidate selection.
-    // Use cost from discovered models if available, otherwise fallback.
-    const discoveredModel = models.find((m) => m.id === sessionModel);
-    const sessionModelInfo: Parameters<typeof workerModel.selectWorkerCandidates>[0] = {
-      id: sessionModel,
-      providerID: "anthropic",
-      cost: { input: discoveredModel?.cost.input ?? fallbackCost(sessionModel) },
-    };
-    const candidates = workerModel.selectWorkerCandidates(sessionModelInfo, models);
-    if (candidates.length === 0) return;
-    // If session model is already the cheapest, no comparison needed
-    if (candidates.length === 1 && candidates[0].id === sessionModel) return;
-    const fingerprint = workerModel.computeModelFingerprint(
-      "anthropic",
-      sessionModel,
-      models.filter((m) => m.providerID === "anthropic").map((m) => m.id),
-    );
-    const stored = workerModel.getValidatedWorkerModel("anthropic");
-    if (!workerModel.isValidationStale(stored, fingerprint)) return;
-    // Need reference distillation data
-    const distillations = distillationMod.loadForSession(projectPath, sessionID, true);
-    const gen0 = distillations.filter((d) => d.generation === 0);
-    if (gen0.length === 0) return;
-    const reference = gen0[gen0.length - 1]; // most recent gen-0
-    const sourceIds = reference.source_ids;
-    if (sourceIds.length === 0) return;
-    // Load source temporal messages
-    const allMessages = temporal.bySession(projectPath, sessionID);
-    const sourceSet = new Set(sourceIds);
-    const sourceMessages = allMessages.filter((m) => sourceSet.has(m.id));
-    if (sourceMessages.length === 0) return;
-    const messagesText = sourceMessages.map((m) => m.content).join("\n");
-    const date = new Date(sourceMessages[0].created_at).toLocaleDateString(
-      "en-US",
-      { year: "numeric", month: "long", day: "numeric" },
-    );
-    try {
-      const result = await workerModel.runValidation({
-        llm,
-        providerID: "anthropic",
-        sessionModelID: sessionModel,
-        candidates,
-        referenceObservations: reference.observations,
-        sourceMessagesText: messagesText,
-        date,
-      });
-      if (result) {
-        log.info(
-          `worker model validated: ${result.modelID} (judge=${result.judgeScore}) — saving 50%+ on worker calls`,
-        );
-      }
-    } catch (e) {
-      log.error("worker model validation error:", e);
-    }
-  } finally {
-    // Released on every exit path: early return, thrown error, or success.
-    validating = false;
-  }
-}
-// ---------------------------------------------------------------------------
-// Resolution — wrapper around core's resolveWorkerModel
-// ---------------------------------------------------------------------------
-/**
- * Pick the model that background (worker) calls should use.
- *
- * Reads the Lore config and hands the decision to core's
- * `resolveWorkerModel` for the "anthropic" provider, passing the explicit
- * `workerModel` override first and the configured `model` as the fallback.
- * The precedence between override, stored validation result and fallback is
- * decided inside core, not here.
- *
- * @returns The resolved provider/model pair, or `undefined` when core
- *   resolves nothing.
- */
-export function getWorkerModel(): { providerID: string; modelID: string } | undefined {
-  const { workerModel: explicitOverride, model: configuredModel } = loreConfig();
-  const resolved = workerModel.resolveWorkerModel(
-    "anthropic",
-    explicitOverride,
-    configuredModel,
-  );
-  return resolved;
-}
-/**
- * Return this module to its freshly-loaded state (for testing): release the
- * validation guard and empty both the cost cache and the model cache.
- */
-export function resetWorkerModelState(): void {
-  validating = false;
-  clearCostCache();
-  clearModelCache();
-}