npm - @loreai/gateway - Versions diffs - 0.13.4 → 0.14.1 - Mend

@loreai/gateway 0.13.4 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/dist/bin.cjs +27 -0
package/dist/index.cjs +1042 -0
package/dist/index.d.cts +21 -0
package/package.json +21 -13
package/dist/index.js +0 -3548
package/dist/index.js.map +0 -7
package/src/auth.ts +0 -133
package/src/batch-queue.ts +0 -555
package/src/compaction.ts +0 -195
package/src/config.ts +0 -199
package/src/idle.ts +0 -246
package/src/index.ts +0 -41
package/src/llm-adapter.ts +0 -110
package/src/pipeline.ts +0 -1604
package/src/recall.ts +0 -301
package/src/recorder.ts +0 -192
package/src/server.ts +0 -250
package/src/session.ts +0 -207
package/src/stream/anthropic.ts +0 -708
package/src/temporal-adapter.ts +0 -307
package/src/translate/anthropic.ts +0 -425
package/src/translate/openai.ts +0 -536
package/src/translate/types.ts +0 -177
package/src/worker-model.ts +0 -408

package/src/recall.ts DELETED Viewed

@@ -1,301 +0,0 @@
-/**
- * Gateway recall interception — transparent memory search for any client.
- *
- * Injects a `recall` tool into upstream requests and handles the response
- * transparently. Two strategies based on whether recall is the only tool:
- *
- *  - **Case 1 (recall-only)**: "Pause and Continue" — pause client stream,
- *    execute recall, send follow-up request, resume streaming in the same
- *    HTTP response.
- *  - **Case 2 (mixed tools)**: "Strip and Inject" — suppress recall blocks
- *    from the client stream, execute recall in background, inject the result
- *    into the next request from the client.
- *
- * All recall execution delegates to `runRecall()` from `@loreai/core`.
- */
-import {
-  runRecall,
-  RECALL_TOOL_DESCRIPTION,
-  RECALL_PARAM_DESCRIPTIONS,
-  log,
-  config as loreConfig,
-  type RecallScope,
-} from "@loreai/core";
-import type {
-  GatewayTool,
-  GatewayRequest,
-  GatewayResponse,
-  GatewayToolUseBlock,
-  GatewayMessage,
-  PendingRecall,
-} from "./translate/types";
-// ---------------------------------------------------------------------------
-// Tool definition
-// ---------------------------------------------------------------------------
-/**
- * Name under which the gateway-injected recall tool is advertised upstream.
- *
- * Declared ahead of the tool definition so that the definition and the
- * detection helpers in this module all compare against one shared value
- * instead of repeating the string literal.
- */
-export const RECALL_TOOL_NAME = "recall";
-/**
- * Recall tool definition for injection into upstream requests.
- *
- * The schema takes a required `query` string and an optional `scope`
- * limited to the enumerated values. The human-readable descriptions are
- * supplied by `@loreai/core`.
- */
-export const RECALL_GATEWAY_TOOL: GatewayTool = {
-  name: RECALL_TOOL_NAME,
-  description: RECALL_TOOL_DESCRIPTION,
-  inputSchema: {
-    type: "object",
-    properties: {
-      query: {
-        type: "string",
-        description: RECALL_PARAM_DESCRIPTIONS.query,
-      },
-      scope: {
-        type: "string",
-        enum: ["all", "session", "project", "knowledge"],
-        description: RECALL_PARAM_DESCRIPTIONS.scope,
-      },
-    },
-    required: ["query"],
-  },
-};
-// ---------------------------------------------------------------------------
-// Pending recall state (cross-request, Case 2)
-// ---------------------------------------------------------------------------
-/** Lifetime of a pending recall result: 60 seconds, expressed in milliseconds. */
-const PENDING_RECALL_TTL_MS = 60_000;
-/**
- * Report whether a pending recall is still within its TTL.
- *
- * @param pending - Pending recall whose `timestamp` is compared with the current time.
- * @returns `true` while fewer than `PENDING_RECALL_TTL_MS` milliseconds have
- *          elapsed since `pending.timestamp`.
- */
-export function isPendingRecallValid(pending: PendingRecall): boolean {
-  const ageMs = Date.now() - pending.timestamp;
-  return ageMs < PENDING_RECALL_TTL_MS;
-}
-// ---------------------------------------------------------------------------
-// Detection helpers
-// ---------------------------------------------------------------------------
-/**
- * Locate the first `tool_use` block in a response that targets the recall tool.
- *
- * @param resp - Response whose content blocks are scanned in order.
- * @returns The first matching block, or `undefined` when there is none.
- */
-export function findRecallToolUse(
-  resp: GatewayResponse,
-): GatewayToolUseBlock | undefined {
-  // Named type guard so the result of `find` is narrowed to a tool_use block.
-  const isRecallCall = (
-    block: GatewayResponse["content"][number],
-  ): block is GatewayToolUseBlock =>
-    block.type === "tool_use" && block.name === RECALL_TOOL_NAME;
-  return resp.content.find(isRecallCall);
-}
-/**
- * Tell whether a response carries a recall `tool_use` block.
- *
- * @param resp - Response to inspect.
- * @returns `true` when `findRecallToolUse` finds a block, `false` otherwise.
- */
-export function hasRecallToolUse(resp: GatewayResponse): boolean {
-  const recallBlock = findRecallToolUse(resp);
-  return recallBlock !== undefined;
-}
-/**
- * Tell whether a response contains any `tool_use` block for a tool other
- * than recall.
- *
- * @param resp - Response to inspect.
- * @returns `true` when at least one non-recall `tool_use` block is present.
- */
-export function hasOtherToolUse(resp: GatewayResponse): boolean {
-  const isForeignToolCall = (
-    block: GatewayResponse["content"][number],
-  ): boolean => block.type === "tool_use" && block.name !== RECALL_TOOL_NAME;
-  return resp.content.some(isForeignToolCall);
-}
-/**
- * Tell whether the client-supplied tool list already has a tool named like
- * the gateway's recall tool.
- *
- * @param tools - Tools declared by the client.
- * @returns `true` when any tool's `name` equals `RECALL_TOOL_NAME`.
- */
-export function clientHasRecallTool(tools: GatewayTool[]): boolean {
-  return tools.some(({ name }) => name === RECALL_TOOL_NAME);
-}
-// ---------------------------------------------------------------------------
-// Recall execution
-// ---------------------------------------------------------------------------
-/**
- * Parse recall input from the tool_use block.
- *
- * The input is produced by the model, so its shape is not trusted:
- *  - a missing, null, or non-object `input` is treated as an empty object;
- *  - a non-string `query` becomes the empty string;
- *  - a `scope` that is not one of the accepted values falls back to "all".
- *
- * @param block - The recall tool_use block emitted by the model.
- * @returns The query text and a validated scope.
- */
-function parseRecallInput(block: GatewayToolUseBlock): {
-  query: string;
-  scope: RecallScope;
-} {
-  // Mirrors the `scope` enum advertised in the recall tool's input schema.
-  // NOTE(review): assumes RecallScope is exactly this string union — confirm
-  // against @loreai/core.
-  const acceptedScopes: readonly RecallScope[] = [
-    "all",
-    "session",
-    "project",
-    "knowledge",
-  ];
-  const raw: unknown = block.input;
-  const input: Record<string, unknown> =
-    typeof raw === "object" && raw !== null
-      ? (raw as Record<string, unknown>)
-      : {};
-  const requestedScope = input.scope;
-  return {
-    query: typeof input.query === "string" ? input.query : "",
-    // Runtime membership check instead of an unchecked cast, so arbitrary
-    // values never reach the recall call typed as a valid scope.
-    scope:
-      acceptedScopes.find((candidate) => candidate === requestedScope) ??
-      "all",
-  };
-}
-/**
- * Execute the recall tool and return formatted results.
- *
- * Wraps `runRecall()` with error handling — on failure returns a
- * user-friendly error string rather than throwing. Input parsing and
- * configuration loading run inside the same guard, so a failure in either
- * of them is logged and reported the same way.
- *
- * @param block - The recall tool_use block emitted by the model.
- * @param projectPath - Project path forwarded to `runRecall()`.
- * @param sessionID - Session identifier forwarded to `runRecall()`.
- * @returns The recall result text (or the error message) together with the
- *          parsed input. If parsing itself failed, `input` is `{ query: "" }`.
- */
-export async function executeRecall(
-  block: GatewayToolUseBlock,
-  projectPath: string,
-  sessionID: string,
-): Promise<{ result: string; input: { query: string; scope?: RecallScope } }> {
-  // Placeholder returned only when parsing throws before real input exists.
-  let input: { query: string; scope?: RecallScope } = { query: "" };
-  try {
-    const { query, scope } = parseRecallInput(block);
-    input = { query, scope };
-    const cfg = loreConfig();
-    const result = await runRecall({
-      query,
-      scope,
-      projectPath,
-      sessionID,
-      // Knowledge search stays on unless the config explicitly disables it.
-      knowledgeEnabled: cfg.knowledge?.enabled ?? true,
-      searchConfig: cfg.search,
-    });
-    return { result, input };
-  } catch (e) {
-    log.error("gateway recall execution failed:", e);
-    return {
-      result: "Recall search failed. The memory system encountered an error.",
-      input,
-    };
-  }
-}
-// ---------------------------------------------------------------------------
-// Follow-up request builder (Case 1: recall-only)
-// ---------------------------------------------------------------------------
-/**
- * Build the follow-up request sent after a recall has been executed.
- *
- * The returned request is a shallow copy of `originalReq` with:
- *  - every original message, followed by
- *  - an assistant message holding ONLY the recall tool_use block, and
- *  - a user message holding the matching tool_result;
- *  - a tools list with the recall tool removed, so the model cannot call
- *    it again in the follow-up.
- *
- * `originalReq` itself is not modified.
- *
- * @param originalReq - The request that produced the recall tool_use.
- * @param resp - The response containing the recall call. Not read by the
- *               current implementation.
- * @param recallResult - Text returned by recall; an empty string is replaced
- *                       with "[No results found.]".
- * @param recallToolUseBlock - The recall tool_use block being answered.
- * @returns A new request ready to be sent upstream.
- */
-export function buildRecallFollowUp(
-  originalReq: GatewayRequest,
-  resp: GatewayResponse,
-  recallResult: string,
-  recallToolUseBlock: GatewayToolUseBlock,
-): GatewayRequest {
-  // The follow-up must not advertise recall again.
-  const remainingTools = originalReq.tools.filter(
-    (tool) => tool.name !== RECALL_TOOL_NAME,
-  );
-  // Only the tool_use goes into the assistant turn. Pre-recall text/thinking
-  // blocks were already streamed to the client; presenting the bare tool call
-  // lets the model produce fresh content once it sees the tool_result.
-  const recallCall: GatewayMessage = {
-    role: "assistant",
-    content: [recallToolUseBlock],
-  };
-  const recallAnswer: GatewayMessage = {
-    role: "user",
-    content: [
-      {
-        type: "tool_result",
-        toolUseId: recallToolUseBlock.id,
-        content: recallResult || "[No results found.]",
-      },
-    ],
-  };
-  return {
-    ...originalReq,
-    messages: [...originalReq.messages, recallCall, recallAnswer],
-    tools: remainingTools,
-  };
-}
-// ---------------------------------------------------------------------------
-// Pending recall injection (Case 2: next request enrichment)
-// ---------------------------------------------------------------------------
-/**
- * Inject a pending recall result into the current request.
- *
- * Finds the last assistant message that is immediately followed by a user
- * message, inserts the recall tool_use block into the assistant message at
- * the recorded position, and prepends the matching tool_result block to
- * that user message. The recall tool is also removed from `req.tools`.
- *
- * Mutates the request in-place for efficiency.
- *
- * @param req - Request to enrich; its messages and tools are modified.
- * @param pending - Recall call and result captured on the previous turn.
- * @returns `true` if injection was performed, `false` if the conversation
- *          structure didn't match (fewer than two messages, or no
- *          assistant→user pair). On `false` the request is left untouched.
- */
-export function injectPendingRecall(
-  req: GatewayRequest,
-  pending: PendingRecall,
-): boolean {
-  const messages = req.messages;
-  if (messages.length < 2) return false;
-  // Find the last assistant message followed by a user message.
-  // The pending recall was from the previous turn's assistant response.
-  let assistantIdx = -1;
-  for (let i = messages.length - 2; i >= 0; i--) {
-    if (
-      messages[i].role === "assistant" &&
-      messages[i + 1]?.role === "user"
-    ) {
-      assistantIdx = i;
-      break;
-    }
-  }
-  if (assistantIdx < 0) {
-    log.warn("injectPendingRecall: no assistant→user pair found");
-    return false;
-  }
-  const assistantMsg = messages[assistantIdx];
-  const userMsg = messages[assistantIdx + 1];
-  // Insert recall tool_use into assistant message at the recorded position.
-  // Clamp into [0, content.length]: the upper bound covers messages that were
-  // shortened since the position was recorded, the lower bound stops a
-  // negative position from making splice() count from the end of the array.
-  const insertPos = Math.min(
-    Math.max(pending.position, 0),
-    assistantMsg.content.length,
-  );
-  const recallToolUse: GatewayToolUseBlock = {
-    type: "tool_use",
-    id: pending.toolUseId,
-    name: RECALL_TOOL_NAME,
-    input: pending.input,
-  };
-  assistantMsg.content.splice(insertPos, 0, recallToolUse);
-  // Insert recall tool_result into the user message.
-  // Add it at the beginning alongside any other tool_results.
-  userMsg.content.unshift({
-    type: "tool_result",
-    toolUseId: pending.toolUseId,
-    content: pending.result,
-  });
-  // Strip recall from tools list for this request
-  req.tools = req.tools.filter((t) => t.name !== RECALL_TOOL_NAME);
-  return true;
-}
-// ---------------------------------------------------------------------------
-// Response content stripping (Case 2: remove recall from response)
-// ---------------------------------------------------------------------------
-/**
- * Produce a copy of a response without its recall tool_use blocks.
- *
- * The input response is not modified; all other fields are carried over by
- * a shallow copy and every non-recall content block is kept in order.
- *
- * @param resp - Response that may contain gateway-internal recall blocks.
- * @returns A new response whose `content` excludes recall tool_use blocks.
- */
-export function stripRecallFromResponse(
-  resp: GatewayResponse,
-): GatewayResponse {
-  // Keep a block unless it is a tool_use that targets the recall tool.
-  const visibleContent = resp.content.filter(
-    (block) => block.type !== "tool_use" || block.name !== RECALL_TOOL_NAME,
-  );
-  return { ...resp, content: visibleContent };
-}

package/src/recorder.ts DELETED Viewed

@@ -1,192 +0,0 @@
-/**
- * Fixture recorder and replayer for the Lore gateway.
- *
- * Recording mode: intercepts every upstream API call, writes the
- * (request, response) pair to an NDJSON fixture file, then returns
- * the real response to the caller unchanged.
- *
- * Replay mode: replays stored fixtures in sequence, never touching
- * the upstream API.  Useful for deterministic integration tests.
- */
-import { appendFileSync } from "node:fs";
-import { log } from "@loreai/core";
-// ---------------------------------------------------------------------------
-// Public types
-// ---------------------------------------------------------------------------
-/** One entry per upstream API call, stored as one line of the fixture file. */
-export interface FixtureEntry {
-  /** Sequence number within the recording session (0-based). */
-  seq: number;
-  /** Wall-clock timestamp (ms since Unix epoch) when the call was made. */
-  ts: number;
-  /** The upstream request body as sent (Anthropic /v1/messages JSON). */
-  request: unknown;
-  /**
-   * The upstream response body. As written by the recording interceptor this
-   * is the parsed JSON value when the body text parses as JSON, otherwise the
-   * raw body text as a string.
-   */
-  response: unknown;
-  /** Whether the original request asked for a streaming response. */
-  wasStreaming: boolean;
-  /** Model that was used for the request. */
-  model: string;
-}
-/**
- * Interceptor function injected into the upstream forwarding path.
- *
- * @param requestBody  - The request body that will be sent upstream.
- * @param model        - Model identifier from the request.
- * @param wasStreaming - Whether the original request was streaming.
- * @param makeRealRequest - Thunk that performs the actual HTTP request.
- *                          The interceptor decides whether to call it.
- * @returns A promise for the `Response` the caller should use; whether it
- *          originates from `makeRealRequest` is up to the interceptor.
- */
-export type UpstreamInterceptor = (
-  requestBody: unknown,
-  model: string,
-  wasStreaming: boolean,
-  makeRealRequest: () => Promise<Response>,
-) => Promise<Response>;
-// ---------------------------------------------------------------------------
-// Module-level state
-// ---------------------------------------------------------------------------
-/**
- * Non-null when recording is active; holds the path of the fixture file.
- * Assigned by `startRecording()` and reset to `null` by `stopRecording()`.
- */
-let recordingPath: string | null = null;
-/**
- * Monotonically increasing counter for fixture sequence numbers.
- * Reset to 0 by `startRecording()` and incremented once per recorded call.
- */
-let seqCounter = 0;
-// ---------------------------------------------------------------------------
-// Recording control
-// ---------------------------------------------------------------------------
-/**
- * Switch recording mode on.
- *
- * Restarts the fixture sequence numbering at 0, remembers `fixturePath` as
- * the file that recorded calls are appended to, and logs the target path.
- *
- * @param fixturePath - Path of the NDJSON fixture file to append to.
- */
-export function startRecording(fixturePath: string): void {
-  seqCounter = 0;
-  recordingPath = fixturePath;
-  const announcement = `[recorder] recording to: ${fixturePath}`;
-  log.info(announcement);
-}
-/**
- * Disable recording mode.
- *
- * Clears the module-level fixture path, so later `getRecordedInterceptor()`
- * calls return `null`. Interceptors created earlier captured the path in a
- * closure and are not affected by this reset. The sequence counter is left
- * as-is.
- */
-export function stopRecording(): void {
-  recordingPath = null;
-}
-// ---------------------------------------------------------------------------
-// Recording interceptor
-// ---------------------------------------------------------------------------
-/**
- * Returns an `UpstreamInterceptor` when recording mode is active, or
- * `null` when it is not.
- *
- * The returned interceptor:
- *  1. Calls `makeRealRequest()` to get the real upstream response.
- *  2. Reads the full response body as text. The text is stored as parsed
- *     JSON when it parses, and verbatim as a string otherwise (for example
- *     SSE text from a streaming response).
- *  3. Appends a `FixtureEntry` line to the fixture file. Recording is
- *     best-effort: a failed write is logged and does not fail the request.
- *  4. Returns a new `Response` with the same status, status text, headers,
- *     and body (the original body stream is already consumed, so we
- *     reconstitute it).
- *
- * @returns The recording interceptor, or `null` when no recording is active.
- */
-export function getRecordedInterceptor(): UpstreamInterceptor | null {
-  if (!recordingPath) return null;
-  // Capture the path at interceptor creation time so closure is stable
-  const fixturePath = recordingPath;
-  return async (
-    requestBody: unknown,
-    model: string,
-    wasStreaming: boolean,
-    makeRealRequest: () => Promise<Response>,
-  ): Promise<Response> => {
-    const ts = Date.now();
-    const seq = seqCounter++;
-    // Perform the real upstream request
-    const realResponse = await makeRealRequest();
-    // Copy the response headers into a plain record for the rebuilt Response
-    const responseHeaders: Record<string, string> = {};
-    realResponse.headers.forEach((value, key) => {
-      responseHeaders[key] = value;
-    });
-    // Read the full body text — this consumes the stream
-    const bodyText = await realResponse.text();
-    // Parse body as JSON for structured storage; fall back to raw string
-    let responseBody: unknown;
-    try {
-      responseBody = JSON.parse(bodyText);
-    } catch {
-      responseBody = bodyText;
-    }
-    // Write the fixture entry
-    const entry: FixtureEntry = {
-      seq,
-      ts,
-      request: requestBody,
-      response: responseBody,
-      wasStreaming,
-      model,
-    };
-    try {
-      appendFileSync(fixturePath, JSON.stringify(entry) + "\n", "utf8");
-      log.info(`[recorder] captured turn seq=${seq} model=${model}`);
-    } catch (e) {
-      // A broken fixture file must not cost the caller the real response.
-      log.error(`[recorder] failed to write fixture seq=${seq}:`, e);
-    }
-    // The Response constructor rejects a non-null body for null-body
-    // statuses, so hand those back without a body.
-    const isNullBodyStatus =
-      realResponse.status === 204 ||
-      realResponse.status === 205 ||
-      realResponse.status === 304;
-    // Return a new Response with the same status and headers but a fresh body
-    return new Response(isNullBodyStatus ? null : bodyText, {
-      status: realResponse.status,
-      statusText: realResponse.statusText,
-      headers: responseHeaders,
-    });
-  };
-}
-// ---------------------------------------------------------------------------
-// Replay interceptor
-// ---------------------------------------------------------------------------
-/**
- * Build an interceptor that serves stored fixtures in order and never
- * invokes `makeRealRequest()`.
- *
- * Every call consumes the next fixture. Once the internal counter reaches
- * `fixtures.length`, the next call rejects with a "Replay exhausted" error.
- * The request body, model, and streaming flag passed to the interceptor
- * are ignored.
- *
- * NOTE(review): a fixture whose `response` is a plain string is still
- * serialized with `JSON.stringify` and served as `application/json`.
- *
- * @param fixtures - Recorded entries to replay, in the order to serve them.
- * @returns An interceptor answering each call with status 200 and the
- *          fixture's response serialized as JSON.
- */
-export function getReplayInterceptor(fixtures: FixtureEntry[]): UpstreamInterceptor {
-  let replayCounter = 0;
-  return async (
-    _requestBody: unknown,
-    _model: string,
-    _wasStreaming: boolean,
-    _makeRealRequest: () => Promise<Response>,
-  ): Promise<Response> => {
-    const total = fixtures.length;
-    if (replayCounter >= total) {
-      throw new Error(
-        `Replay exhausted: no more fixtures (tried to replay entry ${replayCounter}, ` +
-          `but only ${fixtures.length} fixture(s) are available)`,
-      );
-    }
-    const index = replayCounter;
-    replayCounter = index + 1;
-    const fixture = fixtures[index];
-    log.info(
-      `[recorder] replaying seq=${fixture.seq} model=${fixture.model} ` +
-        `(${replayCounter}/${fixtures.length})`,
-    );
-    // The reply is always plain JSON, never SSE — the pipeline handles
-    // re-streaming if the client originally requested SSE.
-    const payload = JSON.stringify(fixture.response);
-    return new Response(payload, {
-      status: 200,
-      headers: { "content-type": "application/json" },
-    });
-  };
-}