npm - @loreai/gateway - Versions diffs - 0.13.4 → 0.14.0 - Mend

@loreai/gateway 0.13.4 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

package/dist/index.js +49694 -3155
package/package.json +14 -6
package/src/batch-queue.ts +21 -1
package/src/cache-analytics.ts +344 -0
package/src/cli/agents.ts +107 -0
package/src/cli/bin.ts +11 -0
package/src/cli/help.ts +55 -0
package/src/cli/lib/binary.ts +353 -0
package/src/cli/lib/bspatch.ts +306 -0
package/src/cli/lib/delta-upgrade.ts +790 -0
package/src/cli/lib/errors.ts +48 -0
package/src/cli/lib/ghcr.ts +389 -0
package/src/cli/lib/patch-cache.ts +342 -0
package/src/cli/lib/upgrade.ts +454 -0
package/src/cli/lib/version-check.ts +385 -0
package/src/cli/main.ts +152 -0
package/src/cli/run.ts +181 -0
package/src/cli/start.ts +82 -0
package/src/cli/upgrade.ts +311 -0
package/src/cli/version.ts +22 -0
package/src/idle.ts +0 -6
package/src/index.ts +27 -27
package/src/llm-adapter.ts +100 -28
package/src/pipeline.ts +254 -177
package/src/recall.ts +223 -91
package/src/temporal-adapter.ts +3 -0
package/src/translate/anthropic.ts +50 -6
package/src/translate/types.ts +54 -9
package/dist/index.js.map +0 -7

package/src/index.ts CHANGED Viewed

@@ -1,41 +1,41 @@
 /**
- * Lore Gateway — entry point.
+ * Lore Gateway — package entry point.
  *
- * Starts the HTTP proxy server that applies Lore's context management
- * pipeline to any AI coding client speaking the Anthropic or OpenAI
- * protocol.
+ * Library exports for programmatic use, plus `_cli()` for the CLI binary.
  *
- * Usage:
- *   bun run packages/gateway/src/index.ts
- *   ANTHROPIC_BASE_URL=http://127.0.0.1:6969 claude
+ * Library usage:
+ *   import { startServer, loadConfig } from "@loreai/gateway";
+ *
+ * CLI usage (via bin wrapper):
+ *   lore start
+ *   lore run claude
  */
-import { loadConfig } from "./config";
-import { startServer } from "./server";
-import { resetPipelineState } from "./pipeline";
+import "../instrument";
 // ---------------------------------------------------------------------------
-// Boot
+// Library API
 // ---------------------------------------------------------------------------
-const config = loadConfig();
-const server = startServer(config);
+export { loadConfig } from "./config";
+export type { GatewayConfig } from "./config";
+export { startServer } from "./server";
+export { handleRequest, resetPipelineState } from "./pipeline";
-const addr = `http://${config.host}:${server.port}`;
-console.error(`[lore] Gateway listening on ${addr}`);
-console.error(`[lore] Model routing: claude-* → Anthropic, nvidia/* → Nvidia NIM, gpt-* → OpenAI, …`);
-console.error(`[lore] Plugin auto-detects gateway — just start OpenCode normally`);
+// ---------------------------------------------------------------------------
+// CLI entry — called by dist/bin.cjs or `bun run src/index.ts`
+// ---------------------------------------------------------------------------
+export { _cli } from "./cli/main";
 // ---------------------------------------------------------------------------
-// Graceful shutdown
+// Direct execution — `bun run src/index.ts` still works as before
 // ---------------------------------------------------------------------------
-async function shutdown() {
-  console.error("[lore] Shutting down…");
-  server.stop();
-  // Gracefully shut down the batch queue (flushes pending items synchronously)
-  await resetPipelineState();
-  process.exit(0);
+if (typeof Bun !== "undefined" && Bun.main === import.meta.path) {
+  // Direct execution (e.g. `bun run src/index.ts` from the OpenCode plugin)
+  // defaults to server-only mode (`start`), not `run` — there's no TTY and
+  // no reason to auto-detect agents when launched as an embedded server.
+  // esbuild CJS output drops import.meta to `{}` so the condition is
+  // always false in the npm bundle — the dynamic import is dead-code-eliminated.
+  import("./cli/start").then(({ commandStart }) => commandStart({}));
 }
-process.on("SIGINT", () => shutdown());
-process.on("SIGTERM", () => shutdown());

package/src/llm-adapter.ts CHANGED Viewed

@@ -16,6 +16,36 @@ import { authHeaders } from "./auth";
 /** Tracks worker session IDs so temporal capture can skip them. */
 export const activeWorkerCalls = new Set<string>();
+// ---------------------------------------------------------------------------
+// Retry helpers
+// ---------------------------------------------------------------------------
+/** HTTP status codes that are transient and worth retrying. */
+const TRANSIENT_CODES = new Set([429, 500, 502, 503, 529]);
+const MAX_RETRIES = 3;
+/** Parse the Retry-After header into milliseconds, or null if absent/invalid. */
+function parseRetryAfter(response: Response): number | null {
+  const header = response.headers.get("retry-after");
+  if (!header) return null;
+  const seconds = Number(header);
+  if (!Number.isNaN(seconds)) return seconds * 1000;
+  const date = Date.parse(header);
+  if (!Number.isNaN(date)) return Math.max(0, date - Date.now());
+  return null;
+}
+/** Compute delay for a retry attempt, respecting Retry-After on the first try. */
+function backoffMs(attempt: number, retryAfterMs: number | null): number {
+  if (attempt === 0 && retryAfterMs != null)
+    return Math.min(retryAfterMs, 30_000); // cap Retry-After at 30s
+  return Math.min(1000 * 2 ** attempt, 8000); // 1s, 2s, 4s, capped at 8s
+}
+function sleep(ms: number): Promise<void> {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
 // ---------------------------------------------------------------------------
 // LLMClient factory
 // ---------------------------------------------------------------------------
@@ -64,41 +94,83 @@ export function createGatewayLLMClient(
             ]
           : undefined;
-        const response = await fetch(url, {
-          method: "POST",
-          headers: {
-            "Content-Type": "application/json",
-            "anthropic-version": "2023-06-01",
-            ...authHeaders(cred),
-          },
-          // opts.thinking is intentionally not forwarded — this bare API
-          // call never includes the `thinking` parameter so Anthropic
-          // models won't produce thinking tokens regardless.
-          body: JSON.stringify({
-            model: model.modelID,
-            max_tokens: 8192,
-            system: systemPayload ?? system,
-            messages: [{ role: "user", content: user }],
-          }),
+        const body = JSON.stringify({
+          model: model.modelID,
+          max_tokens: 8192,
+          system: systemPayload ?? system,
+          messages: [{ role: "user", content: user }],
         });
-        if (!response.ok) {
+        const headers = {
+          "Content-Type": "application/json",
+          "anthropic-version": "2023-06-01",
+          ...authHeaders(cred),
+        };
+        // Retry loop for transient failures (network errors and the statuses in TRANSIENT_CODES)
+        for (let attempt = 0; ; attempt++) {
+          let response: Response;
+          try {
+            response = await fetch(url, {
+              method: "POST",
+              headers,
+              // opts.thinking is intentionally not forwarded — this bare API
+              // call never includes the `thinking` parameter so Anthropic
+              // models won't produce thinking tokens regardless.
+              body,
+            });
+          } catch (e) {
+            // Network/fetch error — retry if attempts remain
+            if (attempt < MAX_RETRIES) {
+              const delay = backoffMs(attempt, null);
+              log.warn(
+                `worker request network error (attempt ${attempt + 1}/${MAX_RETRIES + 1}), retrying in ${delay}ms`,
+              );
+              await sleep(delay);
+              continue;
+            }
+            throw e; // exhausted retries — rethrow to outer catch
+          }
+          if (response.ok) {
+            const data = (await response.json()) as {
+              content?: Array<{ type: string; text?: string }>;
+            };
+            const textBlock = data.content?.find(
+              (b) => b.type === "text" && typeof b.text === "string",
+            );
+            return textBlock?.text ?? null;
+          }
+          // Non-transient error — fail immediately, no retry
+          if (!TRANSIENT_CODES.has(response.status)) {
+            const text = await response.text().catch(() => "(no body)");
+            log.error(
+              `worker upstream request failed: ${response.status} ${response.statusText} — ${text}`,
+            );
+            return null;
+          }
+          // Transient error — retry if attempts remain
+          if (attempt < MAX_RETRIES) {
+            const retryAfter = parseRetryAfter(response);
+            const delay = backoffMs(attempt, retryAfter);
+            log.warn(
+              `worker upstream ${response.status} (attempt ${attempt + 1}/${MAX_RETRIES + 1}), retrying in ${delay}ms`,
+            );
+            await sleep(delay);
+            continue;
+          }
+          // Exhausted retries
           const text = await response.text().catch(() => "(no body)");
           log.error(
-            `worker upstream request failed: ${response.status} ${response.statusText} — ${text}`,
+            `worker upstream request failed after ${MAX_RETRIES + 1} attempts: ${response.status} ${response.statusText} — ${text}`,
           );
           return null;
         }
-        const data = (await response.json()) as {
-          content?: Array<{ type: string; text?: string }>;
-        };
-        const textBlock = data.content?.find(
-          (b) => b.type === "text" && typeof b.text === "string",
-        );
-        return textBlock?.text ?? null;
       } catch (e) {
         log.error("worker prompt failed:", e);
         return null;