npm - pi-openrouter-realtime - Versions diffs - 0.2.2 → 0.3.2 - Mend

pi-openrouter-realtime 0.2.2 → 0.3.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (9)

package/README.md +88 -15
package/extensions/openrouter-routing/api.ts +155 -0
package/extensions/openrouter-routing/index.ts +358 -380
package/extensions/openrouter-routing/models.ts +274 -0
package/extensions/openrouter-routing/picker.ts +333 -0
package/extensions/openrouter-routing/routing.ts +63 -0
package/extensions/openrouter-routing/state.ts +108 -0
package/extensions/openrouter-routing/types.ts +159 -0
package/package.json +3 -3

package/README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ![Preview](https://raw.githubusercontent.com/olixis/pi-openrouter-plus/main/assets/preview.png)
-# pi-openrouter-realtime
+# pi-openrouter-realtime v0.3.2
-Pi extension for OpenRouter that loads the latest models from OpenRouter in real time, keeps the default model list simple, and lets you enrich a specific model with provider and quantization variants.
+Pi extension for OpenRouter that loads the latest models from OpenRouter in real time, with provider/quantization enrichment, endpoint health indicators, credit balance display, interactive model picker, and tab-completion.
 Once the extension is installed and your OpenRouter credential is configured in pi, each new pi session automatically fetches the latest OpenRouter model list.
@@ -10,14 +10,51 @@ Npm package:
 - `pi-openrouter-realtime`
+## What's New in v0.3.2
+- **Context-safe info messages** — OpenRouter info panels still display in the UI, but are filtered out before LLM requests
+- **Lower token waste** — `/openrouter-preview`, `/openrouter-balance`, and `/openrouter-status` no longer consume context window space unnecessarily
+- **Less prompt contamination** — read-only extension output no longer gets echoed back into future model turns unless you explicitly include it
+How it works:
+- The extension still emits `openrouter-info` messages so you can see rich output in-session
+- Before each LLM call, a `context` hook removes those `openrouter-info` custom messages from the message list
+- Result: visible UX for humans, but no extra prompt baggage for the model
+## What's New in v0.3.1
+- **Fixed variant counting** — enriched variants are no longer presented as both base models and `+N variants`
+- **Clearer totals** — status/output now distinguishes total registered models from variant count
+- **Less intrusive account output** — removed the key label / redacted API-key style line from account/status output
+## What's New in v0.3.0
+- **Targeted enrichment** — enrich one model on demand without scanning the whole catalog
+- **Interactive model picker** — run `/openrouter-enrich` without args → type a search query → pick from filtered results
+- **Tab-completion** — autocomplete model IDs when typing commands
+- **`/openrouter-preview`** — inspect provider variants and endpoint health without changing your model list
+- **`/openrouter-balance`** — check your OpenRouter credit balance and usage
+- **`/openrouter-status`** — see current extension state, active enrichments, cache age
+- **Endpoint health data** — status, uptime, latency (TTFT), throughput per variant
+- **Snapshot-based routing** — eliminates race conditions with stale route maps
+- **Transactional sync** — state only updates on success, never left in a broken state
+- **Fixed cost parsing** — missing pricing no longer shows as "free"
+- **Auth detection fix** — works with both env vars and auth.json
+- **Fetch timeouts** — 15s timeout prevents hanging on OpenRouter API issues
+- **HTTP-Referer / X-Title headers** — proper app identification with OpenRouter
 ## Features
 - Loads the latest OpenRouter model list into pi in real time
 - Keeps startup behavior fast by default
 - Adds provider-specific variants on demand
-- Adds quantization-specific variants for a chosen model
+- Adds quantization-specific variants for chosen models
 - Routes enriched selections through OpenRouter provider routing
-- Enriches one model at a time to avoid slow full-catalog endpoint scans
+- Shows endpoint health: status, uptime, latency, throughput, caching support
+- Displays credit balance and usage statistics
+- Interactive model selection with searchable picker
+- Tab-completes model IDs for all commands
 ## Install
@@ -81,12 +118,7 @@ Using `~/.pi/agent/auth.json`:
 }
 ```
-Pi's official provider docs use:
-- Environment variable: `OPENROUTER_API_KEY`
-- `auth.json` key: `openrouter`
-After the key is available, this extension automatically syncs the latest plain OpenRouter model list at session start.
+After the key is available, this extension automatically syncs the latest OpenRouter model list at session start.
 ### 3) Try without installing
@@ -102,10 +134,19 @@ pi -e git:github.com/olixis/pi-openrouter-plus
 ## Commands
-- `/openrouter-sync` — fetch the latest OpenRouter model list in real time
-- `/openrouter-enrich <model-id>` — add provider and quantization variants for one specific OpenRouter model
+| Command | Description |
+|---|---|
+| `/openrouter-sync` | Fetch latest OpenRouter models and restore the plain model list |
+| `/openrouter-enrich <model-id>` | Add provider/quantization variants for one model |
+| `/openrouter-enrich` | Search → pick a model interactively (no args) |
+| `/openrouter-preview <model-id>` | Preview endpoint variants with health data (read-only) |
+| `/openrouter-preview` | Search → pick a model to preview (no args) |
+| `/openrouter-balance` | Show credit balance, remaining funds, and usage breakdown |
+| `/openrouter-status` | Show extension state: model count, enrichments, cache age |
-## Example
+## Examples
+### Enrich a model
 ```bash
 /openrouter-enrich kwaipilot/kat-coder-pro-v2
@@ -116,13 +157,45 @@ This keeps the normal OpenRouter catalog and adds variants like:
 - `StreamLake — Kwaipilot: KAT-Coder-Pro V2`
 - `AtlasCloud · fp8 — Kwaipilot: KAT-Coder-Pro V2`
+### Preview endpoints before enriching
+```bash
+/openrouter-preview deepseek/deepseek-r1
+```
+Shows provider variants with pricing and health data:
+```
+DeepSeek: DeepSeek R1 (deepseek/deepseek-r1)
+8 endpoints across 5 provider/quantization variants:
+• DeepInfra — $0.55/M in · $2.19/M out · ✅ healthy · uptime: 99% · TTFT: 450ms · 85 tok/s
+• DeepSeek — $0.55/M in · $2.19/M out · ✅ healthy · uptime: 100% · TTFT: 320ms · 120 tok/s · 📦 caching
+• Fireworks · fp8 — $0.60/M in · $2.40/M out · ⚠️ degraded · uptime: 95% · TTFT: 600ms · 60 tok/s
+```
+### Check your balance
+```bash
+/openrouter-balance
+```
 ## Behavior
-- After the extension is installed and OpenRouter auth is configured, each new pi session syncs the latest plain OpenRouter model list from OpenRouter automatically
-- Enrichment is manual and targeted to one model at a time
+- After the extension is installed and OpenRouter auth is configured, each new pi session syncs the latest OpenRouter model list automatically
+- Enrichment is intentionally simple: you enrich one selected model at a time
 - Quantization variants are exposed as separate model choices when available
 - Enriched variants are translated into OpenRouter provider routing fields at request time
 - If you want to refresh manually or go back to the default list, run `/openrouter-sync`
+- Preview output also includes search-related model info (id, name, terms, description) plus pricing and endpoint health
+## Architecture (v0.3.x improvements)
+- **Snapshot-based routing** — the stream factory captures a frozen route map at registration time, eliminating race conditions when syncing
+- **Generation counter** — overlapping sync calls are safely discarded if a newer sync has started
+- **Transactional state** — caches are not cleared before fetch; state only commits on success
+- **Auth-keyed caching** — model cache invalidates when the API key changes
+- **Fetch timeouts** — all OpenRouter API calls have a 15-second timeout via AbortController
 ## Development

package/extensions/openrouter-routing/api.ts ADDED Viewed

@@ -0,0 +1,155 @@
+import {
+  OPENROUTER_MODELS_URL,
+  OPENROUTER_BASE_URL,
+  CACHE_TTL_MS,
+  FETCH_TIMEOUT_MS,
+  type OpenRouterModel,
+  type OpenRouterEndpoint,
+  type OpenRouterEndpointsResponse,
+  type OpenRouterKeyInfo,
+  type OpenRouterCreditsInfo,
+  type EndpointCacheEntry,
+} from "./types.js";
+let cachedModels: OpenRouterModel[] | null = null; // Model list last stored by fetchModels; null until a fetch succeeds or after invalidation.
+let cacheTimestamp = 0; // Date.now() value sampled by fetchModels just before the request that filled cachedModels (0 = nothing cached).
+let cachedApiKeyHash = ""; // Fingerprint (see hashKey) of the API key seen on the most recent fetchModels call.
+const endpointCache = new Map<string, EndpointCacheEntry>(); // Endpoint lists cached per model id by fetchModelEndpoints.
+function hashKey(key?: string): string { // Derives a short fingerprint of the API key; fetchModels compares it to detect a key change.
+  if (!key) return ""; // Undefined or empty key maps to the empty fingerprint.
+  return key.slice(0, 8) + key.slice(-4); // First 8 plus last 4 characters; not a real hash, so keys sharing both ends compare equal.
+}
+function makeHeaders(apiKey?: string): Record<string, string> { // Builds the header map used for the OpenRouter requests in this module.
+  const headers: Record<string, string> = {};
+  if (apiKey) headers["Authorization"] = `Bearer ${apiKey}`; // Bearer auth is added only when a key is supplied; otherwise the map stays empty.
+  return headers;
+}
+async function fetchWithTimeout(url: string, options: RequestInit = {}): Promise<Response> { // fetch() wrapper that aborts the request once FETCH_TIMEOUT_MS elapses.
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), FETCH_TIMEOUT_MS); // Fires the abort when the deadline passes.
+  try {
+    return await fetch(url, { ...options, signal: controller.signal }); // signal is listed after the spread, so any options.signal from the caller is replaced.
+  } finally {
+    clearTimeout(timeout); // Runs whether fetch resolved or rejected; after this point nothing aborts the request, so later body reads by the caller are not bounded by the deadline.
+  }
+}
+function formatFetchError(res: Response, context: string): Error { // Builds (does not throw) an Error for a failed response; message is "<context>: <status> <statusText><hint>".
+  const status = res.status;
+  let hint = ""; // Optional suggestion appended to the message; stays empty for statuses not matched below.
+  if (status === 401 || status === 403) { // Authentication / authorization failure.
+    hint = " — check your OpenRouter API key";
+  } else if (status === 429) { // Rate limiting.
+    hint = " — rate limited, try again shortly";
+  } else if (status >= 500) { // Any server-side error.
+    hint = " — OpenRouter is having issues, try again later";
+  }
+  return new Error(`${context}: ${status} ${res.statusText}${hint}`); // The caller decides whether to throw the returned Error.
+}
+export function invalidateModelCache(): void { // Drops the cached model list so the next fetchModels call goes to the network.
+  cachedModels = null;
+  cacheTimestamp = 0; // cachedApiKeyHash is intentionally left as-is here; only the list and its age are reset.
+}
+export function invalidateEndpointCache(modelId?: string): void { // Clears cached endpoint data for one model, or for all models when no id is given.
+  if (modelId) { // Truthiness check: an empty-string id takes the clear-all branch below.
+    endpointCache.delete(modelId);
+  } else {
+    endpointCache.clear();
+  }
+}
+export function invalidateAllCaches(): void { // Resets both the model-list cache and the entire endpoint cache.
+  invalidateModelCache();
+  invalidateEndpointCache(); // No argument, so every cached model entry is removed.
+}
+export async function fetchModels(apiKey?: string, force = false): Promise<OpenRouterModel[]> { // Returns the OpenRouter model list, served from the module-level cache while it is fresh; force=true skips the cache. Throws the Error from formatFetchError on a non-OK response.
+  const keyHash = hashKey(apiKey);
+  if (keyHash !== cachedApiKeyHash) { // Key fingerprint differs from the last call: discard the cached list before doing anything else.
+    cachedModels = null;
+    cacheTimestamp = 0;
+    cachedApiKeyHash = keyHash;
+  }
+  const now = Date.now();
+  if (!force && cachedModels && now - cacheTimestamp < CACHE_TTL_MS) { // Cache hit: not forced, a list is present, and it is younger than CACHE_TTL_MS.
+    return cachedModels;
+  }
+  const res = await fetchWithTimeout(OPENROUTER_MODELS_URL, {
+    headers: makeHeaders(apiKey),
+  });
+  if (!res.ok) throw formatFetchError(res, "OpenRouter models API"); // Thrown before cachedModels is reassigned, so a failed refresh does not overwrite the list.
+  const json = (await res.json()) as { data?: OpenRouterModel[] }; // Response shape is asserted, not validated at runtime.
+  cachedModels = json.data || []; // A response without data is cached as an empty list.
+  cacheTimestamp = now; // Age is measured from just before the request was issued.
+  return cachedModels;
+}
+function buildEndpointsUrl(modelId: string): string { // Builds "<OPENROUTER_BASE_URL>/models/<modelId>/endpoints" with each path segment URL-encoded.
+  const path = modelId
+    .split("/") // Split first so the "/" separators inside the model id survive as path separators.
+    .map((part) => encodeURIComponent(part)) // Encode each segment individually.
+    .join("/");
+  return `${OPENROUTER_BASE_URL}/models/${path}/endpoints`;
+}
+export async function fetchModelEndpoints( // Returns the endpoint list for one model, using a per-model cache with the same TTL as the model list. Throws the Error from formatFetchError on non-OK responses other than 404.
+  modelId: string,
+  apiKey?: string,
+  force = false, // When true, the cached entry is ignored and the API is queried again.
+): Promise<OpenRouterEndpoint[]> {
+  const cached = endpointCache.get(modelId); // Cache is keyed by model id only; the API key is not part of the key.
+  const now = Date.now();
+  if (!force && cached && now - cached.timestamp < CACHE_TTL_MS) { // Fresh entry: return it without a request.
+    return cached.endpoints;
+  }
+  const res = await fetchWithTimeout(buildEndpointsUrl(modelId), {
+    headers: makeHeaders(apiKey),
+  });
+  if (res.status === 404) { // 404 is treated as "no endpoints" rather than an error, and that empty result is cached too.
+    endpointCache.set(modelId, { timestamp: now, endpoints: [] });
+    return [];
+  }
+  if (!res.ok) throw formatFetchError(res, "OpenRouter endpoints API"); // Any existing cache entry is left in place on failure.
+  const json = (await res.json()) as OpenRouterEndpointsResponse; // Response shape is asserted, not validated at runtime.
+  const endpoints = json.data?.endpoints || []; // Missing data/endpoints falls back to an empty list.
+  endpointCache.set(modelId, { timestamp: now, endpoints });
+  return endpoints;
+}
+export async function fetchKeyInfo(apiKey: string): Promise<OpenRouterKeyInfo> { // Fetches "<OPENROUTER_BASE_URL>/key" for the given API key; not cached. Throws the Error from formatFetchError on a non-OK response.
+  const res = await fetchWithTimeout(`${OPENROUTER_BASE_URL}/key`, {
+    headers: makeHeaders(apiKey),
+  });
+  if (!res.ok) throw formatFetchError(res, "OpenRouter key API");
+  const json = (await res.json()) as { data?: OpenRouterKeyInfo }; // Response shape is asserted, not validated at runtime.
+  return json.data || {}; // A response without data yields an empty object.
+}
+export async function fetchCredits(apiKey: string): Promise<OpenRouterCreditsInfo | null> { // Best-effort fetch of "<OPENROUTER_BASE_URL>/credits"; never throws, returns null whenever the data is unavailable.
+  try {
+    const res = await fetchWithTimeout(`${OPENROUTER_BASE_URL}/credits`, {
+      headers: makeHeaders(apiKey),
+    });
+    if (!res.ok) return null; // Per the original author: this endpoint requires a management key and may fail with a regular key, so a non-OK status is reported as null, not as an error.
+    const json = (await res.json()) as { data?: OpenRouterCreditsInfo }; // Response shape is asserted, not validated at runtime.
+    return json.data || null;
+  } catch { // Network errors, timeouts (abort) and JSON parse failures are all swallowed on purpose.
+    return null;
+  }
+}
+export function getCachedModels(): OpenRouterModel[] | null { // Synchronous peek at the cached model list; no network call and no TTL or key check.
+  return cachedModels; // Returns the cached array itself (not a copy), or null when nothing is cached.
+}