npm - torus-ai - Versions diffs - 0.1.0 → 0.2.0 - Mend

torus-ai 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/README.md +50 -17
package/dist/index.d.ts +87 -6
package/dist/index.js +224 -16
package/dist/index.js.map +1 -1
package/models/POLICY.md +58 -0
package/models/registry.json +52 -0
package/package.json +5 -4
package/src/index.ts +30 -8
package/src/providers/anthropic.ts +13 -4
package/src/providers/cascade.ts +107 -0
package/src/providers/gemini.ts +21 -11
package/src/providers/nvidia.ts +163 -0
package/src/types.ts +17 -1

package/README.md CHANGED Viewed

@@ -36,7 +36,7 @@ folder; open them to inspect the handoff.
 | Context management | [`src/context.ts`](./src/context.ts) — layered, scoped loading (Layers 0–4) |
 | `query()` streaming | [`src/index.ts`](./src/index.ts) — single-shot run yielding events |
 | Pipeline orchestration | [`src/pipeline.ts`](./src/pipeline.ts) — sequential stages + review gates |
-| Model backends | [`src/providers/`](./src/providers/) — `MockProvider`, `AnthropicProvider` |
+| Model backends | [`src/providers/`](./src/providers/) — `NvidiaProvider`, `GeminiProvider`, `AnthropicProvider`, `MockProvider` + `CascadeProvider` |
 ## Three ways to use it
@@ -77,33 +77,66 @@ const claude = new AnthropicProvider({ model: "claude-sonnet-4-6" });
 const gemini = new GeminiProvider({ model: "gemini-2.5-flash" });
 ```
-## Providers & cost routing
+## Providers & the default cascade
-Two pluggable providers implement the same `ModelProvider` interface, so they
-drop into `query()`, `runPipeline()`, or `runLoop()` interchangeably:
+Four pluggable providers implement the same `ModelProvider` interface and drop
+into `query()`, `runPipeline()`, or `runLoop()` interchangeably:
-| Provider | Package | Env | Default |
+| Provider | Package | Env | Default model |
 |---|---|---|---|
-| `AnthropicProvider` | `@anthropic-ai/sdk` | `ANTHROPIC_API_KEY` | `claude-sonnet-4-6` |
+| `NvidiaProvider` | none (`fetch`) | `NVIDIA_API_KEY` | `moonshotai/kimi-k2.6` |
 | `GeminiProvider` | `@google/genai` | `GOOGLE_API_KEY` | `gemini-2.5-flash` |
+| `AnthropicProvider` | `@anthropic-ai/sdk` | `ANTHROPIC_API_KEY` | `claude-sonnet-4-6` |
+| `MockProvider` | none | — | offline |
-Both support **intelligent cost routing** — set `route: true` and each request is
-classified (fast keyword/length heuristics first, then a structured-output "judge"
-call on the *cheap* model) and sent to the cheap or expensive model accordingly.
-The classifier never throws: on any failure it falls back to the expensive model.
+**The default is a free-first cascade.** If you don't pass a provider, `query()`
+uses `createDefaultProvider()` — it tries each step and falls through on failure:
+1. **NVIDIA Kimi K2.6** — main; agentic + multimodal (image/video), free NIM endpoint
+2. **NVIDIA DeepSeek V4 Pro** — 1M-context text model, free; *skipped for image/video*
+3. **Gemini 2.5 Flash** — final fallback, different provider for resilience
 ```ts
-const provider = new GeminiProvider({ route: true });
-//        cheap: gemini-2.5-flash-lite   expensive: gemini-2.5-pro
-// (AnthropicProvider({ route: true }) → claude-haiku-4-5 vs claude-sonnet-4-6)
+import { query } from "torus-ai";          // NVIDIA_API_KEY in env → cascade default
+for await (const ev of query("Explain MoE in one line")) { /* ... */ }
+import { createDefaultProvider } from "torus-ai";
+const provider = createDefaultProvider({ mainModel: "moonshotai/kimi-k2.6" });
+```
+It's **capability-aware**: image/video requests automatically skip text-only steps.
+### Multimodal (image now, video experimental)
+Pass content blocks instead of a string. Images route to a vision-capable step
+(Kimi / Gemini / Claude); video is best-effort to Kimi.
+```ts
+await query([
+  { type: "text", text: "What's in this image?" },
+  { type: "image", url: "https://example.com/cat.png" },         // or { data, mimeType }
+]);
+```
+### Cost routing (per provider)
+The Claude and Gemini providers also support `route: true`: fast heuristics run
+first, then a structured "judge" call on the *cheap* model picks cheap vs
+expensive. Routing never throws; on any failure it falls back to the expensive model:
+```ts
+new GeminiProvider({ route: true });   // gemini-2.5-flash-lite ↔ gemini-2.5-pro
+new AnthropicProvider({ route: true }); // claude-haiku-4-5 ↔ claude-sonnet-4-6
 import { getRoutingStats } from "torus-ai";
-console.log(getRoutingStats()); // { cheap, expensive, cheapPct, expensivePct, total }
 ```
-Model constants (`CHEAP_MODEL`, `EXPENSIVE_MODEL`, `GEMINI_CHEAP_MODEL`,
-`GEMINI_EXPENSIVE_MODEL`) and the low-level `selectModel` / `selectGeminiModel`
-are exported if you want to route outside the providers.
+## Keeping models fresh
+[`models/registry.json`](./models/registry.json) is the source of truth for the
+cascade; [`models/POLICY.md`](./models/POLICY.md) sets the rules for what earns a slot in it.
+A weekly GitHub Action ([model-watch.yml](./.github/workflows/model-watch.yml))
+pulls NVIDIA's live `/v1/models`, flags new free endpoints as candidates, and opens
+a PR for human review against the policy. Run it locally with `npm run model-watch`.
 ## The stage contract (Layer 2)

package/dist/index.d.ts CHANGED Viewed

@@ -15,7 +15,20 @@ interface ToolResultBlock {
     content: string;
     isError?: boolean;
 }
-type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock;
+/**
+ * Multimodal input. Provide either a remote `url` or base64 `data` (+ `mimeType`).
+ * Image is broadly supported; video is experimental and model-dependent (routed
+ * to a video-capable model like Kimi K2.6).
+ */
+interface MediaBlock {
+    type: "image" | "video";
+    url?: string;
+    data?: string;
+    mimeType?: string;
+}
+type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock | MediaBlock;
+/** True if a message list carries any image/video content (drives vision routing). */
+declare function hasMedia(messages: Message[]): boolean;
 interface Message {
     role: Role;
     content: ContentBlock[];
@@ -312,6 +325,70 @@ declare class GeminiProvider implements ModelProvider {
     generate(req: ModelRequest): Promise<ModelResponse>;
 }
+declare const NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";
+declare const KIMI_K2_6 = "moonshotai/kimi-k2.6";
+declare const DEEPSEEK_V4_PRO = "deepseek-ai/deepseek-v4-pro";
+declare const DEEPSEEK_V4_FLASH = "deepseek-ai/deepseek-v4-flash";
+interface NvidiaOptions {
+    model?: string;
+    apiKey?: string;
+    baseURL?: string;
+    maxTokens?: number;
+    temperature?: number;
+}
+declare class NvidiaProvider implements ModelProvider {
+    readonly name = "nvidia";
+    private model;
+    private apiKey?;
+    private baseURL;
+    private maxTokens;
+    private temperature;
+    constructor(opts?: NvidiaOptions);
+    generate(req: ModelRequest): Promise<ModelResponse>;
+}
+interface CascadeStep {
+    provider: ModelProvider;
+    label: string;
+    vision: boolean;
+}
+interface CascadeOptions {
+    steps: CascadeStep[];
+    /** Called when a step is skipped or fails and the cascade falls through. */
+    onFallback?: (info: {
+        from: string;
+        reason: string;
+        needsVision: boolean;
+    }) => void;
+}
+declare class CascadeProvider implements ModelProvider {
+    readonly name = "cascade";
+    private steps;
+    private onFallback?;
+    constructor(opts: CascadeOptions);
+    generate(req: ModelRequest): Promise<ModelResponse>;
+}
+interface DefaultProviderOptions {
+    nvidiaApiKey?: string;
+    googleApiKey?: string;
+    /** Override the main NVIDIA model (default Kimi K2.6). */
+    mainModel?: string;
+    /** Override the secondary NVIDIA model (default DeepSeek V4 Pro). */
+    secondaryModel?: string;
+    /** Gemini model used as the final fallback option (default gemini-2.5-flash). */
+    geminiModel?: string;
+    onFallback?: CascadeOptions["onFallback"];
+}
+/**
+ * The SDK's recommended default: free NVIDIA endpoints first, with Google
+ * Gemini as the final fallback.
+ *
+ *   1. NVIDIA Kimi K2.6        — main; agentic + multimodal (image/video)
+ *   2. NVIDIA DeepSeek V4 Pro  — text-only; skipped for image/video requests
+ *   3. Gemini 2.5 Flash        — final fallback; multimodal
+ */
+declare function createDefaultProvider(opts?: DefaultProviderOptions): CascadeProvider;
 declare const CHEAP_MODEL = "claude-haiku-4-5";
 declare const EXPENSIVE_MODEL = "claude-sonnet-4-6";
 declare const GEMINI_CHEAP_MODEL = "gemini-2.5-flash-lite";
@@ -354,7 +431,8 @@ declare function getRoutingStats(): RoutingStats;
 declare function latestUserText(messages: Message[]): string;
 interface QueryOptions {
-    provider: ModelProvider;
+    /** Defaults to the NVIDIA-first cascade (Kimi K2.6 → DeepSeek V4 → Gemini). */
+    provider?: ModelProvider;
     system?: string;
     mcpServers?: SdkMcpServer[];
     includeBuiltins?: boolean;
@@ -364,10 +442,13 @@ interface QueryOptions {
 }
 /**
  * Single-shot agent run (no pipeline). Mirrors the Claude Agent SDK's streaming
- * `query()`: yields events as they happen and a final `result` event.
+ * `query()`: yields events as they happen and a final `result` event. The prompt
+ * may be a string or an array of content blocks (e.g. text + image for vision).
  *
- *   for await (const ev of query("Summarize X", { provider, mcpServers: [srv] })) { ... }
+ *   for await (const ev of query("Summarize X", { mcpServers: [srv] })) { ... }
+ *   for await (const ev of query([{ type: "text", text: "What's this?" },
+ *                                 { type: "image", url: "https://..." }])) { ... }
  */
-declare function query(prompt: string, options: QueryOptions): AsyncGenerator<AgentEvent>;
+declare function query(prompt: string | ContentBlock[], options?: QueryOptions): AsyncGenerator<AgentEvent>;
-export { type AgentEvent, type AnthropicOptions, AnthropicProvider, CHEAP_MODEL, type CanUseTool, type Complexity, type ContentBlock, EXPENSIVE_MODEL, GEMINI_CHEAP_MODEL, GEMINI_EXPENSIVE_MODEL, type GeminiOptions, GeminiProvider, type JSONSchema, type LoadedContext, type LoopOptions, type LoopResult, type Message, type MockOptions, MockProvider, type ModelProvider, type ModelRequest, type ModelResponse, type PermissionConfig, type PermissionDecision, PermissionEngine, type PipelineOptions, type QueryOptions, type RegisteredTool, type Role, type RouterOptions, type RoutingStats, type SdkMcpServer, type StageContract, type StageInput, type StopReason, type TextBlock, type ToolContext, type ToolDefinition, ToolRegistry, type ToolResultBlock, type ToolResultPayload, type ToolSchema, type ToolUseBlock, builtinTools, classifyComplexity, classifyComplexityGemini, createSdkMcpServer, fastHeuristic, getRoutingStats, judgeComplexity, judgeComplexityGemini, latestUserText, listDirTool, loadStageContext, loadStages, matchesAllow, parseContract, query, readFileTool, runLoop, runPipeline, selectGeminiModel, selectModel, tool, writeFileTool };
+export { type AgentEvent, type AnthropicOptions, AnthropicProvider, CHEAP_MODEL, type CanUseTool, type CascadeOptions, CascadeProvider, type CascadeStep, type Complexity, type ContentBlock, DEEPSEEK_V4_FLASH, DEEPSEEK_V4_PRO, type DefaultProviderOptions, EXPENSIVE_MODEL, GEMINI_CHEAP_MODEL, GEMINI_EXPENSIVE_MODEL, type GeminiOptions, GeminiProvider, type JSONSchema, KIMI_K2_6, type LoadedContext, type LoopOptions, type LoopResult, type MediaBlock, type Message, type MockOptions, MockProvider, type ModelProvider, type ModelRequest, type ModelResponse, NVIDIA_BASE_URL, type NvidiaOptions, NvidiaProvider, type PermissionConfig, type PermissionDecision, PermissionEngine, type PipelineOptions, type QueryOptions, type RegisteredTool, type Role, type RouterOptions, type RoutingStats, type SdkMcpServer, type StageContract, type StageInput, type StopReason, type TextBlock, type ToolContext, type ToolDefinition, ToolRegistry, type ToolResultBlock, type ToolResultPayload, type ToolSchema, type ToolUseBlock, builtinTools, classifyComplexity, classifyComplexityGemini, createDefaultProvider, createSdkMcpServer, fastHeuristic, getRoutingStats, hasMedia, judgeComplexity, judgeComplexityGemini, latestUserText, listDirTool, loadStageContext, loadStages, matchesAllow, parseContract, query, readFileTool, runLoop, runPipeline, selectGeminiModel, selectModel, tool, writeFileTool };

package/dist/index.js CHANGED Viewed

@@ -1,3 +1,8 @@
+// src/types.ts
+function hasMedia(messages) {
+  return messages.some((m) => m.content.some((b) => b.type === "image" || b.type === "video"));
+}
 // src/tools.ts
 function tool(name, description, inputSchema, handler) {
   return { name, description, inputSchema, handler };
@@ -591,10 +596,17 @@ var AnthropicProvider = class {
 function toApiMessage(m) {
   return {
     role: m.role,
-    content: m.content.map((b) => {
-      if (b.type === "text") return { type: "text", text: b.text };
-      if (b.type === "tool_use") return { type: "tool_use", id: b.id, name: b.name, input: b.input };
-      return { type: "tool_result", tool_use_id: b.toolUseId, content: b.content, is_error: b.isError };
+    content: m.content.flatMap((b) => {
+      if (b.type === "text") return [{ type: "text", text: b.text }];
+      if (b.type === "tool_use") return [{ type: "tool_use", id: b.id, name: b.name, input: b.input }];
+      if (b.type === "tool_result") {
+        return [{ type: "tool_result", tool_use_id: b.toolUseId, content: b.content, is_error: b.isError }];
+      }
+      if (b.type === "image") {
+        const source = b.data ? { type: "base64", media_type: b.mimeType ?? "image/png", data: b.data } : { type: "url", url: b.url };
+        return [{ type: "image", source }];
+      }
+      return [];
     })
   };
 }
@@ -667,27 +679,215 @@ function toolUseNames(messages) {
 function toGeminiContent(m, idToName) {
   const role = m.role === "assistant" ? "model" : "user";
   const parts = m.content.map((b) => {
-    if (b.type === "text") return { text: b.text };
-    if (b.type === "tool_use") return { functionCall: { id: b.id, name: b.name, args: b.input } };
-    return {
-      functionResponse: {
-        id: b.toolUseId,
-        name: idToName.get(b.toolUseId) ?? b.toolUseId,
-        response: b.isError ? { error: b.content } : { result: b.content }
-      }
-    };
+    switch (b.type) {
+      case "text":
+        return { text: b.text };
+      case "image":
+      case "video":
+        return b.data ? { inlineData: { mimeType: b.mimeType ?? "image/png", data: b.data } } : { fileData: { mimeType: b.mimeType ?? "image/png", fileUri: b.url ?? "" } };
+      case "tool_use":
+        return { functionCall: { id: b.id, name: b.name, args: b.input } };
+      case "tool_result":
+        return {
+          functionResponse: {
+            id: b.toolUseId,
+            name: idToName.get(b.toolUseId) ?? b.toolUseId,
+            response: b.isError ? { error: b.content } : { result: b.content }
+          }
+        };
+    }
   });
   return { role, parts };
 }
+// src/providers/nvidia.ts
+var NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";
+var KIMI_K2_6 = "moonshotai/kimi-k2.6";
+var DEEPSEEK_V4_PRO = "deepseek-ai/deepseek-v4-pro";
+var DEEPSEEK_V4_FLASH = "deepseek-ai/deepseek-v4-flash";
+var NvidiaProvider = class {
+  name = "nvidia";
+  model;
+  apiKey;
+  baseURL;
+  maxTokens;
+  temperature;
+  constructor(opts = {}) {
+    this.model = opts.model ?? KIMI_K2_6;
+    this.apiKey = opts.apiKey ?? process.env.NVIDIA_API_KEY;
+    this.baseURL = opts.baseURL ?? NVIDIA_BASE_URL;
+    this.maxTokens = opts.maxTokens ?? 2048;
+    this.temperature = opts.temperature ?? 0.6;
+  }
+  async generate(req) {
+    if (!this.apiKey) throw new Error("NvidiaProvider needs NVIDIA_API_KEY (nvapi-...).");
+    const body = {
+      model: this.model,
+      messages: toOpenAIMessages(req),
+      max_tokens: this.maxTokens,
+      temperature: this.temperature
+    };
+    if (req.tools.length) {
+      body.tools = req.tools.map((t) => ({
+        type: "function",
+        function: { name: t.name, description: t.description, parameters: t.inputSchema }
+      }));
+      body.tool_choice = "auto";
+    }
+    const res = await fetch(`${this.baseURL}/chat/completions`, {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${this.apiKey}`,
+        "Content-Type": "application/json",
+        Accept: "application/json"
+      },
+      body: JSON.stringify(body)
+    });
+    if (!res.ok) {
+      throw new Error(`NVIDIA ${this.model} ${res.status}: ${(await res.text()).slice(0, 300)}`);
+    }
+    const json = await res.json();
+    const choice = json.choices?.[0];
+    const msg = choice?.message ?? {};
+    const content = [];
+    if (typeof msg.content === "string" && msg.content.trim()) {
+      content.push({ type: "text", text: msg.content });
+    }
+    const toolCalls = msg.tool_calls ?? [];
+    for (const tc of toolCalls) {
+      content.push({
+        type: "tool_use",
+        id: tc.id ?? "",
+        name: tc.function?.name ?? "",
+        input: safeParse(tc.function?.arguments)
+      });
+    }
+    if (content.length === 0) content.push({ type: "text", text: "" });
+    const stopReason = choice?.finish_reason === "tool_calls" || toolCalls.length ? "tool_use" : "end_turn";
+    return { content, stopReason };
+  }
+};
+function safeParse(args) {
+  if (typeof args !== "string") return args ?? {};
+  try {
+    return JSON.parse(args);
+  } catch {
+    return {};
+  }
+}
+function toOpenAIMessages(req) {
+  const out = [];
+  if (req.system) out.push({ role: "system", content: req.system });
+  for (const m of req.messages) {
+    if (m.role === "user") {
+      for (const b of m.content) {
+        if (b.type === "tool_result") {
+          out.push({ role: "tool", tool_call_id: b.toolUseId, content: b.content });
+        }
+      }
+      const parts = m.content.filter(
+        (b) => b.type === "text" || b.type === "image" || b.type === "video"
+      );
+      if (parts.length) {
+        const multimodal = parts.some((b) => b.type !== "text");
+        out.push({
+          role: "user",
+          content: multimodal ? parts.map(toOpenAIPart) : parts.map((b) => b.text).join("\n")
+        });
+      }
+    } else {
+      const text = m.content.filter((b) => b.type === "text").map((b) => b.text).join("\n");
+      const toolUses = m.content.filter((b) => b.type === "tool_use");
+      const msg = { role: "assistant", content: text || null };
+      if (toolUses.length) {
+        msg.tool_calls = toolUses.map((b) => ({
+          id: b.id,
+          type: "function",
+          function: { name: b.name, arguments: JSON.stringify(b.input) }
+        }));
+      }
+      out.push(msg);
+    }
+  }
+  return out;
+}
+function toOpenAIPart(b) {
+  if (b.type === "text") return { type: "text", text: b.text };
+  const media = b;
+  const url = media.url ?? (media.data ? `data:${media.mimeType ?? "application/octet-stream"};base64,${media.data}` : "");
+  if (media.type === "video") return { type: "video_url", video_url: { url } };
+  return { type: "image_url", image_url: { url } };
+}
+// src/providers/cascade.ts
+var CascadeProvider = class {
+  name = "cascade";
+  steps;
+  onFallback;
+  constructor(opts) {
+    if (!opts.steps.length) throw new Error("CascadeProvider needs at least one step.");
+    this.steps = opts.steps;
+    this.onFallback = opts.onFallback;
+  }
+  async generate(req) {
+    const needsVision = hasMedia(req.messages);
+    const eligible = this.steps.filter((s) => !needsVision || s.vision);
+    if (!eligible.length) {
+      throw new Error("Cascade: request needs vision but no step supports image/video input.");
+    }
+    let lastErr;
+    for (const step of eligible) {
+      try {
+        return await step.provider.generate(req);
+      } catch (err) {
+        lastErr = err;
+        this.onFallback?.({
+          from: step.label,
+          reason: err.message?.slice(0, 200) ?? "unknown",
+          needsVision
+        });
+      }
+    }
+    throw new Error(`Cascade exhausted all steps. Last error: ${lastErr?.message}`);
+  }
+};
+function createDefaultProvider(opts = {}) {
+  const main = opts.mainModel ?? KIMI_K2_6;
+  const secondary = opts.secondaryModel ?? DEEPSEEK_V4_PRO;
+  const gemini = opts.geminiModel ?? "gemini-2.5-flash";
+  return new CascadeProvider({
+    onFallback: opts.onFallback ?? ((info) => console.warn(`[cascade] ${info.from} failed (${info.reason}); trying next`)),
+    steps: [
+      {
+        provider: new NvidiaProvider({ model: main, apiKey: opts.nvidiaApiKey }),
+        label: `nvidia:${main}`,
+        vision: true
+        // Kimi K2.6 accepts image + video
+      },
+      {
+        provider: new NvidiaProvider({ model: secondary, apiKey: opts.nvidiaApiKey }),
+        label: `nvidia:${secondary}`,
+        vision: false
+        // DeepSeek V4 is text-only
+      },
+      {
+        provider: new GeminiProvider({ model: gemini, apiKey: opts.googleApiKey }),
+        label: `gemini:${gemini}`,
+        vision: true
+      }
+    ]
+  });
+}
 // src/index.ts
-async function* query(prompt, options) {
+async function* query(prompt, options = {}) {
   const registry = new ToolRegistry();
   if (options.includeBuiltins ?? true) registry.addBuiltins(builtinTools);
   for (const s of options.mcpServers ?? []) registry.addServer(s);
-  const messages = [{ role: "user", content: [{ type: "text", text: prompt }] }];
+  const content = typeof prompt === "string" ? [{ type: "text", text: prompt }] : prompt;
+  const messages = [{ role: "user", content }];
   const result = yield* runLoop({
-    provider: options.provider,
+    provider: options.provider ?? createDefaultProvider(),
     registry,
     permissions: new PermissionEngine(options.permissions ?? {}),
     system: options.system ?? "You are a helpful agent.",
@@ -700,19 +900,27 @@ async function* query(prompt, options) {
 export {
   AnthropicProvider,
   CHEAP_MODEL,
+  CascadeProvider,
+  DEEPSEEK_V4_FLASH,
+  DEEPSEEK_V4_PRO,
   EXPENSIVE_MODEL,
   GEMINI_CHEAP_MODEL,
   GEMINI_EXPENSIVE_MODEL,
   GeminiProvider,
+  KIMI_K2_6,
   MockProvider,
+  NVIDIA_BASE_URL,
+  NvidiaProvider,
   PermissionEngine,
   ToolRegistry,
   builtinTools,
   classifyComplexity,
   classifyComplexityGemini,
+  createDefaultProvider,
   createSdkMcpServer,
   fastHeuristic,
   getRoutingStats,
+  hasMedia,
   judgeComplexity,
   judgeComplexityGemini,
   latestUserText,