npm - torus-ai - Versions diffs - 0.1.0 → 0.2.1 - Mend

torus-ai 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/README.md +56 -17
package/dist/index.d.ts +93 -6
package/dist/index.js +225 -16
package/dist/index.js.map +1 -1
package/models/POLICY.md +58 -0
package/models/registry.json +63 -0
package/package.json +5 -4
package/src/index.ts +31 -8
package/src/providers/anthropic.ts +13 -4
package/src/providers/cascade.ts +118 -0
package/src/providers/gemini.ts +21 -11
package/src/providers/nvidia.ts +164 -0
package/src/types.ts +17 -1

package/src/providers/anthropic.ts CHANGED Viewed

@@ -85,10 +85,19 @@ export class AnthropicProvider implements ModelProvider {
 function toApiMessage(m: Message): any {
   return {
     role: m.role,
-    content: m.content.map((b) => {
-      if (b.type === "text") return { type: "text", text: b.text };
-      if (b.type === "tool_use") return { type: "tool_use", id: b.id, name: b.name, input: b.input };
-      return { type: "tool_result", tool_use_id: b.toolUseId, content: b.content, is_error: b.isError };
+    content: m.content.flatMap((b): any[] => {
+      if (b.type === "text") return [{ type: "text", text: b.text }];
+      if (b.type === "tool_use") return [{ type: "tool_use", id: b.id, name: b.name, input: b.input }];
+      if (b.type === "tool_result") {
+        return [{ type: "tool_result", tool_use_id: b.toolUseId, content: b.content, is_error: b.isError }];
+      }
+      if (b.type === "image") {
+        const source = b.data
+          ? { type: "base64", media_type: b.mimeType ?? "image/png", data: b.data }
+          : { type: "url", url: b.url };
+        return [{ type: "image", source }];
+      }
+      return []; // video unsupported on Anthropic — drop the block
     }),
   };
 }

package/src/providers/cascade.ts ADDED Viewed

@@ -0,0 +1,118 @@
+import type { ModelProvider, ModelRequest, ModelResponse } from "../types.ts";
+import { DEEPSEEK_V4_PRO, KIMI_K2_6, LLAMA_VISION, NvidiaProvider } from "./nvidia.ts";
+import { GeminiProvider } from "./gemini.ts";
+// Orchestration: try a prioritized list of (provider, model) steps, falling
+// through to the next on failure (rate limit, error, or capability mismatch).
+// Capability-aware: image requests only go to vision steps; video requests only
+// to video steps — text-only models (Kimi, DeepSeek) are skipped for those.
+export interface CascadeStep {
+  provider: ModelProvider;
+  label: string; // e.g. "nvidia:kimi-k2.6"
+  vision: boolean; // accepts image input?
+  video?: boolean; // accepts video input? (default false)
+}
+export interface CascadeOptions {
+  steps: CascadeStep[];
+  /** Called when a step is skipped or fails and the cascade falls through. */
+  onFallback?: (info: { from: string; reason: string; needsVision: boolean }) => void;
+}
+export class CascadeProvider implements ModelProvider {
+  readonly name = "cascade";
+  private steps: CascadeStep[];
+  private onFallback?: CascadeOptions["onFallback"];
+  constructor(opts: CascadeOptions) {
+    if (!opts.steps.length) throw new Error("CascadeProvider needs at least one step.");
+    this.steps = opts.steps;
+    this.onFallback = opts.onFallback;
+  }
+  async generate(req: ModelRequest): Promise<ModelResponse> {
+    const has = (t: "image" | "video") =>
+      req.messages.some((m) => m.content.some((b) => b.type === t));
+    const needsVideo = has("video");
+    const needsImage = has("image");
+    const needsVision = needsImage || needsVideo;
+    const eligible = needsVideo
+      ? this.steps.filter((s) => s.video)
+      : needsImage
+        ? this.steps.filter((s) => s.vision)
+        : this.steps;
+    if (!eligible.length) {
+      throw new Error(
+        `Cascade: request needs ${needsVideo ? "video" : "image"} input but no step supports it.`,
+      );
+    }
+    let lastErr: unknown;
+    for (const step of eligible) {
+      try {
+        return await step.provider.generate(req);
+      } catch (err) {
+        lastErr = err;
+        this.onFallback?.({
+          from: step.label,
+          reason: (err as Error).message?.slice(0, 200) ?? "unknown",
+          needsVision,
+        });
+      }
+    }
+    throw new Error(`Cascade exhausted all steps. Last error: ${(lastErr as Error)?.message}`);
+  }
+}
+export interface DefaultProviderOptions {
+  nvidiaApiKey?: string;
+  googleApiKey?: string;
+  /** Override the main NVIDIA model (default Kimi K2.6). */
+  mainModel?: string;
+  /** Override the secondary NVIDIA model (default DeepSeek V4 Pro). */
+  secondaryModel?: string;
+  /** NVIDIA vision model for image requests (default llama-3.2-90b-vision). */
+  visionModel?: string;
+  /** Gemini model used as the final fallback option (default gemini-2.5-flash). */
+  geminiModel?: string;
+  onFallback?: CascadeOptions["onFallback"];
+}
+/**
+ * The SDK's recommended default: free NVIDIA endpoints first, with Google
+ * Gemini as the final fallback. Capability-aware — image/video requests skip
+ * the text-only steps automatically.
+ *
+ *   1. NVIDIA Kimi K2.6                  — main; agentic + tools (text)
+ *   2. NVIDIA DeepSeek V4 Pro            — 1M-ctx text; skipped for media
+ *   3. NVIDIA Llama-3.2-90B-Vision       — image requests
+ *   4. Gemini 2.5 Flash                  — final fallback; image + video
+ */
+export function createDefaultProvider(opts: DefaultProviderOptions = {}): CascadeProvider {
+  const main = opts.mainModel ?? KIMI_K2_6;
+  const secondary = opts.secondaryModel ?? DEEPSEEK_V4_PRO;
+  const vision = opts.visionModel ?? LLAMA_VISION;
+  const gemini = opts.geminiModel ?? "gemini-2.5-flash";
+  const nv = (model: string) => new NvidiaProvider({ model, apiKey: opts.nvidiaApiKey });
+  return new CascadeProvider({
+    onFallback:
+      opts.onFallback ??
+      ((info) =>
+        console.warn(`[cascade] ${info.from} failed (${info.reason}); trying next`)),
+    steps: [
+      { provider: nv(main), label: `nvidia:${main}`, vision: false, video: false },
+      { provider: nv(secondary), label: `nvidia:${secondary}`, vision: false, video: false },
+      { provider: nv(vision), label: `nvidia:${vision}`, vision: true, video: false },
+      {
+        provider: new GeminiProvider({ model: gemini, apiKey: opts.googleApiKey }),
+        label: `gemini:${gemini}`,
+        vision: true,
+        video: true,
+      },
+    ],
+  });
+}

package/src/providers/gemini.ts CHANGED Viewed

@@ -104,17 +104,27 @@ function toolUseNames(messages: Message[]): Map<string, string> {
 /** Translate one of our Messages into a Gemini `Content` (role + parts). */
 function toGeminiContent(m: Message, idToName: Map<string, string>): any {
   const role = m.role === "assistant" ? "model" : "user";
-  const parts = m.content.map((b) => {
-    if (b.type === "text") return { text: b.text };
-    if (b.type === "tool_use") return { functionCall: { id: b.id, name: b.name, args: b.input } };
-    // tool_result -> functionResponse
-    return {
-      functionResponse: {
-        id: b.toolUseId,
-        name: idToName.get(b.toolUseId) ?? b.toolUseId,
-        response: b.isError ? { error: b.content } : { result: b.content },
-      },
-    };
+  const parts = m.content.map((b): any => {
+    switch (b.type) {
+      case "text":
+        return { text: b.text };
+      case "image":
+      case "video":
+        // base64 -> inlineData; remote URL -> fileData
+        return b.data
+          ? { inlineData: { mimeType: b.mimeType ?? "image/png", data: b.data } }
+          : { fileData: { mimeType: b.mimeType ?? "image/png", fileUri: b.url ?? "" } };
+      case "tool_use":
+        return { functionCall: { id: b.id, name: b.name, args: b.input } };
+      case "tool_result":
+        return {
+          functionResponse: {
+            id: b.toolUseId,
+            name: idToName.get(b.toolUseId) ?? b.toolUseId,
+            response: b.isError ? { error: b.content } : { result: b.content },
+          },
+        };
+    }
   });
   return { role, parts };
 }

package/src/providers/nvidia.ts ADDED Viewed

@@ -0,0 +1,164 @@
+import type {
+  ContentBlock,
+  MediaBlock,
+  ModelProvider,
+  ModelRequest,
+  ModelResponse,
+} from "../types.ts";
+// NVIDIA NIM exposes an OpenAI-compatible Chat Completions API, so this provider
+// talks to it with plain `fetch` — no extra SDK dependency. Free hosted endpoints
+// (e.g. Kimi K2.6, DeepSeek V4) are the SDK's default models via the cascade.
+export const NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";
+// Exact IDs confirmed against GET /v1/models.
+export const KIMI_K2_6 = "moonshotai/kimi-k2.6"; // 256K ctx, tools, agentic — text-only on NIM (verified)
+export const DEEPSEEK_V4_PRO = "deepseek-ai/deepseek-v4-pro"; // 1M ctx, tools, text-only
+export const DEEPSEEK_V4_FLASH = "deepseek-ai/deepseek-v4-flash"; // faster/cheaper, text-only
+export const LLAMA_VISION = "meta/llama-3.2-90b-vision-instruct"; // free NVIDIA vision model (image), verified
+export interface NvidiaOptions {
+  model?: string;
+  apiKey?: string;
+  baseURL?: string;
+  maxTokens?: number;
+  temperature?: number;
+}
+export class NvidiaProvider implements ModelProvider {
+  readonly name = "nvidia";
+  private model: string;
+  private apiKey?: string;
+  private baseURL: string;
+  private maxTokens: number;
+  private temperature: number;
+  constructor(opts: NvidiaOptions = {}) {
+    this.model = opts.model ?? KIMI_K2_6;
+    this.apiKey = opts.apiKey ?? process.env.NVIDIA_API_KEY;
+    this.baseURL = opts.baseURL ?? NVIDIA_BASE_URL;
+    this.maxTokens = opts.maxTokens ?? 2048;
+    this.temperature = opts.temperature ?? 0.2; // low default for deterministic agent behavior
+  }
+  async generate(req: ModelRequest): Promise<ModelResponse> {
+    if (!this.apiKey) throw new Error("NvidiaProvider needs NVIDIA_API_KEY (nvapi-...).");
+    const body: Record<string, unknown> = {
+      model: this.model,
+      messages: toOpenAIMessages(req),
+      max_tokens: this.maxTokens,
+      temperature: this.temperature,
+    };
+    if (req.tools.length) {
+      body.tools = req.tools.map((t) => ({
+        type: "function",
+        function: { name: t.name, description: t.description, parameters: t.inputSchema },
+      }));
+      body.tool_choice = "auto";
+    }
+    const res = await fetch(`${this.baseURL}/chat/completions`, {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${this.apiKey}`,
+        "Content-Type": "application/json",
+        Accept: "application/json",
+      },
+      body: JSON.stringify(body),
+    });
+    if (!res.ok) {
+      throw new Error(`NVIDIA ${this.model} ${res.status}: ${(await res.text()).slice(0, 300)}`);
+    }
+    const json: any = await res.json();
+    const choice = json.choices?.[0];
+    const msg = choice?.message ?? {};
+    const content: ContentBlock[] = [];
+    if (typeof msg.content === "string" && msg.content.trim()) {
+      content.push({ type: "text", text: msg.content });
+    }
+    const toolCalls: any[] = msg.tool_calls ?? [];
+    for (const tc of toolCalls) {
+      content.push({
+        type: "tool_use",
+        id: tc.id ?? "",
+        name: tc.function?.name ?? "",
+        input: safeParse(tc.function?.arguments),
+      });
+    }
+    if (content.length === 0) content.push({ type: "text", text: "" });
+    const stopReason =
+      choice?.finish_reason === "tool_calls" || toolCalls.length ? "tool_use" : "end_turn";
+    return { content, stopReason };
+  }
+}
+function safeParse(args: unknown): Record<string, unknown> {
+  if (typeof args !== "string") return (args as Record<string, unknown>) ?? {};
+  try {
+    return JSON.parse(args);
+  } catch {
+    return {};
+  }
+}
+/** Map our Messages into OpenAI-style chat messages (tool calls + tool results + media). */
+function toOpenAIMessages(req: ModelRequest): any[] {
+  const out: any[] = [];
+  if (req.system) out.push({ role: "system", content: req.system });
+  for (const m of req.messages) {
+    if (m.role === "user") {
+      // tool_result blocks become individual {role:"tool"} messages
+      for (const b of m.content) {
+        if (b.type === "tool_result") {
+          out.push({ role: "tool", tool_call_id: b.toolUseId, content: b.content });
+        }
+      }
+      const parts = m.content.filter(
+        (b) => b.type === "text" || b.type === "image" || b.type === "video",
+      );
+      if (parts.length) {
+        const multimodal = parts.some((b) => b.type !== "text");
+        out.push({
+          role: "user",
+          content: multimodal
+            ? parts.map(toOpenAIPart)
+            : parts.map((b) => (b as { text: string }).text).join("\n"),
+        });
+      }
+    } else {
+      // assistant
+      const text = m.content
+        .filter((b) => b.type === "text")
+        .map((b) => (b as { text: string }).text)
+        .join("\n");
+      const toolUses = m.content.filter((b) => b.type === "tool_use");
+      const msg: any = { role: "assistant", content: text || null };
+      if (toolUses.length) {
+        msg.tool_calls = toolUses.map((b: any) => ({
+          id: b.id,
+          type: "function",
+          function: { name: b.name, arguments: JSON.stringify(b.input) },
+        }));
+      }
+      out.push(msg);
+    }
+  }
+  return out;
+}
+function toOpenAIPart(b: ContentBlock): any {
+  if (b.type === "text") return { type: "text", text: b.text };
+  const media = b as MediaBlock;
+  const url =
+    media.url ??
+    (media.data ? `data:${media.mimeType ?? "application/octet-stream"};base64,${media.data}` : "");
+  if (media.type === "video") return { type: "video_url", video_url: { url } }; // experimental
+  return { type: "image_url", image_url: { url } };
+}

package/src/types.ts CHANGED Viewed

@@ -22,7 +22,23 @@ export interface ToolResultBlock {
   content: string;
   isError?: boolean;
 }
-export type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock;
+/**
+ * Multimodal input. Provide either a remote `url` or base64 `data` (+ `mimeType`).
+ * Image is broadly supported; video is experimental and model-dependent (the default
+ * cascade routes it only to steps flagged `video: true` — Gemini, not Kimi K2.6).
+ */
+export interface MediaBlock {
+  type: "image" | "video";
+  url?: string; // remote URL or a data: URL
+  data?: string; // raw base64 (paired with mimeType)
+  mimeType?: string; // e.g. "image/png", "image/jpeg", "video/mp4"
+}
+export type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock | MediaBlock;
+/** True if a message list carries any image/video content (drives vision routing). */
+export function hasMedia(messages: Message[]): boolean {
+  return messages.some((m) => m.content.some((b) => b.type === "image" || b.type === "video"));
+}
 export interface Message {
   role: Role;