torus-ai 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -85,10 +85,19 @@ export class AnthropicProvider implements ModelProvider {
85
85
  function toApiMessage(m: Message): any {
86
86
  return {
87
87
  role: m.role,
88
- content: m.content.map((b) => {
89
- if (b.type === "text") return { type: "text", text: b.text };
90
- if (b.type === "tool_use") return { type: "tool_use", id: b.id, name: b.name, input: b.input };
91
- return { type: "tool_result", tool_use_id: b.toolUseId, content: b.content, is_error: b.isError };
88
+ content: m.content.flatMap((b): any[] => {
89
+ if (b.type === "text") return [{ type: "text", text: b.text }];
90
+ if (b.type === "tool_use") return [{ type: "tool_use", id: b.id, name: b.name, input: b.input }];
91
+ if (b.type === "tool_result") {
92
+ return [{ type: "tool_result", tool_use_id: b.toolUseId, content: b.content, is_error: b.isError }];
93
+ }
94
+ if (b.type === "image") {
95
+ const source = b.data
96
+ ? { type: "base64", media_type: b.mimeType ?? "image/png", data: b.data }
97
+ : { type: "url", url: b.url };
98
+ return [{ type: "image", source }];
99
+ }
100
+ return []; // video unsupported on Anthropic — drop the block
92
101
  }),
93
102
  };
94
103
  }
@@ -0,0 +1,118 @@
1
+ import type { ModelProvider, ModelRequest, ModelResponse } from "../types.ts";
2
+ import { DEEPSEEK_V4_PRO, KIMI_K2_6, LLAMA_VISION, NvidiaProvider } from "./nvidia.ts";
3
+ import { GeminiProvider } from "./gemini.ts";
4
+
5
+ // Orchestration: try a prioritized list of (provider, model) steps, falling
6
+ // through to the next on failure (rate limit, error, or capability mismatch).
7
+ // Capability-aware: image requests only go to vision steps; video requests only
8
+ // to video steps — text-only models (Kimi, DeepSeek) are skipped for those.
9
+
10
/** One entry in the cascade: a provider plus the capability flags used for routing. */
export interface CascadeStep {
  /** Provider invoked when this step is tried. */
  provider: ModelProvider;
  /** Human-readable identifier reported to `onFallback`, e.g. "nvidia:kimi-k2.6". */
  label: string;
  /** Whether the step accepts image input. */
  vision: boolean;
  /** Whether the step accepts video input. Treated as `false` when omitted. */
  video?: boolean;
}
16
+
17
/** Constructor options for `CascadeProvider`. */
export interface CascadeOptions {
  /** Steps in priority order. Must contain at least one entry. */
  steps: CascadeStep[];
  /**
   * Called each time an attempted step throws and the cascade moves on to the
   * next one. Steps that are filtered out up front because they lack a required
   * capability are never attempted and do NOT trigger this callback.
   *
   * - `from`: label of the step that failed.
   * - `reason`: the error's message, at most 200 characters.
   * - `needsVision`: true when the request contains image or video content.
   */
  onFallback?: (info: { from: string; reason: string; needsVision: boolean }) => void;
}
22
+
23
/**
 * Provider that tries an ordered list of steps and returns the first successful
 * response.
 *
 * Routing is capability-aware: a request containing video is only offered to
 * steps with `video` set, a request containing images (and no video) only to
 * steps with `vision` set, and any other request to every step.
 */
export class CascadeProvider implements ModelProvider {
  readonly name = "cascade";
  private readonly steps: readonly CascadeStep[];
  private readonly onFallback?: CascadeOptions["onFallback"];

  /**
   * @param opts Steps (in priority order) and an optional fallback observer.
   * @throws Error if `opts.steps` is empty.
   */
  constructor(opts: CascadeOptions) {
    if (!opts.steps.length) throw new Error("CascadeProvider needs at least one step.");
    // Copy so that later mutation of the caller's array cannot change routing.
    this.steps = [...opts.steps];
    this.onFallback = opts.onFallback;
  }

  /**
   * Run the request against each eligible step in order until one succeeds.
   *
   * @returns The response of the first step that does not throw.
   * @throws Error if no step supports the required media type, or if every
   *   eligible step throws. In the latter case the last underlying error is
   *   attached as `cause`.
   */
  async generate(req: ModelRequest): Promise<ModelResponse> {
    const has = (t: "image" | "video"): boolean =>
      req.messages.some((m) => m.content.some((b) => b.type === t));
    const needsVideo = has("video");
    const needsImage = has("image");
    const needsVision = needsImage || needsVideo;

    const eligible = needsVideo
      ? this.steps.filter((s) => s.video)
      : needsImage
        ? this.steps.filter((s) => s.vision)
        : this.steps;

    if (!eligible.length) {
      throw new Error(
        `Cascade: request needs ${needsVideo ? "video" : "image"} input but no step supports it.`,
      );
    }

    let lastErr: unknown;
    for (const step of eligible) {
      try {
        return await step.provider.generate(req);
      } catch (err) {
        lastErr = err;
        // `err` may be anything (including null or a string), so never
        // dereference it blindly — that would abort the cascade mid-way.
        this.onFallback?.({
          from: step.label,
          reason: describeCascadeError(err).slice(0, 200),
          needsVision,
        });
      }
    }

    const exhausted = new Error(
      `Cascade exhausted all steps. Last error: ${describeCascadeError(lastErr)}`,
    );
    // Keep the original failure reachable for callers that want to inspect it.
    (exhausted as Error & { cause?: unknown }).cause = lastErr;
    throw exhausted;
  }
}

/** Turn an arbitrary thrown value into a human-readable message. */
function describeCascadeError(err: unknown): string {
  if (err instanceof Error) return err.message;
  if (typeof err === "string") return err;
  if (err == null) return "unknown";
  const message = (err as { message?: unknown }).message;
  return typeof message === "string" ? message : String(err);
}
69
+
70
/** Options accepted by `createDefaultProvider`; every field is optional. */
export interface DefaultProviderOptions {
  /** API key handed to each NVIDIA step. */
  nvidiaApiKey?: string;
  /** API key handed to the Gemini step. */
  googleApiKey?: string;
  /** Override the main NVIDIA model (default Kimi K2.6). */
  mainModel?: string;
  /** Override the secondary NVIDIA model (default DeepSeek V4 Pro). */
  secondaryModel?: string;
  /** NVIDIA vision model for image requests (default llama-3.2-90b-vision). */
  visionModel?: string;
  /** Gemini model used as the final fallback option (default gemini-2.5-flash). */
  geminiModel?: string;
  /** Fallback observer forwarded to the cascade; a `console.warn` logger is used when omitted. */
  onFallback?: CascadeOptions["onFallback"];
}
83
+
84
/**
 * Build the SDK's recommended default cascade: free NVIDIA endpoints first,
 * with Google as the last fallback. Steps are flagged by capability so that
 * image/video requests skip the text-only ones.
 *
 *   1. NVIDIA Kimi K2.6            — main; agentic + tools (text)
 *   2. NVIDIA DeepSeek V4 Pro      — 1M-ctx text; skipped for media
 *   3. NVIDIA Llama-3.2-90B-Vision — image requests
 *   4. Gemini 2.5 Flash            — final fallback; image + video
 *
 * @param opts Optional API keys, model overrides and fallback observer.
 * @returns A `CascadeProvider` configured with the four steps above.
 */
export function createDefaultProvider(opts: DefaultProviderOptions = {}): CascadeProvider {
  const mainModel = opts.mainModel ?? KIMI_K2_6;
  const secondaryModel = opts.secondaryModel ?? DEEPSEEK_V4_PRO;
  const visionModel = opts.visionModel ?? LLAMA_VISION;
  const geminiModel = opts.geminiModel ?? "gemini-2.5-flash";

  // All NVIDIA steps share the same key and never accept video.
  const nvidiaStep = (model: string, vision: boolean) => ({
    provider: new NvidiaProvider({ model, apiKey: opts.nvidiaApiKey }),
    label: `nvidia:${model}`,
    vision,
    video: false,
  });

  const steps = [
    nvidiaStep(mainModel, false),
    nvidiaStep(secondaryModel, false),
    nvidiaStep(visionModel, true),
    {
      provider: new GeminiProvider({ model: geminiModel, apiKey: opts.googleApiKey }),
      label: `gemini:${geminiModel}`,
      vision: true,
      video: true,
    },
  ];

  return new CascadeProvider({
    steps,
    onFallback:
      opts.onFallback ??
      ((info) => console.warn(`[cascade] ${info.from} failed (${info.reason}); trying next`)),
  });
}
@@ -104,17 +104,27 @@ function toolUseNames(messages: Message[]): Map<string, string> {
104
104
  /** Translate one of our Messages into a Gemini `Content` (role + parts). */
105
105
  function toGeminiContent(m: Message, idToName: Map<string, string>): any {
106
106
  const role = m.role === "assistant" ? "model" : "user";
107
- const parts = m.content.map((b) => {
108
- if (b.type === "text") return { text: b.text };
109
- if (b.type === "tool_use") return { functionCall: { id: b.id, name: b.name, args: b.input } };
110
- // tool_result -> functionResponse
111
- return {
112
- functionResponse: {
113
- id: b.toolUseId,
114
- name: idToName.get(b.toolUseId) ?? b.toolUseId,
115
- response: b.isError ? { error: b.content } : { result: b.content },
116
- },
117
- };
107
+ const parts = m.content.map((b): any => {
108
+ switch (b.type) {
109
+ case "text":
110
+ return { text: b.text };
111
+ case "image":
112
+ case "video":
113
+ // base64 -> inlineData; remote URL -> fileData
114
+ return b.data
115
+ ? { inlineData: { mimeType: b.mimeType ?? "image/png", data: b.data } }
116
+ : { fileData: { mimeType: b.mimeType ?? "image/png", fileUri: b.url ?? "" } };
117
+ case "tool_use":
118
+ return { functionCall: { id: b.id, name: b.name, args: b.input } };
119
+ case "tool_result":
120
+ return {
121
+ functionResponse: {
122
+ id: b.toolUseId,
123
+ name: idToName.get(b.toolUseId) ?? b.toolUseId,
124
+ response: b.isError ? { error: b.content } : { result: b.content },
125
+ },
126
+ };
127
+ }
118
128
  });
119
129
  return { role, parts };
120
130
  }
@@ -0,0 +1,164 @@
1
+ import type {
2
+ ContentBlock,
3
+ MediaBlock,
4
+ ModelProvider,
5
+ ModelRequest,
6
+ ModelResponse,
7
+ } from "../types.ts";
8
+
9
+ // NVIDIA NIM exposes an OpenAI-compatible Chat Completions API, so this provider
10
+ // talks to it with plain `fetch` — no extra SDK dependency. Free hosted endpoints
11
+ // (e.g. Kimi K2.6, DeepSeek V4) are the SDK's default models via the cascade.
12
+
13
/** Base URL used for NVIDIA requests unless a provider overrides it. */
export const NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";

// Model IDs — exact IDs confirmed against GET /v1/models.

/** Kimi K2.6: 256K ctx, tools, agentic — text-only on NIM (verified). */
export const KIMI_K2_6 = "moonshotai/kimi-k2.6";

/** DeepSeek V4 Pro: 1M ctx, tools, text-only. */
export const DEEPSEEK_V4_PRO = "deepseek-ai/deepseek-v4-pro";

/** DeepSeek V4 Flash: faster/cheaper, text-only. */
export const DEEPSEEK_V4_FLASH = "deepseek-ai/deepseek-v4-flash";

/** Llama 3.2 90B Vision: free NVIDIA vision model (image), verified. */
export const LLAMA_VISION = "meta/llama-3.2-90b-vision-instruct";
20
+
21
/** Construction options for `NvidiaProvider`; every field is optional. */
export interface NvidiaOptions {
  /** Model ID to request. Defaults to `KIMI_K2_6`. */
  model?: string;
  /** API key. Defaults to the `NVIDIA_API_KEY` environment variable. */
  apiKey?: string;
  /** API base URL. Defaults to `NVIDIA_BASE_URL`. */
  baseURL?: string;
  /** Value sent as `max_tokens`. Defaults to 2048. */
  maxTokens?: number;
  /** Sampling temperature. Defaults to 0.2. */
  temperature?: number;
}
28
+
29
/**
 * `ModelProvider` backed by an OpenAI-style `/chat/completions` endpoint,
 * called with plain `fetch`.
 */
export class NvidiaProvider implements ModelProvider {
  readonly name = "nvidia";
  private readonly model: string;
  private readonly apiKey?: string;
  private readonly baseURL: string;
  private readonly maxTokens: number;
  private readonly temperature: number;

  /** @param opts Model, credentials and sampling overrides; see `NvidiaOptions`. */
  constructor(opts: NvidiaOptions = {}) {
    this.model = opts.model ?? KIMI_K2_6;
    this.apiKey = opts.apiKey ?? process.env.NVIDIA_API_KEY;
    // Strip trailing slashes so the request path never contains "//".
    this.baseURL = (opts.baseURL ?? NVIDIA_BASE_URL).replace(/\/+$/, "");
    this.maxTokens = opts.maxTokens ?? 2048;
    this.temperature = opts.temperature ?? 0.2; // low default for deterministic agent behavior
  }

  /**
   * Send the request and translate the first returned choice into content blocks.
   *
   * @returns Text and/or `tool_use` blocks (a single empty text block when the
   *   model produced neither) plus `stopReason` — `"tool_use"` when the model
   *   asked for tools, `"end_turn"` otherwise.
   * @throws Error if no API key is configured, the HTTP status is not OK, or
   *   the response carries no choices.
   */
  async generate(req: ModelRequest): Promise<ModelResponse> {
    if (!this.apiKey) throw new Error("NvidiaProvider needs NVIDIA_API_KEY (nvapi-...).");

    const body: Record<string, unknown> = {
      model: this.model,
      messages: toOpenAIMessages(req),
      max_tokens: this.maxTokens,
      temperature: this.temperature,
    };
    // Only advertise tools when there are any; tolerate a missing `tools` array.
    if (req.tools?.length) {
      body.tools = req.tools.map((t) => ({
        type: "function",
        function: { name: t.name, description: t.description, parameters: t.inputSchema },
      }));
      body.tool_choice = "auto";
    }

    const res = await fetch(`${this.baseURL}/chat/completions`, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${this.apiKey}`,
        "Content-Type": "application/json",
        Accept: "application/json",
      },
      body: JSON.stringify(body),
    });

    if (!res.ok) {
      throw new Error(`NVIDIA ${this.model} ${res.status}: ${(await res.text()).slice(0, 300)}`);
    }

    const json: any = await res.json();
    const choice = json?.choices?.[0];
    if (!choice) {
      // Surface a malformed/empty payload as a failure instead of returning an
      // empty assistant turn that looks like a real answer.
      throw new Error(`NVIDIA ${this.model}: response contained no choices.`);
    }
    const msg = choice.message ?? {};

    const content: ContentBlock[] = [];
    if (typeof msg.content === "string" && msg.content.trim()) {
      content.push({ type: "text", text: msg.content });
    }
    const toolCalls: any[] = msg.tool_calls ?? [];
    for (const tc of toolCalls) {
      content.push({
        type: "tool_use",
        id: tc.id ?? "",
        name: tc.function?.name ?? "",
        input: safeParse(tc.function?.arguments),
      });
    }
    // Always return at least one block.
    if (content.length === 0) content.push({ type: "text", text: "" });

    const stopReason =
      choice.finish_reason === "tool_calls" || toolCalls.length ? "tool_use" : "end_turn";
    return { content, stopReason };
  }
}
100
+
101
/**
 * Coerce tool-call arguments into a plain object.
 *
 * A string is parsed as JSON; any other value is inspected as-is. Whatever the
 * input, the result is only accepted when it is a non-null, non-array object —
 * invalid JSON, primitives, `null` and arrays all yield `{}`, so the declared
 * return type always holds.
 *
 * @param args Raw `arguments` value from a tool call (string or already-parsed).
 * @returns The argument object, or `{}` when none could be recovered.
 */
function safeParse(args: unknown): Record<string, unknown> {
  let value: unknown = args;
  if (typeof args === "string") {
    try {
      value = JSON.parse(args);
    } catch {
      return {};
    }
  }
  return typeof value === "object" && value !== null && !Array.isArray(value)
    ? (value as Record<string, unknown>)
    : {};
}
109
+
110
/**
 * Map our Messages into OpenAI-style chat messages (tool calls + tool results + media).
 *
 * - `req.system`, when set, becomes a leading `system` message.
 * - User turns: each `tool_result` block becomes its own `{role:"tool"}`
 *   message, emitted before the turn's user message. Remaining text/image/video
 *   blocks form one `user` message — a newline-joined string when text-only, an
 *   array of parts when any media is present.
 * - Every other turn is emitted as an `assistant` message: text blocks are
 *   newline-joined and `tool_use` blocks become `tool_calls`. `content` is
 *   `null` only when tool calls are present; a turn with neither text nor tool
 *   calls gets `""` so the message is never content-less.
 */
function toOpenAIMessages(req: ModelRequest): any[] {
  const out: any[] = [];
  if (req.system) out.push({ role: "system", content: req.system });

  for (const m of req.messages) {
    if (m.role === "user") {
      const parts: ContentBlock[] = [];
      for (const b of m.content) {
        if (b.type === "tool_result") {
          // tool_result blocks become individual {role:"tool"} messages
          out.push({ role: "tool", tool_call_id: b.toolUseId, content: b.content });
        } else if (b.type === "text" || b.type === "image" || b.type === "video") {
          parts.push(b);
        }
      }
      if (parts.length) {
        const multimodal = parts.some((b) => b.type !== "text");
        out.push({
          role: "user",
          content: multimodal
            ? parts.map((b) => toOpenAIPart(b))
            : parts.map((b) => (b as { text: string }).text).join("\n"),
        });
      }
      continue;
    }

    // assistant
    const text = m.content
      .filter((b) => b.type === "text")
      .map((b) => (b as { text: string }).text)
      .join("\n");
    const toolUses = m.content.filter((b) => b.type === "tool_use");
    const msg: any = { role: "assistant", content: text || (toolUses.length ? null : "") };
    if (toolUses.length) {
      msg.tool_calls = toolUses.map((b: any) => ({
        id: b.id,
        type: "function",
        // JSON.stringify(undefined) is undefined, which would drop `arguments`.
        function: { name: b.name, arguments: JSON.stringify(b.input ?? {}) },
      }));
    }
    out.push(msg);
  }
  return out;
}
155
+
156
/**
 * Convert a text/image/video block into an OpenAI-style content part.
 *
 * Text becomes `{type:"text"}`. Media becomes `image_url` or `video_url`,
 * pointing at `url` when it is non-empty and otherwise at a `data:` URL built
 * from `data`. When `mimeType` is missing, the `data:` URL defaults to
 * "image/png" for images and "video/mp4" for video.
 *
 * @throws Error if a media block carries neither `url` nor `data`.
 */
function toOpenAIPart(b: ContentBlock): any {
  if (b.type === "text") return { type: "text", text: b.text };

  const media = b as MediaBlock;
  const isVideo = media.type === "video";
  const fallbackMime = isVideo ? "video/mp4" : "image/png";
  // `||` (not `??`) so an empty-string url does not shadow usable base64 data.
  const url =
    media.url || (media.data ? `data:${media.mimeType ?? fallbackMime};base64,${media.data}` : "");
  if (!url) {
    // Fail here with a clear message rather than sending an empty URL.
    throw new Error(`NVIDIA: ${media.type} block needs either \`url\` or \`data\`.`);
  }
  return isVideo
    ? { type: "video_url", video_url: { url } } // experimental
    : { type: "image_url", image_url: { url } };
}
package/src/types.ts CHANGED
@@ -22,7 +22,23 @@ export interface ToolResultBlock {
22
22
  content: string;
23
23
  isError?: boolean;
24
24
  }
25
- export type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock;
25
/**
 * Multimodal input. Provide either a remote `url` or base64 `data` (+ `mimeType`).
 * Image is broadly supported; video is experimental and model-dependent — it is
 * only routed to steps flagged as video-capable (Gemini in the default cascade;
 * Kimi K2.6 is text-only on NIM).
 */
export interface MediaBlock {
  /** Kind of media carried by this block. */
  type: "image" | "video";
  /** Remote URL or a `data:` URL. */
  url?: string;
  /** Raw base64 payload (paired with `mimeType`). */
  data?: string;
  /** MIME type of the payload, e.g. "image/png", "image/jpeg", "video/mp4". */
  mimeType?: string;
}

/** Any block that may appear in a message's `content`. */
export type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock | MediaBlock;
37
+
38
/**
 * Report whether any message in the list contains an image or video block
 * (intended to drive vision routing).
 *
 * @param messages Messages to scan.
 * @returns `true` as soon as one image/video block is found, `false` otherwise.
 */
export function hasMedia(messages: Message[]): boolean {
  for (const message of messages) {
    for (const block of message.content) {
      if (block.type === "image" || block.type === "video") return true;
    }
  }
  return false;
}
26
42
 
27
43
  export interface Message {
28
44
  role: Role;