torus-ai 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,107 @@
1
+ import { hasMedia } from "../types.ts";
2
+ import type { ModelProvider, ModelRequest, ModelResponse } from "../types.ts";
3
+ import { DEEPSEEK_V4_PRO, KIMI_K2_6, NvidiaProvider } from "./nvidia.ts";
4
+ import { GeminiProvider } from "./gemini.ts";
5
+
6
+ // Orchestration: try a prioritized list of (provider, model) steps, falling
7
+ // through to the next on failure (rate limit, error, or capability mismatch).
8
+ // Capability-aware: requests that carry image/video are only sent to steps that
9
+ // support vision — text-only models (e.g. DeepSeek) are skipped for those.
10
+
11
/** One entry in the cascade: a provider plus the metadata used for routing and reporting. */
export interface CascadeStep {
  /** Provider whose `generate` is invoked for this step. */
  provider: ModelProvider;
  /** Human-readable identifier reported on fallback, e.g. "nvidia:kimi-k2.6". */
  label: string;
  /** Whether this step's model accepts image/video input. */
  vision: boolean;
}
16
+
17
/** Constructor options for `CascadeProvider`. */
export interface CascadeOptions {
  /** Steps in priority order; the first eligible step is tried first. */
  steps: CascadeStep[];
  /** Called when a step is skipped or fails and the cascade falls through. */
  onFallback?: (info: {
    from: string;
    reason: string;
    needsVision: boolean;
  }) => void;
}
22
+
23
/**
 * Provider that walks a prioritized list of steps and returns the first
 * successful response. Requests carrying image/video content are only sent to
 * steps flagged `vision: true`.
 */
export class CascadeProvider implements ModelProvider {
  readonly name = "cascade";
  private readonly steps: CascadeStep[];
  private readonly onFallback?: CascadeOptions["onFallback"];

  /**
   * @param opts Steps (in priority order) and an optional fallback observer.
   * @throws Error when `opts.steps` is empty.
   */
  constructor(opts: CascadeOptions) {
    if (!opts.steps.length) throw new Error("CascadeProvider needs at least one step.");
    // Copy so that later mutation of the caller's array cannot change the cascade.
    this.steps = [...opts.steps];
    this.onFallback = opts.onFallback;
  }

  /**
   * Try each eligible step in order and return the first response obtained.
   *
   * @param req The request forwarded unchanged to each step's provider.
   * @returns The response of the first step that does not throw.
   * @throws Error when the request carries media but no step supports vision,
   *   or when every eligible step failed (the last failure's message is included).
   */
  async generate(req: ModelRequest): Promise<ModelResponse> {
    const needsVision = hasMedia(req.messages);
    const eligible = this.steps.filter((s) => !needsVision || s.vision);

    if (!eligible.length) {
      throw new Error("Cascade: request needs vision but no step supports image/video input.");
    }

    let lastErr: unknown;
    for (const step of eligible) {
      try {
        return await step.provider.generate(req);
      } catch (err) {
        lastErr = err;
        this.notifyFallback({
          from: step.label,
          reason: CascadeProvider.describeError(err).slice(0, 200),
          needsVision,
        });
      }
    }
    throw new Error(
      `Cascade exhausted all steps. Last error: ${CascadeProvider.describeError(lastErr)}`,
    );
  }

  /** Invoke the observer, if any, without letting its own failure stop the cascade. */
  private notifyFallback(info: { from: string; reason: string; needsVision: boolean }): void {
    try {
      this.onFallback?.(info);
    } catch {
      // The callback is purely informational; a throwing observer must not
      // prevent the remaining steps from being tried.
    }
  }

  /**
   * Best-effort message for an arbitrary thrown value. Anything can be thrown,
   * including `null`/`undefined`, so `.message` is never read unguarded.
   */
  private static describeError(err: unknown): string {
    if (err instanceof Error) return err.message;
    if (err === null || err === undefined) return "unknown";
    if (typeof err === "object") {
      const message = (err as { message?: unknown }).message;
      if (typeof message === "string") return message;
    }
    return String(err);
  }
}
58
+
59
/** Options accepted by `createDefaultProvider`; every field is optional. */
export interface DefaultProviderOptions {
  /** API key handed to both NVIDIA steps. */
  nvidiaApiKey?: string;
  /** API key handed to the Gemini step. */
  googleApiKey?: string;
  /** Override the main NVIDIA model (default Kimi K2.6). */
  mainModel?: string;
  /** Override the secondary NVIDIA model (default DeepSeek V4 Pro). */
  secondaryModel?: string;
  /** Gemini model used as the final fallback option (default gemini-2.5-flash). */
  geminiModel?: string;
  /** Fallback observer; when omitted, a `console.warn` logger is installed. */
  onFallback?: CascadeOptions["onFallback"];
}
70
+
71
/**
 * Build the SDK's recommended default cascade: free NVIDIA endpoints first,
 * with Google as the last fallback.
 *
 *   1. NVIDIA Kimi K2.6       — main; agentic + multimodal (image/video)
 *   2. NVIDIA DeepSeek V4 Pro — text-only; skipped for image/video requests
 *   3. Gemini 2.5 Flash       — final fallback; multimodal
 *
 * @param opts Optional model overrides, API keys and fallback observer.
 * @returns A `CascadeProvider` wired with the three steps above, in that order.
 */
export function createDefaultProvider(opts: DefaultProviderOptions = {}): CascadeProvider {
  const mainModel = opts.mainModel ?? KIMI_K2_6;
  const secondaryModel = opts.secondaryModel ?? DEEPSEEK_V4_PRO;
  const geminiModel = opts.geminiModel ?? "gemini-2.5-flash";

  // Used only when the caller did not supply an observer of their own.
  const warnOnFallback: NonNullable<CascadeOptions["onFallback"]> = (info) =>
    console.warn(`[cascade] ${info.from} failed (${info.reason}); trying next`);

  // Both NVIDIA steps share the same key and label scheme.
  const nvidiaStep = (model: string, vision: boolean): CascadeStep => ({
    provider: new NvidiaProvider({ model, apiKey: opts.nvidiaApiKey }),
    label: `nvidia:${model}`,
    vision,
  });

  const steps: CascadeStep[] = [
    nvidiaStep(mainModel, true), // Kimi K2.6 accepts image + video
    nvidiaStep(secondaryModel, false), // DeepSeek V4 is text-only
    {
      provider: new GeminiProvider({ model: geminiModel, apiKey: opts.googleApiKey }),
      label: `gemini:${geminiModel}`,
      vision: true,
    },
  ];

  return new CascadeProvider({
    onFallback: opts.onFallback ?? warnOnFallback,
    steps,
  });
}
@@ -104,17 +104,27 @@ function toolUseNames(messages: Message[]): Map<string, string> {
104
104
  /** Translate one of our Messages into a Gemini `Content` (role + parts). */
105
105
  function toGeminiContent(m: Message, idToName: Map<string, string>): any {
106
106
  const role = m.role === "assistant" ? "model" : "user";
107
- const parts = m.content.map((b) => {
108
- if (b.type === "text") return { text: b.text };
109
- if (b.type === "tool_use") return { functionCall: { id: b.id, name: b.name, args: b.input } };
110
- // tool_result -> functionResponse
111
- return {
112
- functionResponse: {
113
- id: b.toolUseId,
114
- name: idToName.get(b.toolUseId) ?? b.toolUseId,
115
- response: b.isError ? { error: b.content } : { result: b.content },
116
- },
117
- };
107
+ const parts = m.content.map((b): any => {
108
+ switch (b.type) {
109
+ case "text":
110
+ return { text: b.text };
111
+ case "image":
112
+ case "video":
113
+ // base64 -> inlineData; remote URL -> fileData
114
+ return b.data
115
+ ? { inlineData: { mimeType: b.mimeType ?? "image/png", data: b.data } }
116
+ : { fileData: { mimeType: b.mimeType ?? "image/png", fileUri: b.url ?? "" } };
117
+ case "tool_use":
118
+ return { functionCall: { id: b.id, name: b.name, args: b.input } };
119
+ case "tool_result":
120
+ return {
121
+ functionResponse: {
122
+ id: b.toolUseId,
123
+ name: idToName.get(b.toolUseId) ?? b.toolUseId,
124
+ response: b.isError ? { error: b.content } : { result: b.content },
125
+ },
126
+ };
127
+ }
118
128
  });
119
129
  return { role, parts };
120
130
  }
@@ -0,0 +1,163 @@
1
+ import type {
2
+ ContentBlock,
3
+ MediaBlock,
4
+ ModelProvider,
5
+ ModelRequest,
6
+ ModelResponse,
7
+ } from "../types.ts";
8
+
9
+ // NVIDIA NIM exposes an OpenAI-compatible Chat Completions API, so this provider
10
+ // talks to it with plain `fetch` — no extra SDK dependency. Free hosted endpoints
11
+ // (e.g. Kimi K2.6, DeepSeek V4) are the SDK's default models via the cascade.
12
+
13
/** Root of NVIDIA's hosted OpenAI-compatible API. */
export const NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";

// Exact IDs confirmed against GET /v1/models.

/** Kimi K2.6: 256K ctx, tools, vision (image+video). */
export const KIMI_K2_6 = "moonshotai/kimi-k2.6";

/** DeepSeek V4 Pro: 1M ctx, tools, text-only. */
export const DEEPSEEK_V4_PRO = "deepseek-ai/deepseek-v4-pro";

/** DeepSeek V4 Flash: faster/cheaper, text-only. */
export const DEEPSEEK_V4_FLASH = "deepseek-ai/deepseek-v4-flash";
19
+
20
/** Constructor options for `NvidiaProvider`; every field is optional. */
export interface NvidiaOptions {
  /** Model id sent in the request body. */
  model?: string;
  /** Bearer token for the API. */
  apiKey?: string;
  /** API root to which `/chat/completions` is appended. */
  baseURL?: string;
  /** Value sent as `max_tokens`. */
  maxTokens?: number;
  /** Value sent as `temperature`. */
  temperature?: number;
}
27
+
28
/**
 * `ModelProvider` that POSTs to an OpenAI-compatible `/chat/completions`
 * endpoint using plain `fetch`.
 */
export class NvidiaProvider implements ModelProvider {
  readonly name = "nvidia";
  private readonly model: string;
  private readonly apiKey?: string;
  private readonly baseURL: string;
  private readonly maxTokens: number;
  private readonly temperature: number;

  /**
   * @param opts Optional overrides. Defaults: model `KIMI_K2_6`, API key from
   *   the `NVIDIA_API_KEY` environment variable, base URL `NVIDIA_BASE_URL`,
   *   `maxTokens` 2048, `temperature` 0.6.
   */
  constructor(opts: NvidiaOptions = {}) {
    this.model = opts.model ?? KIMI_K2_6;
    this.apiKey = opts.apiKey ?? process.env.NVIDIA_API_KEY;
    // Drop trailing slashes so the request URL never contains "//chat/completions".
    this.baseURL = (opts.baseURL ?? NVIDIA_BASE_URL).replace(/\/+$/, "");
    this.maxTokens = opts.maxTokens ?? 2048;
    this.temperature = opts.temperature ?? 0.6;
  }

  /**
   * Send the request and translate the first choice into content blocks.
   *
   * @param req System prompt, messages and tool definitions to send.
   * @returns Text and/or tool-use blocks; `stopReason` is "tool_use" when the
   *   model requested tool calls, otherwise "end_turn".
   * @throws Error when no API key is configured, when the HTTP status is not
   *   ok, or when the response body carries no `choices[0].message`.
   */
  async generate(req: ModelRequest): Promise<ModelResponse> {
    if (!this.apiKey) throw new Error("NvidiaProvider needs NVIDIA_API_KEY (nvapi-...).");

    const body: Record<string, unknown> = {
      model: this.model,
      messages: toOpenAIMessages(req),
      max_tokens: this.maxTokens,
      temperature: this.temperature,
    };
    if (req.tools.length) {
      body.tools = req.tools.map((t) => ({
        type: "function",
        function: { name: t.name, description: t.description, parameters: t.inputSchema },
      }));
      body.tool_choice = "auto";
    }

    const res = await fetch(`${this.baseURL}/chat/completions`, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${this.apiKey}`,
        "Content-Type": "application/json",
        Accept: "application/json",
      },
      body: JSON.stringify(body),
    });

    if (!res.ok) {
      // Truncate the body so a large error page does not flood logs.
      throw new Error(`NVIDIA ${this.model} ${res.status}: ${(await res.text()).slice(0, 300)}`);
    }

    const json: any = await res.json();
    const choice = json?.choices?.[0];
    if (!choice?.message) {
      // A 2xx response without a message is a failure, not an empty answer;
      // throwing lets a surrounding cascade move on to its next step.
      throw new Error(`NVIDIA ${this.model}: response contained no choices.`);
    }
    const msg = choice.message;

    const content: ContentBlock[] = [];
    if (typeof msg.content === "string" && msg.content.trim()) {
      content.push({ type: "text", text: msg.content });
    }
    const toolCalls: any[] = Array.isArray(msg.tool_calls) ? msg.tool_calls : [];
    for (const tc of toolCalls) {
      content.push({
        type: "tool_use",
        id: tc.id ?? "",
        name: tc.function?.name ?? "",
        input: safeParse(tc.function?.arguments),
      });
    }
    // Always return at least one block, even for an empty completion.
    if (content.length === 0) content.push({ type: "text", text: "" });

    const stopReason =
      choice.finish_reason === "tool_calls" || toolCalls.length ? "tool_use" : "end_turn";
    return { content, stopReason };
  }
}
99
+
100
/**
 * Coerce tool-call arguments into a plain object.
 *
 * Strings are parsed as JSON. Anything that is not (or does not parse to) a
 * non-null, non-array object yields `{}`, so the declared return type always
 * holds — `JSON.parse` alone can return `null`, numbers, strings or arrays.
 *
 * @param args Raw `arguments` value from a tool call (normally a JSON string).
 * @returns The parsed object, or `{}` when the input is missing or unusable.
 */
function safeParse(args: unknown): Record<string, unknown> {
  let value: unknown = args;
  if (typeof args === "string") {
    try {
      value = JSON.parse(args);
    } catch {
      return {};
    }
  }
  const isPlainObject = value !== null && typeof value === "object" && !Array.isArray(value);
  return isPlainObject ? (value as Record<string, unknown>) : {};
}
108
+
109
/**
 * Map our Messages into OpenAI-style chat messages (tool calls + tool results + media).
 *
 * - A non-empty `req.system` becomes a leading `system` message.
 * - In a user message, each `tool_result` block becomes its own `role: "tool"`
 *   message, emitted before the message's text/media content.
 * - User content is a plain string (texts joined by newlines) unless media is
 *   present, in which case it is an array of content parts.
 * - Assistant text blocks are joined by newlines; `tool_use` blocks become
 *   `tool_calls` with JSON-encoded arguments.
 *
 * @param req The request whose system prompt and messages are converted.
 * @returns The messages array for the chat-completions request body.
 */
function toOpenAIMessages(req: ModelRequest): any[] {
  const out: any[] = [];
  if (req.system) out.push({ role: "system", content: req.system });

  for (const m of req.messages) {
    if (m.role === "user") {
      // tool_result blocks become individual {role:"tool"} messages
      for (const b of m.content) {
        if (b.type === "tool_result") {
          out.push({ role: "tool", tool_call_id: b.toolUseId, content: b.content });
        }
      }
      const parts = m.content.filter(
        (b) => b.type === "text" || b.type === "image" || b.type === "video",
      );
      if (parts.length) {
        const multimodal = parts.some((b) => b.type !== "text");
        out.push({
          role: "user",
          content: multimodal
            ? parts.map(toOpenAIPart)
            : parts.map((b) => (b as { text: string }).text).join("\n"),
        });
      }
    } else {
      // assistant
      const text = m.content
        .filter((b) => b.type === "text")
        .map((b) => (b as { text: string }).text)
        .join("\n");
      const toolUses = m.content.filter((b) => b.type === "tool_use");
      // `content: null` is only valid alongside tool_calls; a turn with neither
      // text nor tool calls must still carry a string.
      const msg: any = { role: "assistant", content: text || (toolUses.length ? null : "") };
      if (toolUses.length) {
        msg.tool_calls = toolUses.map((b: any) => ({
          id: b.id,
          type: "function",
          function: { name: b.name, arguments: JSON.stringify(b.input) },
        }));
      }
      out.push(msg);
    }
  }
  return out;
}
154
+
155
/**
 * Convert a text or media block into an OpenAI-style content part.
 *
 * Media uses `url` when it is set; otherwise base64 `data` is wrapped in a
 * `data:` URL (mime type defaults to "application/octet-stream"); otherwise the
 * URL is the empty string.
 */
function toOpenAIPart(b: ContentBlock): any {
  if (b.type === "text") {
    return { type: "text", text: b.text };
  }

  const block = b as MediaBlock;
  let url: string;
  if (block.url !== undefined && block.url !== null) {
    url = block.url;
  } else if (block.data) {
    url = `data:${block.mimeType ?? "application/octet-stream"};base64,${block.data}`;
  } else {
    url = "";
  }

  // video_url is experimental; everything else is sent as an image.
  return block.type === "video"
    ? { type: "video_url", video_url: { url } }
    : { type: "image_url", image_url: { url } };
}
package/src/types.ts CHANGED
@@ -22,7 +22,23 @@ export interface ToolResultBlock {
22
22
  content: string;
23
23
  isError?: boolean;
24
24
  }
25
- export type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock;
25
/**
 * Multimodal input block. Supply either a remote `url` or base64 `data`
 * (together with `mimeType`). Image is broadly supported; video is experimental
 * and model-dependent (routed to a video-capable model like Kimi K2.6).
 */
export interface MediaBlock {
  /** Kind of media carried by this block. */
  type: "image" | "video";
  /** Remote URL or a data: URL. */
  url?: string;
  /** Raw base64 payload (paired with `mimeType`). */
  data?: string;
  /** Media type, e.g. "image/png", "image/jpeg", "video/mp4". */
  mimeType?: string;
}
36
/** Any block that may appear in a message's `content` array. */
export type ContentBlock =
  | TextBlock
  | ToolUseBlock
  | ToolResultBlock
  | MediaBlock;
37
+
38
/**
 * Report whether any message in the list carries image or video content
 * (drives vision routing).
 *
 * @param messages Messages to scan.
 * @returns `true` as soon as one image/video block is found, otherwise `false`.
 */
export function hasMedia(messages: Message[]): boolean {
  for (const message of messages) {
    for (const block of message.content) {
      if (block.type === "image" || block.type === "video") {
        return true;
      }
    }
  }
  return false;
}
26
42
 
27
43
  export interface Message {
28
44
  role: Role;