torus-ai 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -17
- package/dist/index.d.ts +87 -6
- package/dist/index.js +224 -16
- package/dist/index.js.map +1 -1
- package/models/POLICY.md +58 -0
- package/models/registry.json +52 -0
- package/package.json +5 -4
- package/src/index.ts +30 -8
- package/src/providers/anthropic.ts +13 -4
- package/src/providers/cascade.ts +107 -0
- package/src/providers/gemini.ts +21 -11
- package/src/providers/nvidia.ts +163 -0
- package/src/types.ts +17 -1
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { hasMedia } from "../types.ts";
|
|
2
|
+
import type { ModelProvider, ModelRequest, ModelResponse } from "../types.ts";
|
|
3
|
+
import { DEEPSEEK_V4_PRO, KIMI_K2_6, NvidiaProvider } from "./nvidia.ts";
|
|
4
|
+
import { GeminiProvider } from "./gemini.ts";
|
|
5
|
+
|
|
6
|
+
// Orchestration: try a prioritized list of (provider, model) steps, falling
|
|
7
|
+
// through to the next on failure (rate limit, error, or capability mismatch).
|
|
8
|
+
// Capability-aware: requests that carry image/video are only sent to steps that
|
|
9
|
+
// support vision — text-only models (e.g. DeepSeek) are skipped for those.
|
|
10
|
+
|
|
11
|
+
/** One (provider, model) entry in a cascade. */
export interface CascadeStep {
  /** Provider whose `generate` is called when this step is tried. */
  provider: ModelProvider;
  /** Identifier reported as `from` to `onFallback`, e.g. "nvidia:kimi-k2.6". */
  label: string;
  /** Does this step's model accept image/video input? */
  vision: boolean;
}
|
|
16
|
+
|
|
17
|
+
/** Construction options for `CascadeProvider`. */
export interface CascadeOptions {
  /** Steps in priority order; must contain at least one entry. */
  steps: CascadeStep[];
  /**
   * Called each time a step's `generate` throws and the cascade falls through
   * to the next eligible step. `from` is the failing step's label and `reason`
   * is its error message truncated to 200 characters. Steps that are filtered
   * out up front because the request needs vision are not reported here.
   */
  onFallback?: (info: { from: string; reason: string; needsVision: boolean }) => void;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Provider that tries a prioritized list of steps and returns the first
 * successful response. Requests that carry image/video content are only sent
 * to steps flagged `vision: true`.
 */
export class CascadeProvider implements ModelProvider {
  readonly name = "cascade";
  private readonly steps: readonly CascadeStep[];
  private readonly onFallback?: CascadeOptions["onFallback"];

  /**
   * @param opts Steps (in priority order) and an optional fallback callback.
   * @throws Error when `opts.steps` is empty.
   */
  constructor(opts: CascadeOptions) {
    if (!opts.steps.length) throw new Error("CascadeProvider needs at least one step.");
    // Snapshot the list: later mutation of the caller's array must not be able
    // to empty or reorder the cascade after the non-empty check above.
    this.steps = [...opts.steps];
    this.onFallback = opts.onFallback;
  }

  /**
   * Runs the request through the eligible steps in order.
   *
   * @returns The response of the first step whose `generate` resolves.
   * @throws Error when the request needs vision and no step supports it, or
   *   when every eligible step failed (the last failure is attached as `cause`).
   */
  async generate(req: ModelRequest): Promise<ModelResponse> {
    const needsVision = hasMedia(req.messages);
    const eligible = this.steps.filter((s) => !needsVision || s.vision);

    if (!eligible.length) {
      throw new Error("Cascade: request needs vision but no step supports image/video input.");
    }

    let lastErr: unknown;
    for (const step of eligible) {
      try {
        return await step.provider.generate(req);
      } catch (err) {
        lastErr = err;
        this.onFallback?.({
          from: step.label,
          // describeError never throws, so a provider rejecting with `null`
          // or a non-Error value cannot break the fall-through.
          reason: CascadeProvider.describeError(err).slice(0, 200),
          needsVision,
        });
      }
    }

    const exhausted = new Error(
      `Cascade exhausted all steps. Last error: ${CascadeProvider.describeError(lastErr)}`,
    );
    // Keep the original failure reachable for callers that inspect `cause`.
    (exhausted as Error & { cause?: unknown }).cause = lastErr;
    throw exhausted;
  }

  /** Best-effort, non-throwing extraction of a message from any thrown value. */
  private static describeError(err: unknown): string {
    if (err instanceof Error) return err.message;
    if (typeof err === "string") return err;
    if (typeof err === "object" && err !== null) {
      const message = (err as { message?: unknown }).message;
      if (typeof message === "string") return message;
    }
    return "unknown";
  }
}
|
|
58
|
+
|
|
59
|
+
/** Options accepted by `createDefaultProvider`; every field is optional. */
export interface DefaultProviderOptions {
  /** API key handed to both NVIDIA steps. */
  nvidiaApiKey?: string;
  /** API key handed to the Gemini step. */
  googleApiKey?: string;
  /** Model ID for the first (main) NVIDIA step; defaults to Kimi K2.6. */
  mainModel?: string;
  /** Model ID for the second NVIDIA step; defaults to DeepSeek V4 Pro. */
  secondaryModel?: string;
  /** Gemini model used for the final step; defaults to gemini-2.5-flash. */
  geminiModel?: string;
  /** Fallback callback; when omitted, a `console.warn` logger is installed. */
  onFallback?: CascadeOptions["onFallback"];
}
|
|
70
|
+
|
|
71
|
+
/**
 * Builds the SDK's recommended default cascade: free NVIDIA endpoints first,
 * with Google as the last resort.
 *
 *   1. NVIDIA Kimi K2.6       — main; agentic + multimodal (image/video)
 *   2. NVIDIA DeepSeek V4 Pro — text-only; skipped for image/video requests
 *   3. Gemini 2.5 Flash       — final fallback; multimodal
 *
 * The `vision` flags below are fixed per position and are not derived from the
 * model IDs, so they describe the default models listed above.
 *
 * @param opts Optional API keys, model overrides and fallback callback.
 * @returns A `CascadeProvider` wired with the three steps above.
 */
export function createDefaultProvider(opts: DefaultProviderOptions = {}): CascadeProvider {
  const mainModel = opts.mainModel ?? KIMI_K2_6;
  const secondaryModel = opts.secondaryModel ?? DEEPSEEK_V4_PRO;
  const geminiModel = opts.geminiModel ?? "gemini-2.5-flash";

  // Logger used when the caller does not supply its own callback.
  const warnOnFallback: NonNullable<CascadeOptions["onFallback"]> = (info) =>
    console.warn(`[cascade] ${info.from} failed (${info.reason}); trying next`);
  const onFallback = opts.onFallback ?? warnOnFallback;

  const primaryStep: CascadeStep = {
    provider: new NvidiaProvider({ model: mainModel, apiKey: opts.nvidiaApiKey }),
    label: `nvidia:${mainModel}`,
    vision: true, // Kimi K2.6 accepts image + video
  };
  const textOnlyStep: CascadeStep = {
    provider: new NvidiaProvider({ model: secondaryModel, apiKey: opts.nvidiaApiKey }),
    label: `nvidia:${secondaryModel}`,
    vision: false, // DeepSeek V4 is text-only
  };
  const lastResortStep: CascadeStep = {
    provider: new GeminiProvider({ model: geminiModel, apiKey: opts.googleApiKey }),
    label: `gemini:${geminiModel}`,
    vision: true,
  };

  return new CascadeProvider({
    onFallback,
    steps: [primaryStep, textOnlyStep, lastResortStep],
  });
}
|
package/src/providers/gemini.ts
CHANGED
|
@@ -104,17 +104,27 @@ function toolUseNames(messages: Message[]): Map<string, string> {
|
|
|
104
104
|
/**
 * Translate one of our Messages into a Gemini `Content` (role + parts).
 *
 * @param m        Message to convert; "assistant" maps to Gemini's "model"
 *                 role, every other role maps to "user".
 * @param idToName Lookup from tool-use id to tool name, used to name
 *                 `functionResponse` parts (falls back to the id itself).
 * @returns An object of the form `{ role, parts }`.
 */
function toGeminiContent(m: Message, idToName: Map<string, string>): any {
  const role = m.role === "assistant" ? "model" : "user";
  const parts = m.content.map((b): any => {
    switch (b.type) {
      case "text":
        return { text: b.text };
      case "image":
      case "video":
        return toGeminiMediaPart(b);
      case "tool_use":
        return { functionCall: { id: b.id, name: b.name, args: b.input } };
      case "tool_result":
        return {
          functionResponse: {
            id: b.toolUseId,
            name: idToName.get(b.toolUseId) ?? b.toolUseId,
            response: b.isError ? { error: b.content } : { result: b.content },
          },
        };
    }
  });
  return { role, parts };
}

/**
 * Convert an image/video block into a Gemini part.
 *
 * - raw base64 `data`           -> `inlineData`
 * - `url` holding a base64 data: URL -> `inlineData` (payload and MIME type are
 *   taken from the URL; a data: URL is not a valid `fileUri`)
 * - any other `url`             -> `fileData`
 *
 * When no MIME type is available the fallback follows the block type
 * ("video/mp4" for video, "image/png" for image) so videos are not labelled
 * as PNG images. A block with neither `data` nor `url` still yields a
 * `fileData` part with an empty `fileUri`, as before.
 */
function toGeminiMediaPart(b: {
  type: "image" | "video";
  url?: string;
  data?: string;
  mimeType?: string;
}): any {
  const fallbackMime = b.type === "video" ? "video/mp4" : "image/png";
  if (b.data) {
    return { inlineData: { mimeType: b.mimeType ?? fallbackMime, data: b.data } };
  }

  const url = b.url ?? "";
  // data:[<mime>][;param=value]*;base64,<payload>
  const dataUrl = /^data:([^;,]*)[^,]*;base64,(.*)$/is.exec(url);
  if (dataUrl) {
    return {
      inlineData: {
        mimeType: b.mimeType ?? (dataUrl[1] || fallbackMime),
        data: dataUrl[2] ?? "",
      },
    };
  }
  return { fileData: { mimeType: b.mimeType ?? fallbackMime, fileUri: url } };
}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
ContentBlock,
|
|
3
|
+
MediaBlock,
|
|
4
|
+
ModelProvider,
|
|
5
|
+
ModelRequest,
|
|
6
|
+
ModelResponse,
|
|
7
|
+
} from "../types.ts";
|
|
8
|
+
|
|
9
|
+
// NVIDIA NIM exposes an OpenAI-compatible Chat Completions API, so this provider
|
|
10
|
+
// talks to it with plain `fetch` — no extra SDK dependency. Free hosted endpoints
|
|
11
|
+
// (e.g. Kimi K2.6, DeepSeek V4) are the SDK's default models via the cascade.
|
|
12
|
+
|
|
13
|
+
/** Default base URL of NVIDIA's OpenAI-compatible API. */
export const NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";

// Exact IDs confirmed against GET /v1/models.

/** Kimi K2.6 — 256K ctx, tools, vision (image+video). */
export const KIMI_K2_6 = "moonshotai/kimi-k2.6";

/** DeepSeek V4 Pro — 1M ctx, tools, text-only. */
export const DEEPSEEK_V4_PRO = "deepseek-ai/deepseek-v4-pro";

/** DeepSeek V4 Flash — faster/cheaper, text-only. */
export const DEEPSEEK_V4_FLASH = "deepseek-ai/deepseek-v4-flash";
|
|
19
|
+
|
|
20
|
+
/** Constructor options for `NvidiaProvider`; every field is optional. */
export interface NvidiaOptions {
  /** Model ID sent in each request; defaults to `KIMI_K2_6`. */
  model?: string;
  /** Bearer token; falls back to the `NVIDIA_API_KEY` environment variable. */
  apiKey?: string;
  /** API root that `/chat/completions` is appended to; defaults to `NVIDIA_BASE_URL`. */
  baseURL?: string;
  /** Value sent as `max_tokens`; defaults to 2048. */
  maxTokens?: number;
  /** Value sent as `temperature`; defaults to 0.6. */
  temperature?: number;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Provider for NVIDIA's OpenAI-compatible Chat Completions endpoint, called
 * with plain `fetch` (no SDK dependency).
 */
export class NvidiaProvider implements ModelProvider {
  readonly name = "nvidia";
  private readonly model: string;
  private readonly apiKey?: string;
  private readonly baseURL: string;
  private readonly maxTokens: number;
  private readonly temperature: number;

  /**
   * @param opts Model, credentials and sampling overrides; see `NvidiaOptions`
   *   for the defaults applied here.
   */
  constructor(opts: NvidiaOptions = {}) {
    this.model = opts.model ?? KIMI_K2_6;
    this.apiKey = opts.apiKey ?? process.env.NVIDIA_API_KEY;
    // Drop trailing slashes so a custom base such as "https://host/v1/" does
    // not produce "https://host/v1//chat/completions".
    this.baseURL = (opts.baseURL ?? NVIDIA_BASE_URL).replace(/\/+$/, "");
    this.maxTokens = opts.maxTokens ?? 2048;
    this.temperature = opts.temperature ?? 0.6;
  }

  /**
   * Sends the request to `{baseURL}/chat/completions` and maps the first
   * choice back into our content blocks.
   *
   * @returns Text and/or tool-use blocks plus a stop reason: "tool_use" when
   *   the model requested tool calls, otherwise "end_turn".
   * @throws Error when no API key is configured, or when the HTTP response is
   *   not OK (message carries the status and the first 300 chars of the body).
   */
  async generate(req: ModelRequest): Promise<ModelResponse> {
    if (!this.apiKey) throw new Error("NvidiaProvider needs NVIDIA_API_KEY (nvapi-...).");

    const body: Record<string, unknown> = {
      model: this.model,
      messages: toOpenAIMessages(req),
      max_tokens: this.maxTokens,
      temperature: this.temperature,
    };
    // Only advertise tools when there are any; an empty list is omitted.
    if (req.tools.length) {
      body.tools = req.tools.map((t) => ({
        type: "function",
        function: { name: t.name, description: t.description, parameters: t.inputSchema },
      }));
      body.tool_choice = "auto";
    }

    const res = await fetch(`${this.baseURL}/chat/completions`, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${this.apiKey}`,
        "Content-Type": "application/json",
        Accept: "application/json",
      },
      body: JSON.stringify(body),
    });

    if (!res.ok) {
      throw new Error(`NVIDIA ${this.model} ${res.status}: ${(await res.text()).slice(0, 300)}`);
    }

    const json: any = await res.json();
    const choice = json.choices?.[0];
    const msg = choice?.message ?? {};

    const content: ContentBlock[] = [];
    // Whitespace-only text is treated as "no text".
    if (typeof msg.content === "string" && msg.content.trim()) {
      content.push({ type: "text", text: msg.content });
    }
    const toolCalls: any[] = msg.tool_calls ?? [];
    for (const tc of toolCalls) {
      content.push({
        type: "tool_use",
        id: tc.id ?? "",
        name: tc.function?.name ?? "",
        input: safeParse(tc.function?.arguments),
      });
    }
    // Always return at least one block, even for an empty completion.
    if (content.length === 0) content.push({ type: "text", text: "" });

    const stopReason =
      choice?.finish_reason === "tool_calls" || toolCalls.length ? "tool_use" : "end_turn";
    return { content, stopReason };
  }
}
|
|
99
|
+
|
|
100
|
+
/**
 * Coerce a tool call's `arguments` into a plain object.
 *
 * Strings are parsed as JSON; any other value is used as-is. The result is
 * returned only when it is a non-null, non-array object — invalid JSON and
 * values such as `null`, numbers, strings or arrays all yield `{}`, so the
 * declared return type always holds.
 *
 * @param args Raw `arguments` value from the API response.
 * @returns The parsed argument object, or `{}` when none can be obtained.
 */
function safeParse(args: unknown): Record<string, unknown> {
  let value: unknown = args;
  if (typeof args === "string") {
    try {
      value = JSON.parse(args);
    } catch {
      return {};
    }
  }
  if (typeof value === "object" && value !== null && !Array.isArray(value)) {
    return value as Record<string, unknown>;
  }
  return {};
}
|
|
108
|
+
|
|
109
|
+
/**
 * Map our Messages into OpenAI-style chat messages (tool calls + tool results + media).
 *
 * - `req.system`, when set, becomes a leading `system` message.
 * - In a user message every `tool_result` block becomes its own `tool`
 *   message; these are emitted before the user message built from the
 *   remaining text/image/video blocks.
 * - User content is a newline-joined string when it is text-only and an
 *   array of parts as soon as it contains any media.
 * - Any non-user message is emitted as an `assistant` message with its text
 *   newline-joined (`null` when empty) and its `tool_use` blocks as `tool_calls`.
 */
function toOpenAIMessages(req: ModelRequest): any[] {
  const messages: any[] = req.system ? [{ role: "system", content: req.system }] : [];

  for (const message of req.messages) {
    if (message.role !== "user") {
      // assistant
      const textChunks: string[] = [];
      const calls: any[] = [];
      for (const block of message.content) {
        if (block.type === "text") {
          textChunks.push((block as { text: string }).text);
        } else if (block.type === "tool_use") {
          const call = block as any;
          calls.push({
            id: call.id,
            type: "function",
            function: { name: call.name, arguments: JSON.stringify(call.input) },
          });
        }
      }
      const assistant: any = { role: "assistant", content: textChunks.join("\n") || null };
      if (calls.length) assistant.tool_calls = calls;
      messages.push(assistant);
      continue;
    }

    const visible: ContentBlock[] = [];
    let sawMedia = false;
    for (const block of message.content) {
      switch (block.type) {
        case "tool_result":
          // tool_result blocks become individual {role:"tool"} messages
          messages.push({ role: "tool", tool_call_id: block.toolUseId, content: block.content });
          break;
        case "image":
        case "video":
          sawMedia = true;
          visible.push(block);
          break;
        case "text":
          visible.push(block);
          break;
      }
    }
    if (visible.length === 0) continue;

    messages.push({
      role: "user",
      content: sawMedia
        ? visible.map((part) => toOpenAIPart(part))
        : visible.map((part) => (part as { text: string }).text).join("\n"),
    });
  }

  return messages;
}
|
|
154
|
+
|
|
155
|
+
/**
 * Convert one text/image/video block into an OpenAI-style content part.
 *
 * For media the URL is the block's `url` when it is non-empty; otherwise a
 * base64 `data:` URL is built from `data` (MIME type defaults to
 * "application/octet-stream"). An empty-string `url` is treated as absent so
 * it cannot mask valid `data`. If neither is provided the URL is "".
 *
 * @returns `{type:"text"}`, `{type:"image_url"}` or `{type:"video_url"}` part.
 */
function toOpenAIPart(b: ContentBlock): any {
  if (b.type === "text") return { type: "text", text: b.text };

  const media = b as MediaBlock;
  // `||` (not `??`) on purpose: "" must fall through to the data: URL.
  const url =
    media.url ||
    (media.data ? `data:${media.mimeType ?? "application/octet-stream"};base64,${media.data}` : "");

  if (media.type === "video") return { type: "video_url", video_url: { url } }; // experimental
  return { type: "image_url", image_url: { url } };
}
|
package/src/types.ts
CHANGED
|
@@ -22,7 +22,23 @@ export interface ToolResultBlock {
|
|
|
22
22
|
content: string;
|
|
23
23
|
isError?: boolean;
|
|
24
24
|
}
|
|
25
|
-
|
|
25
|
+
/**
 * Multimodal (image or video) input block.
 *
 * Supply either a remote `url` or base64 `data` together with `mimeType`.
 * Image is broadly supported; video is experimental and model-dependent
 * (routed to a video-capable model like Kimi K2.6).
 */
export interface MediaBlock {
  /** Kind of media carried by this block. */
  type: "image" | "video";
  /** Remote URL or a data: URL. */
  url?: string;
  /** Raw base64 payload (paired with `mimeType`). */
  data?: string;
  /** MIME type, e.g. "image/png", "image/jpeg", "video/mp4". */
  mimeType?: string;
}
|
|
36
|
+
/** Any block that can appear in a message's `content`, discriminated by `type`. */
export type ContentBlock =
  | TextBlock
  | ToolUseBlock
  | ToolResultBlock
  | MediaBlock;
|
|
37
|
+
|
|
38
|
+
/**
 * Reports whether any message contains an image or video block; this is what
 * drives vision routing.
 *
 * @param messages Messages to scan.
 * @returns `true` as soon as one image/video block is found, else `false`.
 */
export function hasMedia(messages: Message[]): boolean {
  for (const message of messages) {
    for (const block of message.content) {
      if (block.type === "image" || block.type === "video") return true;
    }
  }
  return false;
}
|
|
26
42
|
|
|
27
43
|
export interface Message {
|
|
28
44
|
role: Role;
|