torus-ai 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -17
- package/dist/index.d.ts +93 -6
- package/dist/index.js +225 -16
- package/dist/index.js.map +1 -1
- package/models/POLICY.md +58 -0
- package/models/registry.json +63 -0
- package/package.json +5 -4
- package/src/index.ts +31 -8
- package/src/providers/anthropic.ts +13 -4
- package/src/providers/cascade.ts +118 -0
- package/src/providers/gemini.ts +21 -11
- package/src/providers/nvidia.ts +164 -0
- package/src/types.ts +17 -1
package/README.md
CHANGED
|
@@ -36,7 +36,7 @@ folder; open them to inspect the handoff.
|
|
|
36
36
|
| Context management | [`src/context.ts`](./src/context.ts) — layered, scoped loading (Layers 0–4) |
|
|
37
37
|
| `query()` streaming | [`src/index.ts`](./src/index.ts) — single-shot run yielding events |
|
|
38
38
|
| Pipeline orchestration | [`src/pipeline.ts`](./src/pipeline.ts) — sequential stages + review gates |
|
|
39
|
-
| Model backends | [`src/providers/`](./src/providers/) — `
|
|
39
|
+
| Model backends | [`src/providers/`](./src/providers/) — `NvidiaProvider`, `GeminiProvider`, `AnthropicProvider`, `MockProvider` + `CascadeProvider` |
|
|
40
40
|
|
|
41
41
|
## Three ways to use it
|
|
42
42
|
|
|
@@ -77,33 +77,72 @@ const claude = new AnthropicProvider({ model: "claude-sonnet-4-6" });
|
|
|
77
77
|
const gemini = new GeminiProvider({ model: "gemini-2.5-flash" });
|
|
78
78
|
```
|
|
79
79
|
|
|
80
|
-
## Providers &
|
|
80
|
+
## Providers & the default cascade
|
|
81
81
|
|
|
82
|
-
|
|
83
|
-
|
|
82
|
+
Four pluggable providers implement the same `ModelProvider` interface and drop
|
|
83
|
+
into `query()`, `runPipeline()`, or `runLoop()` interchangeably:
|
|
84
84
|
|
|
85
|
-
| Provider | Package | Env | Default |
|
|
85
|
+
| Provider | Package | Env | Default model |
|
|
86
86
|
|---|---|---|---|
|
|
87
|
-
| `
|
|
87
|
+
| `NvidiaProvider` | none (`fetch`) | `NVIDIA_API_KEY` | `moonshotai/kimi-k2.6` |
|
|
88
88
|
| `GeminiProvider` | `@google/genai` | `GOOGLE_API_KEY` | `gemini-2.5-flash` |
|
|
89
|
+
| `AnthropicProvider` | `@anthropic-ai/sdk` | `ANTHROPIC_API_KEY` | `claude-sonnet-4-6` |
|
|
90
|
+
| `MockProvider` | none | — | offline |
|
|
91
|
+
|
|
92
|
+
**The default is a free-first cascade.** If you don't pass a provider, `query()`
|
|
93
|
+
uses `createDefaultProvider()` — it tries each step and falls through on failure:
|
|
94
|
+
|
|
95
|
+
1. **NVIDIA Kimi K2.6** — main; agentic + tools (text), free NIM endpoint
|
|
96
|
+
2. **NVIDIA DeepSeek V4 Pro** — 1M-context text model, free; *skipped for media*
|
|
97
|
+
3. **NVIDIA Llama-3.2-90B-Vision** — image requests, free
|
|
98
|
+
4. **Gemini 2.5 Flash** — final fallback (image + video), different provider for resilience
|
|
99
|
+
|
|
100
|
+
```ts
|
|
101
|
+
import { query } from "torus-ai"; // NVIDIA_API_KEY in env → cascade default
|
|
102
|
+
for await (const ev of query("Explain MoE in one line")) { /* ... */ }
|
|
103
|
+
|
|
104
|
+
import { createDefaultProvider } from "torus-ai";
|
|
105
|
+
const provider = createDefaultProvider({ mainModel: "moonshotai/kimi-k2.6" });
|
|
106
|
+
```
|
|
89
107
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
108
|
+
It's **capability-aware**: image requests skip text-only steps and route to a
|
|
109
|
+
vision model; video requests route only to a video-capable step.
|
|
110
|
+
|
|
111
|
+
### Multimodal (image verified, video experimental)
|
|
112
|
+
|
|
113
|
+
Pass content blocks instead of a string. Images route to a vision step
|
|
114
|
+
(NVIDIA Llama-Vision → Gemini); video routes to Gemini.
|
|
94
115
|
|
|
95
116
|
```ts
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
117
|
+
await query([
|
|
118
|
+
{ type: "text", text: "What animal is this?" },
|
|
119
|
+
{ type: "image", url: "https://example.com/cat.jpg" }, // or { data: base64, mimeType }
|
|
120
|
+
]);
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
> Note: Kimi K2.6's docs claim vision, but its NIM endpoint is **text-only in
|
|
124
|
+
> practice** (verified) — so the cascade sends images to a real vision model
|
|
125
|
+
> instead. Video is experimental and currently served only by Gemini.
|
|
99
126
|
|
|
127
|
+
### Cost routing (per provider)
|
|
128
|
+
|
|
129
|
+
Providers can also opt into cost routing with `route: true` — fast heuristics, then a
|
|
130
|
+
structured "judge" call on the *cheap* model, picking cheap vs expensive (never
|
|
131
|
+
throws; falls back to expensive). Exposed for Claude and Gemini today:
|
|
132
|
+
|
|
133
|
+
```ts
|
|
134
|
+
new GeminiProvider({ route: true }); // gemini-2.5-flash-lite ↔ gemini-2.5-pro
|
|
135
|
+
new AnthropicProvider({ route: true }); // claude-haiku-4-5 ↔ claude-sonnet-4-6
|
|
100
136
|
import { getRoutingStats } from "torus-ai";
|
|
101
|
-
console.log(getRoutingStats()); // { cheap, expensive, cheapPct, expensivePct, total }
|
|
102
137
|
```
|
|
103
138
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
139
|
+
## Keeping models fresh
|
|
140
|
+
|
|
141
|
+
[`models/registry.json`](./models/registry.json) is the source of truth for the
|
|
142
|
+
cascade; [`models/POLICY.md`](./models/POLICY.md) is the rule for what earns a slot.
|
|
143
|
+
A weekly GitHub Action ([model-watch.yml](./.github/workflows/model-watch.yml))
|
|
144
|
+
pulls NVIDIA's live `/v1/models`, flags new free endpoints as candidates, and opens
|
|
145
|
+
a PR for human review against the policy. Run it locally with `npm run model-watch`.
|
|
107
146
|
|
|
108
147
|
## The stage contract (Layer 2)
|
|
109
148
|
|
package/dist/index.d.ts
CHANGED
|
@@ -15,7 +15,20 @@ interface ToolResultBlock {
|
|
|
15
15
|
content: string;
|
|
16
16
|
isError?: boolean;
|
|
17
17
|
}
|
|
18
|
-
|
|
18
|
+
/**
|
|
19
|
+
* Multimodal input. Provide either a remote `url` or base64 `data` (+ `mimeType`).
|
|
20
|
+
* Image is broadly supported; video is experimental and model-dependent (routed
|
|
21
|
+
* to a video-capable model such as Gemini 2.5 Flash).
|
|
22
|
+
*/
|
|
23
|
+
interface MediaBlock {
|
|
24
|
+
type: "image" | "video";
|
|
25
|
+
url?: string;
|
|
26
|
+
data?: string;
|
|
27
|
+
mimeType?: string;
|
|
28
|
+
}
|
|
29
|
+
type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock | MediaBlock;
|
|
30
|
+
/** True if a message list carries any image/video content (drives vision routing). */
|
|
31
|
+
declare function hasMedia(messages: Message[]): boolean;
|
|
19
32
|
interface Message {
|
|
20
33
|
role: Role;
|
|
21
34
|
content: ContentBlock[];
|
|
@@ -312,6 +325,76 @@ declare class GeminiProvider implements ModelProvider {
|
|
|
312
325
|
generate(req: ModelRequest): Promise<ModelResponse>;
|
|
313
326
|
}
|
|
314
327
|
|
|
328
|
+
declare const NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";
|
|
329
|
+
declare const KIMI_K2_6 = "moonshotai/kimi-k2.6";
|
|
330
|
+
declare const DEEPSEEK_V4_PRO = "deepseek-ai/deepseek-v4-pro";
|
|
331
|
+
declare const DEEPSEEK_V4_FLASH = "deepseek-ai/deepseek-v4-flash";
|
|
332
|
+
declare const LLAMA_VISION = "meta/llama-3.2-90b-vision-instruct";
|
|
333
|
+
interface NvidiaOptions {
|
|
334
|
+
model?: string;
|
|
335
|
+
apiKey?: string;
|
|
336
|
+
baseURL?: string;
|
|
337
|
+
maxTokens?: number;
|
|
338
|
+
temperature?: number;
|
|
339
|
+
}
|
|
340
|
+
declare class NvidiaProvider implements ModelProvider {
|
|
341
|
+
readonly name = "nvidia";
|
|
342
|
+
private model;
|
|
343
|
+
private apiKey?;
|
|
344
|
+
private baseURL;
|
|
345
|
+
private maxTokens;
|
|
346
|
+
private temperature;
|
|
347
|
+
constructor(opts?: NvidiaOptions);
|
|
348
|
+
generate(req: ModelRequest): Promise<ModelResponse>;
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
interface CascadeStep {
|
|
352
|
+
provider: ModelProvider;
|
|
353
|
+
label: string;
|
|
354
|
+
vision: boolean;
|
|
355
|
+
video?: boolean;
|
|
356
|
+
}
|
|
357
|
+
interface CascadeOptions {
|
|
358
|
+
steps: CascadeStep[];
|
|
359
|
+
/** Called when a step is skipped or fails and the cascade falls through. */
|
|
360
|
+
onFallback?: (info: {
|
|
361
|
+
from: string;
|
|
362
|
+
reason: string;
|
|
363
|
+
needsVision: boolean;
|
|
364
|
+
}) => void;
|
|
365
|
+
}
|
|
366
|
+
declare class CascadeProvider implements ModelProvider {
|
|
367
|
+
readonly name = "cascade";
|
|
368
|
+
private steps;
|
|
369
|
+
private onFallback?;
|
|
370
|
+
constructor(opts: CascadeOptions);
|
|
371
|
+
generate(req: ModelRequest): Promise<ModelResponse>;
|
|
372
|
+
}
|
|
373
|
+
interface DefaultProviderOptions {
|
|
374
|
+
nvidiaApiKey?: string;
|
|
375
|
+
googleApiKey?: string;
|
|
376
|
+
/** Override the main NVIDIA model (default Kimi K2.6). */
|
|
377
|
+
mainModel?: string;
|
|
378
|
+
/** Override the secondary NVIDIA model (default DeepSeek V4 Pro). */
|
|
379
|
+
secondaryModel?: string;
|
|
380
|
+
/** NVIDIA vision model for image requests (default llama-3.2-90b-vision). */
|
|
381
|
+
visionModel?: string;
|
|
382
|
+
/** Gemini model used as the final fallback option (default gemini-2.5-flash). */
|
|
383
|
+
geminiModel?: string;
|
|
384
|
+
onFallback?: CascadeOptions["onFallback"];
|
|
385
|
+
}
|
|
386
|
+
/**
|
|
387
|
+
* The SDK's recommended default: free NVIDIA endpoints first, Google as the final
|
|
388
|
+
* fallback option. Capability-aware — image/video requests skip the text-only
|
|
389
|
+
* steps automatically.
|
|
390
|
+
*
|
|
391
|
+
* 1. NVIDIA Kimi K2.6 — main; agentic + tools (text)
|
|
392
|
+
* 2. NVIDIA DeepSeek V4 Pro — 1M-ctx text; skipped for media
|
|
393
|
+
* 3. NVIDIA Llama-3.2-90B-Vision — image requests
|
|
394
|
+
* 4. Gemini 2.5 Flash — final fallback; image + video
|
|
395
|
+
*/
|
|
396
|
+
declare function createDefaultProvider(opts?: DefaultProviderOptions): CascadeProvider;
|
|
397
|
+
|
|
315
398
|
declare const CHEAP_MODEL = "claude-haiku-4-5";
|
|
316
399
|
declare const EXPENSIVE_MODEL = "claude-sonnet-4-6";
|
|
317
400
|
declare const GEMINI_CHEAP_MODEL = "gemini-2.5-flash-lite";
|
|
@@ -354,7 +437,8 @@ declare function getRoutingStats(): RoutingStats;
|
|
|
354
437
|
declare function latestUserText(messages: Message[]): string;
|
|
355
438
|
|
|
356
439
|
interface QueryOptions {
|
|
357
|
-
|
|
440
|
+
/** Defaults to the NVIDIA-first cascade (Kimi K2.6 → DeepSeek V4 Pro → Llama-3.2-90B-Vision → Gemini 2.5 Flash). */
|
|
441
|
+
provider?: ModelProvider;
|
|
358
442
|
system?: string;
|
|
359
443
|
mcpServers?: SdkMcpServer[];
|
|
360
444
|
includeBuiltins?: boolean;
|
|
@@ -364,10 +448,13 @@ interface QueryOptions {
|
|
|
364
448
|
}
|
|
365
449
|
/**
|
|
366
450
|
* Single-shot agent run (no pipeline). Mirrors the Claude Agent SDK's streaming
|
|
367
|
-
* `query()`: yields events as they happen and a final `result` event.
|
|
451
|
+
* `query()`: yields events as they happen and a final `result` event. The prompt
|
|
452
|
+
* may be a string or an array of content blocks (e.g. text + image for vision).
|
|
368
453
|
*
|
|
369
|
-
* for await (const ev of query("Summarize X", {
|
|
454
|
+
* for await (const ev of query("Summarize X", { mcpServers: [srv] })) { ... }
|
|
455
|
+
* for await (const ev of query([{ type: "text", text: "What's this?" },
|
|
456
|
+
* { type: "image", url: "https://..." }])) { ... }
|
|
370
457
|
*/
|
|
371
|
-
declare function query(prompt: string, options
|
|
458
|
+
declare function query(prompt: string | ContentBlock[], options?: QueryOptions): AsyncGenerator<AgentEvent>;
|
|
372
459
|
|
|
373
|
-
export { type AgentEvent, type AnthropicOptions, AnthropicProvider, CHEAP_MODEL, type CanUseTool, type Complexity, type ContentBlock, EXPENSIVE_MODEL, GEMINI_CHEAP_MODEL, GEMINI_EXPENSIVE_MODEL, type GeminiOptions, GeminiProvider, type JSONSchema, type LoadedContext, type LoopOptions, type LoopResult, type Message, type MockOptions, MockProvider, type ModelProvider, type ModelRequest, type ModelResponse, type PermissionConfig, type PermissionDecision, PermissionEngine, type PipelineOptions, type QueryOptions, type RegisteredTool, type Role, type RouterOptions, type RoutingStats, type SdkMcpServer, type StageContract, type StageInput, type StopReason, type TextBlock, type ToolContext, type ToolDefinition, ToolRegistry, type ToolResultBlock, type ToolResultPayload, type ToolSchema, type ToolUseBlock, builtinTools, classifyComplexity, classifyComplexityGemini, createSdkMcpServer, fastHeuristic, getRoutingStats, judgeComplexity, judgeComplexityGemini, latestUserText, listDirTool, loadStageContext, loadStages, matchesAllow, parseContract, query, readFileTool, runLoop, runPipeline, selectGeminiModel, selectModel, tool, writeFileTool };
|
|
460
|
+
export { type AgentEvent, type AnthropicOptions, AnthropicProvider, CHEAP_MODEL, type CanUseTool, type CascadeOptions, CascadeProvider, type CascadeStep, type Complexity, type ContentBlock, DEEPSEEK_V4_FLASH, DEEPSEEK_V4_PRO, type DefaultProviderOptions, EXPENSIVE_MODEL, GEMINI_CHEAP_MODEL, GEMINI_EXPENSIVE_MODEL, type GeminiOptions, GeminiProvider, type JSONSchema, KIMI_K2_6, LLAMA_VISION, type LoadedContext, type LoopOptions, type LoopResult, type MediaBlock, type Message, type MockOptions, MockProvider, type ModelProvider, type ModelRequest, type ModelResponse, NVIDIA_BASE_URL, type NvidiaOptions, NvidiaProvider, type PermissionConfig, type PermissionDecision, PermissionEngine, type PipelineOptions, type QueryOptions, type RegisteredTool, type Role, type RouterOptions, type RoutingStats, type SdkMcpServer, type StageContract, type StageInput, type StopReason, type TextBlock, type ToolContext, type ToolDefinition, ToolRegistry, type ToolResultBlock, type ToolResultPayload, type ToolSchema, type ToolUseBlock, builtinTools, classifyComplexity, classifyComplexityGemini, createDefaultProvider, createSdkMcpServer, fastHeuristic, getRoutingStats, hasMedia, judgeComplexity, judgeComplexityGemini, latestUserText, listDirTool, loadStageContext, loadStages, matchesAllow, parseContract, query, readFileTool, runLoop, runPipeline, selectGeminiModel, selectModel, tool, writeFileTool };
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
// src/types.ts
|
|
2
|
+
function hasMedia(messages) {
|
|
3
|
+
return messages.some((m) => m.content.some((b) => b.type === "image" || b.type === "video"));
|
|
4
|
+
}
|
|
5
|
+
|
|
1
6
|
// src/tools.ts
|
|
2
7
|
function tool(name, description, inputSchema, handler) {
|
|
3
8
|
return { name, description, inputSchema, handler };
|
|
@@ -591,10 +596,17 @@ var AnthropicProvider = class {
|
|
|
591
596
|
function toApiMessage(m) {
|
|
592
597
|
return {
|
|
593
598
|
role: m.role,
|
|
594
|
-
content: m.content.
|
|
595
|
-
if (b.type === "text") return { type: "text", text: b.text };
|
|
596
|
-
if (b.type === "tool_use") return { type: "tool_use", id: b.id, name: b.name, input: b.input };
|
|
597
|
-
|
|
599
|
+
content: m.content.flatMap((b) => {
|
|
600
|
+
if (b.type === "text") return [{ type: "text", text: b.text }];
|
|
601
|
+
if (b.type === "tool_use") return [{ type: "tool_use", id: b.id, name: b.name, input: b.input }];
|
|
602
|
+
if (b.type === "tool_result") {
|
|
603
|
+
return [{ type: "tool_result", tool_use_id: b.toolUseId, content: b.content, is_error: b.isError }];
|
|
604
|
+
}
|
|
605
|
+
if (b.type === "image") {
|
|
606
|
+
const source = b.data ? { type: "base64", media_type: b.mimeType ?? "image/png", data: b.data } : { type: "url", url: b.url };
|
|
607
|
+
return [{ type: "image", source }];
|
|
608
|
+
}
|
|
609
|
+
return [];
|
|
598
610
|
})
|
|
599
611
|
};
|
|
600
612
|
}
|
|
@@ -667,27 +679,215 @@ function toolUseNames(messages) {
|
|
|
667
679
|
function toGeminiContent(m, idToName) {
|
|
668
680
|
const role = m.role === "assistant" ? "model" : "user";
|
|
669
681
|
const parts = m.content.map((b) => {
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
682
|
+
switch (b.type) {
|
|
683
|
+
case "text":
|
|
684
|
+
return { text: b.text };
|
|
685
|
+
case "image":
|
|
686
|
+
case "video":
|
|
687
|
+
return b.data ? { inlineData: { mimeType: b.mimeType ?? "image/png", data: b.data } } : { fileData: { mimeType: b.mimeType ?? "image/png", fileUri: b.url ?? "" } };
|
|
688
|
+
case "tool_use":
|
|
689
|
+
return { functionCall: { id: b.id, name: b.name, args: b.input } };
|
|
690
|
+
case "tool_result":
|
|
691
|
+
return {
|
|
692
|
+
functionResponse: {
|
|
693
|
+
id: b.toolUseId,
|
|
694
|
+
name: idToName.get(b.toolUseId) ?? b.toolUseId,
|
|
695
|
+
response: b.isError ? { error: b.content } : { result: b.content }
|
|
696
|
+
}
|
|
697
|
+
};
|
|
698
|
+
}
|
|
679
699
|
});
|
|
680
700
|
return { role, parts };
|
|
681
701
|
}
|
|
682
702
|
|
|
703
|
+
// src/providers/nvidia.ts
|
|
704
|
+
var NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";
|
|
705
|
+
var KIMI_K2_6 = "moonshotai/kimi-k2.6";
|
|
706
|
+
var DEEPSEEK_V4_PRO = "deepseek-ai/deepseek-v4-pro";
|
|
707
|
+
var DEEPSEEK_V4_FLASH = "deepseek-ai/deepseek-v4-flash";
|
|
708
|
+
var LLAMA_VISION = "meta/llama-3.2-90b-vision-instruct";
|
|
709
|
+
var NvidiaProvider = class {
|
|
710
|
+
name = "nvidia";
|
|
711
|
+
model;
|
|
712
|
+
apiKey;
|
|
713
|
+
baseURL;
|
|
714
|
+
maxTokens;
|
|
715
|
+
temperature;
|
|
716
|
+
constructor(opts = {}) {
|
|
717
|
+
this.model = opts.model ?? KIMI_K2_6;
|
|
718
|
+
this.apiKey = opts.apiKey ?? process.env.NVIDIA_API_KEY;
|
|
719
|
+
this.baseURL = opts.baseURL ?? NVIDIA_BASE_URL;
|
|
720
|
+
this.maxTokens = opts.maxTokens ?? 2048;
|
|
721
|
+
this.temperature = opts.temperature ?? 0.2;
|
|
722
|
+
}
|
|
723
|
+
async generate(req) {
|
|
724
|
+
if (!this.apiKey) throw new Error("NvidiaProvider needs NVIDIA_API_KEY (nvapi-...).");
|
|
725
|
+
const body = {
|
|
726
|
+
model: this.model,
|
|
727
|
+
messages: toOpenAIMessages(req),
|
|
728
|
+
max_tokens: this.maxTokens,
|
|
729
|
+
temperature: this.temperature
|
|
730
|
+
};
|
|
731
|
+
if (req.tools.length) {
|
|
732
|
+
body.tools = req.tools.map((t) => ({
|
|
733
|
+
type: "function",
|
|
734
|
+
function: { name: t.name, description: t.description, parameters: t.inputSchema }
|
|
735
|
+
}));
|
|
736
|
+
body.tool_choice = "auto";
|
|
737
|
+
}
|
|
738
|
+
const res = await fetch(`${this.baseURL}/chat/completions`, {
|
|
739
|
+
method: "POST",
|
|
740
|
+
headers: {
|
|
741
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
742
|
+
"Content-Type": "application/json",
|
|
743
|
+
Accept: "application/json"
|
|
744
|
+
},
|
|
745
|
+
body: JSON.stringify(body)
|
|
746
|
+
});
|
|
747
|
+
if (!res.ok) {
|
|
748
|
+
throw new Error(`NVIDIA ${this.model} ${res.status}: ${(await res.text()).slice(0, 300)}`);
|
|
749
|
+
}
|
|
750
|
+
const json = await res.json();
|
|
751
|
+
const choice = json.choices?.[0];
|
|
752
|
+
const msg = choice?.message ?? {};
|
|
753
|
+
const content = [];
|
|
754
|
+
if (typeof msg.content === "string" && msg.content.trim()) {
|
|
755
|
+
content.push({ type: "text", text: msg.content });
|
|
756
|
+
}
|
|
757
|
+
const toolCalls = msg.tool_calls ?? [];
|
|
758
|
+
for (const tc of toolCalls) {
|
|
759
|
+
content.push({
|
|
760
|
+
type: "tool_use",
|
|
761
|
+
id: tc.id ?? "",
|
|
762
|
+
name: tc.function?.name ?? "",
|
|
763
|
+
input: safeParse(tc.function?.arguments)
|
|
764
|
+
});
|
|
765
|
+
}
|
|
766
|
+
if (content.length === 0) content.push({ type: "text", text: "" });
|
|
767
|
+
const stopReason = choice?.finish_reason === "tool_calls" || toolCalls.length ? "tool_use" : "end_turn";
|
|
768
|
+
return { content, stopReason };
|
|
769
|
+
}
|
|
770
|
+
};
|
|
771
|
+
function safeParse(args) {
|
|
772
|
+
if (typeof args !== "string") return args ?? {};
|
|
773
|
+
try {
|
|
774
|
+
return JSON.parse(args);
|
|
775
|
+
} catch {
|
|
776
|
+
return {};
|
|
777
|
+
}
|
|
778
|
+
}
|
|
779
|
+
function toOpenAIMessages(req) {
|
|
780
|
+
const out = [];
|
|
781
|
+
if (req.system) out.push({ role: "system", content: req.system });
|
|
782
|
+
for (const m of req.messages) {
|
|
783
|
+
if (m.role === "user") {
|
|
784
|
+
for (const b of m.content) {
|
|
785
|
+
if (b.type === "tool_result") {
|
|
786
|
+
out.push({ role: "tool", tool_call_id: b.toolUseId, content: b.content });
|
|
787
|
+
}
|
|
788
|
+
}
|
|
789
|
+
const parts = m.content.filter(
|
|
790
|
+
(b) => b.type === "text" || b.type === "image" || b.type === "video"
|
|
791
|
+
);
|
|
792
|
+
if (parts.length) {
|
|
793
|
+
const multimodal = parts.some((b) => b.type !== "text");
|
|
794
|
+
out.push({
|
|
795
|
+
role: "user",
|
|
796
|
+
content: multimodal ? parts.map(toOpenAIPart) : parts.map((b) => b.text).join("\n")
|
|
797
|
+
});
|
|
798
|
+
}
|
|
799
|
+
} else {
|
|
800
|
+
const text = m.content.filter((b) => b.type === "text").map((b) => b.text).join("\n");
|
|
801
|
+
const toolUses = m.content.filter((b) => b.type === "tool_use");
|
|
802
|
+
const msg = { role: "assistant", content: text || null };
|
|
803
|
+
if (toolUses.length) {
|
|
804
|
+
msg.tool_calls = toolUses.map((b) => ({
|
|
805
|
+
id: b.id,
|
|
806
|
+
type: "function",
|
|
807
|
+
function: { name: b.name, arguments: JSON.stringify(b.input) }
|
|
808
|
+
}));
|
|
809
|
+
}
|
|
810
|
+
out.push(msg);
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
return out;
|
|
814
|
+
}
|
|
815
|
+
function toOpenAIPart(b) {
|
|
816
|
+
if (b.type === "text") return { type: "text", text: b.text };
|
|
817
|
+
const media = b;
|
|
818
|
+
const url = media.url ?? (media.data ? `data:${media.mimeType ?? "application/octet-stream"};base64,${media.data}` : "");
|
|
819
|
+
if (media.type === "video") return { type: "video_url", video_url: { url } };
|
|
820
|
+
return { type: "image_url", image_url: { url } };
|
|
821
|
+
}
|
|
822
|
+
|
|
823
|
+
// src/providers/cascade.ts
|
|
824
|
+
var CascadeProvider = class {
|
|
825
|
+
name = "cascade";
|
|
826
|
+
steps;
|
|
827
|
+
onFallback;
|
|
828
|
+
constructor(opts) {
|
|
829
|
+
if (!opts.steps.length) throw new Error("CascadeProvider needs at least one step.");
|
|
830
|
+
this.steps = opts.steps;
|
|
831
|
+
this.onFallback = opts.onFallback;
|
|
832
|
+
}
|
|
833
|
+
async generate(req) {
|
|
834
|
+
const has = (t) => req.messages.some((m) => m.content.some((b) => b.type === t));
|
|
835
|
+
const needsVideo = has("video");
|
|
836
|
+
const needsImage = has("image");
|
|
837
|
+
const needsVision = needsImage || needsVideo;
|
|
838
|
+
const eligible = needsVideo ? this.steps.filter((s) => s.video) : needsImage ? this.steps.filter((s) => s.vision) : this.steps;
|
|
839
|
+
if (!eligible.length) {
|
|
840
|
+
throw new Error(
|
|
841
|
+
`Cascade: request needs ${needsVideo ? "video" : "image"} input but no step supports it.`
|
|
842
|
+
);
|
|
843
|
+
}
|
|
844
|
+
let lastErr;
|
|
845
|
+
for (const step of eligible) {
|
|
846
|
+
try {
|
|
847
|
+
return await step.provider.generate(req);
|
|
848
|
+
} catch (err) {
|
|
849
|
+
lastErr = err;
|
|
850
|
+
this.onFallback?.({
|
|
851
|
+
from: step.label,
|
|
852
|
+
reason: err.message?.slice(0, 200) ?? "unknown",
|
|
853
|
+
needsVision
|
|
854
|
+
});
|
|
855
|
+
}
|
|
856
|
+
}
|
|
857
|
+
throw new Error(`Cascade exhausted all steps. Last error: ${lastErr?.message}`);
|
|
858
|
+
}
|
|
859
|
+
};
|
|
860
|
+
function createDefaultProvider(opts = {}) {
|
|
861
|
+
const main = opts.mainModel ?? KIMI_K2_6;
|
|
862
|
+
const secondary = opts.secondaryModel ?? DEEPSEEK_V4_PRO;
|
|
863
|
+
const vision = opts.visionModel ?? LLAMA_VISION;
|
|
864
|
+
const gemini = opts.geminiModel ?? "gemini-2.5-flash";
|
|
865
|
+
const nv = (model) => new NvidiaProvider({ model, apiKey: opts.nvidiaApiKey });
|
|
866
|
+
return new CascadeProvider({
|
|
867
|
+
onFallback: opts.onFallback ?? ((info) => console.warn(`[cascade] ${info.from} failed (${info.reason}); trying next`)),
|
|
868
|
+
steps: [
|
|
869
|
+
{ provider: nv(main), label: `nvidia:${main}`, vision: false, video: false },
|
|
870
|
+
{ provider: nv(secondary), label: `nvidia:${secondary}`, vision: false, video: false },
|
|
871
|
+
{ provider: nv(vision), label: `nvidia:${vision}`, vision: true, video: false },
|
|
872
|
+
{
|
|
873
|
+
provider: new GeminiProvider({ model: gemini, apiKey: opts.googleApiKey }),
|
|
874
|
+
label: `gemini:${gemini}`,
|
|
875
|
+
vision: true,
|
|
876
|
+
video: true
|
|
877
|
+
}
|
|
878
|
+
]
|
|
879
|
+
});
|
|
880
|
+
}
|
|
881
|
+
|
|
683
882
|
// src/index.ts
|
|
684
|
-
async function* query(prompt, options) {
|
|
883
|
+
async function* query(prompt, options = {}) {
|
|
685
884
|
const registry = new ToolRegistry();
|
|
686
885
|
if (options.includeBuiltins ?? true) registry.addBuiltins(builtinTools);
|
|
687
886
|
for (const s of options.mcpServers ?? []) registry.addServer(s);
|
|
688
|
-
const
|
|
887
|
+
const content = typeof prompt === "string" ? [{ type: "text", text: prompt }] : prompt;
|
|
888
|
+
const messages = [{ role: "user", content }];
|
|
689
889
|
const result = yield* runLoop({
|
|
690
|
-
provider: options.provider,
|
|
890
|
+
provider: options.provider ?? createDefaultProvider(),
|
|
691
891
|
registry,
|
|
692
892
|
permissions: new PermissionEngine(options.permissions ?? {}),
|
|
693
893
|
system: options.system ?? "You are a helpful agent.",
|
|
@@ -700,19 +900,28 @@ async function* query(prompt, options) {
|
|
|
700
900
|
export {
|
|
701
901
|
AnthropicProvider,
|
|
702
902
|
CHEAP_MODEL,
|
|
903
|
+
CascadeProvider,
|
|
904
|
+
DEEPSEEK_V4_FLASH,
|
|
905
|
+
DEEPSEEK_V4_PRO,
|
|
703
906
|
EXPENSIVE_MODEL,
|
|
704
907
|
GEMINI_CHEAP_MODEL,
|
|
705
908
|
GEMINI_EXPENSIVE_MODEL,
|
|
706
909
|
GeminiProvider,
|
|
910
|
+
KIMI_K2_6,
|
|
911
|
+
LLAMA_VISION,
|
|
707
912
|
MockProvider,
|
|
913
|
+
NVIDIA_BASE_URL,
|
|
914
|
+
NvidiaProvider,
|
|
708
915
|
PermissionEngine,
|
|
709
916
|
ToolRegistry,
|
|
710
917
|
builtinTools,
|
|
711
918
|
classifyComplexity,
|
|
712
919
|
classifyComplexityGemini,
|
|
920
|
+
createDefaultProvider,
|
|
713
921
|
createSdkMcpServer,
|
|
714
922
|
fastHeuristic,
|
|
715
923
|
getRoutingStats,
|
|
924
|
+
hasMedia,
|
|
716
925
|
judgeComplexity,
|
|
717
926
|
judgeComplexityGemini,
|
|
718
927
|
latestUserText,
|