torus-ai 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -17
- package/dist/index.d.ts +87 -6
- package/dist/index.js +224 -16
- package/dist/index.js.map +1 -1
- package/models/POLICY.md +58 -0
- package/models/registry.json +52 -0
- package/package.json +5 -4
- package/src/index.ts +30 -8
- package/src/providers/anthropic.ts +13 -4
- package/src/providers/cascade.ts +107 -0
- package/src/providers/gemini.ts +21 -11
- package/src/providers/nvidia.ts +163 -0
- package/src/types.ts +17 -1
package/README.md
CHANGED
|
@@ -36,7 +36,7 @@ folder; open them to inspect the handoff.
|
|
|
36
36
|
| Context management | [`src/context.ts`](./src/context.ts) — layered, scoped loading (Layers 0–4) |
|
|
37
37
|
| `query()` streaming | [`src/index.ts`](./src/index.ts) — single-shot run yielding events |
|
|
38
38
|
| Pipeline orchestration | [`src/pipeline.ts`](./src/pipeline.ts) — sequential stages + review gates |
|
|
39
|
-
| Model backends | [`src/providers/`](./src/providers/) — `
|
|
39
|
+
| Model backends | [`src/providers/`](./src/providers/) — `NvidiaProvider`, `GeminiProvider`, `AnthropicProvider`, `MockProvider` + `CascadeProvider` |
|
|
40
40
|
|
|
41
41
|
## Three ways to use it
|
|
42
42
|
|
|
@@ -77,33 +77,66 @@ const claude = new AnthropicProvider({ model: "claude-sonnet-4-6" });
|
|
|
77
77
|
const gemini = new GeminiProvider({ model: "gemini-2.5-flash" });
|
|
78
78
|
```
|
|
79
79
|
|
|
80
|
-
## Providers &
|
|
80
|
+
## Providers & the default cascade
|
|
81
81
|
|
|
82
|
-
|
|
83
|
-
|
|
82
|
+
Four pluggable providers implement the same `ModelProvider` interface and drop
|
|
83
|
+
into `query()`, `runPipeline()`, or `runLoop()` interchangeably:
|
|
84
84
|
|
|
85
|
-
| Provider | Package | Env | Default |
|
|
85
|
+
| Provider | Package | Env | Default model |
|
|
86
86
|
|---|---|---|---|
|
|
87
|
-
| `
|
|
87
|
+
| `NvidiaProvider` | none (`fetch`) | `NVIDIA_API_KEY` | `moonshotai/kimi-k2.6` |
|
|
88
88
|
| `GeminiProvider` | `@google/genai` | `GOOGLE_API_KEY` | `gemini-2.5-flash` |
|
|
89
|
+
| `AnthropicProvider` | `@anthropic-ai/sdk` | `ANTHROPIC_API_KEY` | `claude-sonnet-4-6` |
|
|
90
|
+
| `MockProvider` | none | — | offline |
|
|
89
91
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
92
|
+
**The default is a free-first cascade.** If you don't pass a provider, `query()`
|
|
93
|
+
uses `createDefaultProvider()` — it tries each step and falls through on failure:
|
|
94
|
+
|
|
95
|
+
1. **NVIDIA Kimi K2.6** — main; agentic + multimodal (image/video), free NIM endpoint
|
|
96
|
+
2. **NVIDIA DeepSeek V4 Pro** — 1M-context text model, free; *skipped for image/video*
|
|
97
|
+
3. **Gemini 2.5 Flash** — final fallback, different provider for resilience
|
|
94
98
|
|
|
95
99
|
```ts
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
100
|
+
import { query } from "torus-ai"; // NVIDIA_API_KEY in env → cascade default
|
|
101
|
+
for await (const ev of query("Explain MoE in one line")) { /* ... */ }
|
|
102
|
+
|
|
103
|
+
import { createDefaultProvider } from "torus-ai";
|
|
104
|
+
const provider = createDefaultProvider({ mainModel: "moonshotai/kimi-k2.6" });
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
It's **capability-aware**: image/video requests automatically skip text-only steps.
|
|
108
|
+
|
|
109
|
+
### Multimodal (image now, video experimental)
|
|
110
|
+
|
|
111
|
+
Pass content blocks instead of a string. Images route to a vision-capable step
|
|
112
|
+
(Kimi / Gemini / Claude); video is best-effort to Kimi.
|
|
99
113
|
|
|
114
|
+
```ts
|
|
115
|
+
await query([
|
|
116
|
+
{ type: "text", text: "What's in this image?" },
|
|
117
|
+
{ type: "image", url: "https://example.com/cat.png" }, // or { data, mimeType }
|
|
118
|
+
]);
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Cost routing (per provider)
|
|
122
|
+
|
|
123
|
+
Some providers also support `route: true` — fast heuristics, then a
|
|
124
|
+
structured "judge" call on the *cheap* model, picking cheap vs expensive (never
|
|
125
|
+
throws; falls back to expensive). Exposed for Claude and Gemini today:
|
|
126
|
+
|
|
127
|
+
```ts
|
|
128
|
+
new GeminiProvider({ route: true }); // gemini-2.5-flash-lite ↔ gemini-2.5-pro
|
|
129
|
+
new AnthropicProvider({ route: true }); // claude-haiku-4-5 ↔ claude-sonnet-4-6
|
|
100
130
|
import { getRoutingStats } from "torus-ai";
|
|
101
|
-
console.log(getRoutingStats()); // { cheap, expensive, cheapPct, expensivePct, total }
|
|
102
131
|
```
|
|
103
132
|
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
133
|
+
## Keeping models fresh
|
|
134
|
+
|
|
135
|
+
[`models/registry.json`](./models/registry.json) is the source of truth for the
|
|
136
|
+
cascade; [`models/POLICY.md`](./models/POLICY.md) is the rule for what earns a slot.
|
|
137
|
+
A weekly GitHub Action ([model-watch.yml](./.github/workflows/model-watch.yml))
|
|
138
|
+
pulls NVIDIA's live `/v1/models`, flags new free endpoints as candidates, and opens
|
|
139
|
+
a PR for human review against the policy. Run it locally with `npm run model-watch`.
|
|
107
140
|
|
|
108
141
|
## The stage contract (Layer 2)
|
|
109
142
|
|
package/dist/index.d.ts
CHANGED
|
@@ -15,7 +15,20 @@ interface ToolResultBlock {
|
|
|
15
15
|
content: string;
|
|
16
16
|
isError?: boolean;
|
|
17
17
|
}
|
|
18
|
-
|
|
18
|
+
/**
|
|
19
|
+
* Multimodal input. Provide either a remote `url` or base64 `data` (+ `mimeType`).
|
|
20
|
+
* Image is broadly supported; video is experimental and model-dependent (routed
|
|
21
|
+
* to a video-capable model like Kimi K2.6).
|
|
22
|
+
*/
|
|
23
|
+
interface MediaBlock {
|
|
24
|
+
type: "image" | "video";
|
|
25
|
+
url?: string;
|
|
26
|
+
data?: string;
|
|
27
|
+
mimeType?: string;
|
|
28
|
+
}
|
|
29
|
+
type ContentBlock = TextBlock | ToolUseBlock | ToolResultBlock | MediaBlock;
|
|
30
|
+
/** True if a message list carries any image/video content (drives vision routing). */
|
|
31
|
+
declare function hasMedia(messages: Message[]): boolean;
|
|
19
32
|
interface Message {
|
|
20
33
|
role: Role;
|
|
21
34
|
content: ContentBlock[];
|
|
@@ -312,6 +325,70 @@ declare class GeminiProvider implements ModelProvider {
|
|
|
312
325
|
generate(req: ModelRequest): Promise<ModelResponse>;
|
|
313
326
|
}
|
|
314
327
|
|
|
328
|
+
declare const NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";
|
|
329
|
+
declare const KIMI_K2_6 = "moonshotai/kimi-k2.6";
|
|
330
|
+
declare const DEEPSEEK_V4_PRO = "deepseek-ai/deepseek-v4-pro";
|
|
331
|
+
declare const DEEPSEEK_V4_FLASH = "deepseek-ai/deepseek-v4-flash";
|
|
332
|
+
interface NvidiaOptions {
|
|
333
|
+
model?: string;
|
|
334
|
+
apiKey?: string;
|
|
335
|
+
baseURL?: string;
|
|
336
|
+
maxTokens?: number;
|
|
337
|
+
temperature?: number;
|
|
338
|
+
}
|
|
339
|
+
declare class NvidiaProvider implements ModelProvider {
|
|
340
|
+
readonly name = "nvidia";
|
|
341
|
+
private model;
|
|
342
|
+
private apiKey?;
|
|
343
|
+
private baseURL;
|
|
344
|
+
private maxTokens;
|
|
345
|
+
private temperature;
|
|
346
|
+
constructor(opts?: NvidiaOptions);
|
|
347
|
+
generate(req: ModelRequest): Promise<ModelResponse>;
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
interface CascadeStep {
|
|
351
|
+
provider: ModelProvider;
|
|
352
|
+
label: string;
|
|
353
|
+
vision: boolean;
|
|
354
|
+
}
|
|
355
|
+
interface CascadeOptions {
|
|
356
|
+
steps: CascadeStep[];
|
|
357
|
+
/** Called each time an attempted step throws; steps filtered out for lacking vision support are not reported. */
|
|
358
|
+
onFallback?: (info: {
|
|
359
|
+
from: string;
|
|
360
|
+
reason: string;
|
|
361
|
+
needsVision: boolean;
|
|
362
|
+
}) => void;
|
|
363
|
+
}
|
|
364
|
+
declare class CascadeProvider implements ModelProvider {
|
|
365
|
+
readonly name = "cascade";
|
|
366
|
+
private steps;
|
|
367
|
+
private onFallback?;
|
|
368
|
+
constructor(opts: CascadeOptions);
|
|
369
|
+
generate(req: ModelRequest): Promise<ModelResponse>;
|
|
370
|
+
}
|
|
371
|
+
interface DefaultProviderOptions {
|
|
372
|
+
nvidiaApiKey?: string;
|
|
373
|
+
googleApiKey?: string;
|
|
374
|
+
/** Override the main NVIDIA model (default Kimi K2.6). */
|
|
375
|
+
mainModel?: string;
|
|
376
|
+
/** Override the secondary NVIDIA model (default DeepSeek V4 Pro). */
|
|
377
|
+
secondaryModel?: string;
|
|
378
|
+
/** Gemini model used as the final fallback option (default gemini-2.5-flash). */
|
|
379
|
+
geminiModel?: string;
|
|
380
|
+
onFallback?: CascadeOptions["onFallback"];
|
|
381
|
+
}
|
|
382
|
+
/**
|
|
383
|
+
* The SDK's recommended default: free NVIDIA endpoints first, Google as one
|
|
384
|
+
* fallback option.
|
|
385
|
+
*
|
|
386
|
+
* 1. NVIDIA Kimi K2.6 — main; agentic + multimodal (image/video)
|
|
387
|
+
* 2. NVIDIA DeepSeek V4 Pro — text-only; skipped for image/video requests
|
|
388
|
+
* 3. Gemini 2.5 Flash — final fallback; multimodal
|
|
389
|
+
*/
|
|
390
|
+
declare function createDefaultProvider(opts?: DefaultProviderOptions): CascadeProvider;
|
|
391
|
+
|
|
315
392
|
declare const CHEAP_MODEL = "claude-haiku-4-5";
|
|
316
393
|
declare const EXPENSIVE_MODEL = "claude-sonnet-4-6";
|
|
317
394
|
declare const GEMINI_CHEAP_MODEL = "gemini-2.5-flash-lite";
|
|
@@ -354,7 +431,8 @@ declare function getRoutingStats(): RoutingStats;
|
|
|
354
431
|
declare function latestUserText(messages: Message[]): string;
|
|
355
432
|
|
|
356
433
|
interface QueryOptions {
|
|
357
|
-
|
|
434
|
+
/** Defaults to the NVIDIA-first cascade (Kimi K2.6 → DeepSeek V4 → Gemini). */
|
|
435
|
+
provider?: ModelProvider;
|
|
358
436
|
system?: string;
|
|
359
437
|
mcpServers?: SdkMcpServer[];
|
|
360
438
|
includeBuiltins?: boolean;
|
|
@@ -364,10 +442,13 @@ interface QueryOptions {
|
|
|
364
442
|
}
|
|
365
443
|
/**
|
|
366
444
|
* Single-shot agent run (no pipeline). Mirrors the Claude Agent SDK's streaming
|
|
367
|
-
* `query()`: yields events as they happen and a final `result` event.
|
|
445
|
+
* `query()`: yields events as they happen and a final `result` event. The prompt
|
|
446
|
+
* may be a string or an array of content blocks (e.g. text + image for vision).
|
|
368
447
|
*
|
|
369
|
-
* for await (const ev of query("Summarize X", {
|
|
448
|
+
* for await (const ev of query("Summarize X", { mcpServers: [srv] })) { ... }
|
|
449
|
+
* for await (const ev of query([{ type: "text", text: "What's this?" },
|
|
450
|
+
* { type: "image", url: "https://..." }])) { ... }
|
|
370
451
|
*/
|
|
371
|
-
declare function query(prompt: string, options
|
|
452
|
+
declare function query(prompt: string | ContentBlock[], options?: QueryOptions): AsyncGenerator<AgentEvent>;
|
|
372
453
|
|
|
373
|
-
export { type AgentEvent, type AnthropicOptions, AnthropicProvider, CHEAP_MODEL, type CanUseTool, type Complexity, type ContentBlock, EXPENSIVE_MODEL, GEMINI_CHEAP_MODEL, GEMINI_EXPENSIVE_MODEL, type GeminiOptions, GeminiProvider, type JSONSchema, type LoadedContext, type LoopOptions, type LoopResult, type Message, type MockOptions, MockProvider, type ModelProvider, type ModelRequest, type ModelResponse, type PermissionConfig, type PermissionDecision, PermissionEngine, type PipelineOptions, type QueryOptions, type RegisteredTool, type Role, type RouterOptions, type RoutingStats, type SdkMcpServer, type StageContract, type StageInput, type StopReason, type TextBlock, type ToolContext, type ToolDefinition, ToolRegistry, type ToolResultBlock, type ToolResultPayload, type ToolSchema, type ToolUseBlock, builtinTools, classifyComplexity, classifyComplexityGemini, createSdkMcpServer, fastHeuristic, getRoutingStats, judgeComplexity, judgeComplexityGemini, latestUserText, listDirTool, loadStageContext, loadStages, matchesAllow, parseContract, query, readFileTool, runLoop, runPipeline, selectGeminiModel, selectModel, tool, writeFileTool };
|
|
454
|
+
export { type AgentEvent, type AnthropicOptions, AnthropicProvider, CHEAP_MODEL, type CanUseTool, type CascadeOptions, CascadeProvider, type CascadeStep, type Complexity, type ContentBlock, DEEPSEEK_V4_FLASH, DEEPSEEK_V4_PRO, type DefaultProviderOptions, EXPENSIVE_MODEL, GEMINI_CHEAP_MODEL, GEMINI_EXPENSIVE_MODEL, type GeminiOptions, GeminiProvider, type JSONSchema, KIMI_K2_6, type LoadedContext, type LoopOptions, type LoopResult, type MediaBlock, type Message, type MockOptions, MockProvider, type ModelProvider, type ModelRequest, type ModelResponse, NVIDIA_BASE_URL, type NvidiaOptions, NvidiaProvider, type PermissionConfig, type PermissionDecision, PermissionEngine, type PipelineOptions, type QueryOptions, type RegisteredTool, type Role, type RouterOptions, type RoutingStats, type SdkMcpServer, type StageContract, type StageInput, type StopReason, type TextBlock, type ToolContext, type ToolDefinition, ToolRegistry, type ToolResultBlock, type ToolResultPayload, type ToolSchema, type ToolUseBlock, builtinTools, classifyComplexity, classifyComplexityGemini, createDefaultProvider, createSdkMcpServer, fastHeuristic, getRoutingStats, hasMedia, judgeComplexity, judgeComplexityGemini, latestUserText, listDirTool, loadStageContext, loadStages, matchesAllow, parseContract, query, readFileTool, runLoop, runPipeline, selectGeminiModel, selectModel, tool, writeFileTool };
|
package/dist/index.js
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
// src/types.ts
|
|
2
|
+
/**
 * Reports whether any message in the list contains an image or video block.
 *
 * @param {Array<{content: Array<{type: string}>}>} messages - Conversation messages to inspect.
 * @returns {boolean} `true` as soon as one block of type "image" or "video" is found.
 */
function hasMedia(messages) {
  const isMediaBlock = (block) => block.type === "image" || block.type === "video";
  const carriesMedia = (message) => message.content.some(isMediaBlock);
  return messages.some(carriesMedia);
}
|
|
5
|
+
|
|
1
6
|
// src/tools.ts
|
|
2
7
|
function tool(name, description, inputSchema, handler) {
|
|
3
8
|
return { name, description, inputSchema, handler };
|
|
@@ -591,10 +596,17 @@ var AnthropicProvider = class {
|
|
|
591
596
|
/**
 * Maps one SDK message onto the block shapes this provider sends to the API.
 *
 * Text, tool_use, tool_result and image blocks are translated one-to-one; any
 * other block type (including video) is omitted from the output.
 *
 * @param {{role: string, content: Array<object>}} m - Message to convert.
 * @returns {{role: string, content: Array<object>}} Message with converted blocks.
 */
function toApiMessage(m) {
  const converted = [];
  for (const block of m.content) {
    switch (block.type) {
      case "text":
        converted.push({ type: "text", text: block.text });
        break;
      case "tool_use":
        converted.push({ type: "tool_use", id: block.id, name: block.name, input: block.input });
        break;
      case "tool_result":
        converted.push({
          type: "tool_result",
          tool_use_id: block.toolUseId,
          content: block.content,
          is_error: block.isError
        });
        break;
      case "image": {
        // Inline base64 data wins over a remote URL when both are present.
        let source;
        if (block.data) {
          source = { type: "base64", media_type: block.mimeType ?? "image/png", data: block.data };
        } else {
          source = { type: "url", url: block.url };
        }
        converted.push({ type: "image", source });
        break;
      }
      default:
        // Unsupported block types contribute nothing.
        break;
    }
  }
  return { role: m.role, content: converted };
}
|
|
@@ -667,27 +679,215 @@ function toolUseNames(messages) {
|
|
|
667
679
|
/**
 * Converts one SDK message into a Gemini-style `{ role, parts }` object.
 *
 * - "assistant" messages become role "model"; every other role becomes "user".
 * - Media blocks are sent inline when base64 `data` is present, otherwise as a
 *   file reference to `url`.
 * - When a media block has no `mimeType`, the default depends on the block
 *   type ("video/mp4" for video, "image/png" for image), so a video is never
 *   labelled as a PNG.
 * - Block types with no mapping are dropped, so `parts` never holds `undefined`.
 *
 * @param {{role: string, content: Array<object>}} m - Message to convert.
 * @param {Map<string, string>} idToName - Lookup from tool-use id to tool name,
 *   used to name function responses; falls back to the id itself.
 * @returns {{role: string, parts: Array<object>}} The converted content.
 */
function toGeminiContent(m, idToName) {
  const role = m.role === "assistant" ? "model" : "user";
  const parts = m.content.flatMap((b) => {
    switch (b.type) {
      case "text":
        return [{ text: b.text }];
      case "image":
      case "video": {
        const mimeType = b.mimeType ?? (b.type === "video" ? "video/mp4" : "image/png");
        return [
          b.data
            ? { inlineData: { mimeType, data: b.data } }
            : { fileData: { mimeType, fileUri: b.url ?? "" } }
        ];
      }
      case "tool_use":
        return [{ functionCall: { id: b.id, name: b.name, args: b.input } }];
      case "tool_result":
        return [
          {
            functionResponse: {
              id: b.toolUseId,
              name: idToName.get(b.toolUseId) ?? b.toolUseId,
              response: b.isError ? { error: b.content } : { result: b.content }
            }
          }
        ];
      default:
        // Unknown block type: omit it rather than emit an undefined part.
        return [];
    }
  });
  return { role, parts };
}
|
|
682
702
|
|
|
703
|
+
// src/providers/nvidia.ts
|
|
704
|
+
var NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";
|
|
705
|
+
var KIMI_K2_6 = "moonshotai/kimi-k2.6";
|
|
706
|
+
var DEEPSEEK_V4_PRO = "deepseek-ai/deepseek-v4-pro";
|
|
707
|
+
var DEEPSEEK_V4_FLASH = "deepseek-ai/deepseek-v4-flash";
|
|
708
|
+
/**
 * Model provider that POSTs chat-completion requests to an NVIDIA endpoint
 * using `fetch`.
 *
 * Options fall back to: model `KIMI_K2_6`, API key from `NVIDIA_API_KEY`,
 * base URL `NVIDIA_BASE_URL`, 2048 max tokens, temperature 0.6.
 */
var NvidiaProvider = class {
  name = "nvidia";
  model;
  apiKey;
  baseURL;
  maxTokens;
  temperature;
  /**
   * @param {object} [opts] - Optional overrides for model, apiKey, baseURL,
   *   maxTokens and temperature.
   */
  constructor(opts = {}) {
    const { model, apiKey, baseURL, maxTokens, temperature } = opts;
    this.model = model ?? KIMI_K2_6;
    this.apiKey = apiKey ?? process.env.NVIDIA_API_KEY;
    this.baseURL = baseURL ?? NVIDIA_BASE_URL;
    this.maxTokens = maxTokens ?? 2048;
    this.temperature = temperature ?? 0.6;
  }
  /**
   * Sends one request and converts the first returned choice into SDK blocks.
   *
   * @param {object} req - Request with `messages`, `tools` and optional `system`.
   * @returns {Promise<{content: Array<object>, stopReason: string}>} Text and
   *   tool_use blocks; `stopReason` is "tool_use" when tool calls were
   *   returned, otherwise "end_turn".
   * @throws {Error} When no API key is configured or the HTTP status is not OK.
   */
  async generate(req) {
    if (!this.apiKey) {
      throw new Error("NvidiaProvider needs NVIDIA_API_KEY (nvapi-...).");
    }
    const payload = {
      model: this.model,
      messages: toOpenAIMessages(req),
      max_tokens: this.maxTokens,
      temperature: this.temperature
    };
    if (req.tools.length) {
      payload.tools = req.tools.map(({ name, description, inputSchema }) => ({
        type: "function",
        function: { name, description, parameters: inputSchema }
      }));
      payload.tool_choice = "auto";
    }
    const endpoint = `${this.baseURL}/chat/completions`;
    const response = await fetch(endpoint, {
      method: "POST",
      headers: {
        Authorization: `Bearer ${this.apiKey}`,
        "Content-Type": "application/json",
        Accept: "application/json"
      },
      body: JSON.stringify(payload)
    });
    if (!response.ok) {
      // Only the first 300 characters of the error body are kept in the message.
      throw new Error(`NVIDIA ${this.model} ${response.status}: ${(await response.text()).slice(0, 300)}`);
    }
    const data = await response.json();
    const firstChoice = data.choices?.[0];
    const reply = firstChoice?.message ?? {};
    const blocks = [];
    if (typeof reply.content === "string" && reply.content.trim()) {
      blocks.push({ type: "text", text: reply.content });
    }
    const requestedCalls = reply.tool_calls ?? [];
    for (const call of requestedCalls) {
      blocks.push({
        type: "tool_use",
        id: call.id ?? "",
        name: call.function?.name ?? "",
        input: safeParse(call.function?.arguments)
      });
    }
    // Always return at least one block, even for an empty reply.
    if (blocks.length === 0) {
      blocks.push({ type: "text", text: "" });
    }
    let stopReason = "end_turn";
    if (firstChoice?.finish_reason === "tool_calls" || requestedCalls.length) {
      stopReason = "tool_use";
    }
    return { content: blocks, stopReason };
  }
};
|
|
770
|
+
/**
 * Normalizes tool-call arguments into a plain object.
 *
 * The arguments arrive either as a JSON string or as an already-decoded value.
 * Malformed JSON, and anything that does not decode to a plain (non-null,
 * non-array) object — for example `"null"` or `"42"` — yields `{}` so callers
 * always receive an object to use as tool input.
 *
 * @param {unknown} args - Raw `function.arguments` value from a tool call.
 * @returns {object} The decoded arguments object, or `{}` when unusable.
 */
function safeParse(args) {
  let value = args;
  if (typeof value === "string") {
    try {
      value = JSON.parse(value);
    } catch {
      // Malformed JSON is treated as "no arguments" rather than failing the turn.
      return {};
    }
  }
  const isPlainObject = typeof value === "object" && value !== null && !Array.isArray(value);
  return isPlainObject ? value : {};
}
|
|
778
|
+
/**
 * Flattens an SDK request into an OpenAI-style chat `messages` array.
 *
 * - `req.system`, when truthy, becomes a leading "system" message.
 * - For a "user" message, each tool_result becomes its own "tool" message,
 *   followed by one "user" message holding the text/image/video blocks: a
 *   newline-joined string when text-only, an array of parts when media is present.
 * - Any other role is emitted as an "assistant" message with newline-joined
 *   text (or `null` when empty) and optional `tool_calls`.
 *
 * @param {{system?: string, messages: Array<{role: string, content: Array<object>}>}} req
 * @returns {Array<object>} Messages in wire order.
 */
function toOpenAIMessages(req) {
  const wire = req.system ? [{ role: "system", content: req.system }] : [];
  for (const message of req.messages) {
    if (message.role !== "user") {
      const texts = [];
      const calls = [];
      for (const block of message.content) {
        if (block.type === "text") {
          texts.push(block.text);
        } else if (block.type === "tool_use") {
          calls.push({
            id: block.id,
            type: "function",
            function: { name: block.name, arguments: JSON.stringify(block.input) }
          });
        }
      }
      const assistant = { role: "assistant", content: texts.join("\n") || null };
      if (calls.length) {
        assistant.tool_calls = calls;
      }
      wire.push(assistant);
      continue;
    }

    // Tool results are emitted before the user's own text/media.
    const inputs = [];
    let sawMedia = false;
    for (const block of message.content) {
      switch (block.type) {
        case "tool_result":
          wire.push({ role: "tool", tool_call_id: block.toolUseId, content: block.content });
          break;
        case "text":
          inputs.push(block);
          break;
        case "image":
        case "video":
          sawMedia = true;
          inputs.push(block);
          break;
        default:
          break;
      }
    }
    if (inputs.length) {
      const content = sawMedia
        ? inputs.map((block) => toOpenAIPart(block))
        : inputs.map((block) => block.text).join("\n");
      wire.push({ role: "user", content });
    }
  }
  return wire;
}
|
|
814
|
+
/**
 * Converts a single text/image/video block into an OpenAI-style content part.
 *
 * For media, an explicit `url` is used as-is; otherwise base64 `data` is
 * wrapped in a `data:` URL (mime type defaults to "application/octet-stream").
 * With neither, the URL is the empty string.
 *
 * @param {object} b - A text, image or video block.
 * @returns {object} A `text`, `video_url` or `image_url` part.
 */
function toOpenAIPart(b) {
  if (b.type === "text") {
    return { type: "text", text: b.text };
  }
  let url = b.url;
  if (url === undefined || url === null) {
    url = b.data ? `data:${b.mimeType ?? "application/octet-stream"};base64,${b.data}` : "";
  }
  return b.type === "video"
    ? { type: "video_url", video_url: { url } }
    : { type: "image_url", image_url: { url } };
}
|
|
821
|
+
|
|
822
|
+
// src/providers/cascade.ts
|
|
823
|
+
/**
 * Provider that tries a list of steps in order and returns the first success.
 *
 * When the request carries image/video content (per `hasMedia`), only steps
 * flagged `vision: true` are attempted. Each step that throws is reported to
 * `onFallback` before the next one is tried.
 */
var CascadeProvider = class {
  name = "cascade";
  steps;
  onFallback;
  /**
   * @param {{steps: Array<{provider: object, label: string, vision: boolean}>,
   *          onFallback?: Function}} opts
   * @throws {Error} When `opts.steps` is missing or empty.
   */
  constructor(opts) {
    // Guard missing opts/steps too, so misuse gets this message, not a TypeError.
    if (!opts?.steps?.length) throw new Error("CascadeProvider needs at least one step.");
    this.steps = opts.steps;
    this.onFallback = opts.onFallback;
  }
  /**
   * Runs the request through the eligible steps until one succeeds.
   *
   * @param {object} req - Request forwarded unchanged to each step's provider.
   * @returns {Promise<object>} The first successful provider response.
   * @throws {Error} When no step supports the required vision input, or when
   *   every eligible step fails (the last failure is attached as `cause`).
   */
  async generate(req) {
    // Providers may reject with anything (null, a string, an object with no
    // string message); describing the failure must never throw itself,
    // otherwise the cascade would abort instead of falling through.
    const describe = (err) => {
      if (typeof err?.message === "string") return err.message;
      if (err === undefined || err === null) return "unknown";
      return String(err);
    };
    const needsVision = hasMedia(req.messages);
    const eligible = this.steps.filter((s) => !needsVision || s.vision);
    if (!eligible.length) {
      throw new Error("Cascade: request needs vision but no step supports image/video input.");
    }
    let lastErr;
    for (const step of eligible) {
      try {
        return await step.provider.generate(req);
      } catch (err) {
        lastErr = err;
        this.onFallback?.({
          from: step.label,
          reason: describe(err).slice(0, 200),
          needsVision
        });
      }
    }
    throw new Error(`Cascade exhausted all steps. Last error: ${describe(lastErr)}`, { cause: lastErr });
  }
};
|
|
854
|
+
/**
 * Builds the default three-step cascade: two NVIDIA-hosted models followed by
 * a Gemini model.
 *
 * Model ids default to `KIMI_K2_6`, `DEEPSEEK_V4_PRO` and "gemini-2.5-flash".
 * When no `onFallback` is supplied, failures are logged with `console.warn`.
 *
 * @param {object} [opts] - Optional `nvidiaApiKey`, `googleApiKey`,
 *   `mainModel`, `secondaryModel`, `geminiModel` and `onFallback`.
 * @returns {CascadeProvider} The configured cascade.
 */
function createDefaultProvider(opts = {}) {
  const mainModel = opts.mainModel ?? KIMI_K2_6;
  const secondaryModel = opts.secondaryModel ?? DEEPSEEK_V4_PRO;
  const geminiModel = opts.geminiModel ?? "gemini-2.5-flash";

  const logFallback = (info) => console.warn(`[cascade] ${info.from} failed (${info.reason}); trying next`);
  const onFallback = opts.onFallback ?? logFallback;

  // Both NVIDIA steps share the same key and differ only in model/capability.
  const nvidiaStep = (model, vision) => ({
    provider: new NvidiaProvider({ model, apiKey: opts.nvidiaApiKey }),
    label: `nvidia:${model}`,
    vision
  });

  const steps = [
    // Kimi K2.6 accepts image + video
    nvidiaStep(mainModel, true),
    // DeepSeek V4 is text-only
    nvidiaStep(secondaryModel, false),
    {
      provider: new GeminiProvider({ model: geminiModel, apiKey: opts.googleApiKey }),
      label: `gemini:${geminiModel}`,
      vision: true
    }
  ];

  return new CascadeProvider({ onFallback, steps });
}
|
|
881
|
+
|
|
683
882
|
// src/index.ts
|
|
684
|
-
async function* query(prompt, options) {
|
|
883
|
+
async function* query(prompt, options = {}) {
|
|
685
884
|
const registry = new ToolRegistry();
|
|
686
885
|
if (options.includeBuiltins ?? true) registry.addBuiltins(builtinTools);
|
|
687
886
|
for (const s of options.mcpServers ?? []) registry.addServer(s);
|
|
688
|
-
const
|
|
887
|
+
const content = typeof prompt === "string" ? [{ type: "text", text: prompt }] : prompt;
|
|
888
|
+
const messages = [{ role: "user", content }];
|
|
689
889
|
const result = yield* runLoop({
|
|
690
|
-
provider: options.provider,
|
|
890
|
+
provider: options.provider ?? createDefaultProvider(),
|
|
691
891
|
registry,
|
|
692
892
|
permissions: new PermissionEngine(options.permissions ?? {}),
|
|
693
893
|
system: options.system ?? "You are a helpful agent.",
|
|
@@ -700,19 +900,27 @@ async function* query(prompt, options) {
|
|
|
700
900
|
export {
|
|
701
901
|
AnthropicProvider,
|
|
702
902
|
CHEAP_MODEL,
|
|
903
|
+
CascadeProvider,
|
|
904
|
+
DEEPSEEK_V4_FLASH,
|
|
905
|
+
DEEPSEEK_V4_PRO,
|
|
703
906
|
EXPENSIVE_MODEL,
|
|
704
907
|
GEMINI_CHEAP_MODEL,
|
|
705
908
|
GEMINI_EXPENSIVE_MODEL,
|
|
706
909
|
GeminiProvider,
|
|
910
|
+
KIMI_K2_6,
|
|
707
911
|
MockProvider,
|
|
912
|
+
NVIDIA_BASE_URL,
|
|
913
|
+
NvidiaProvider,
|
|
708
914
|
PermissionEngine,
|
|
709
915
|
ToolRegistry,
|
|
710
916
|
builtinTools,
|
|
711
917
|
classifyComplexity,
|
|
712
918
|
classifyComplexityGemini,
|
|
919
|
+
createDefaultProvider,
|
|
713
920
|
createSdkMcpServer,
|
|
714
921
|
fastHeuristic,
|
|
715
922
|
getRoutingStats,
|
|
923
|
+
hasMedia,
|
|
716
924
|
judgeComplexity,
|
|
717
925
|
judgeComplexityGemini,
|
|
718
926
|
latestUserText,
|