torus-ai 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -9
- package/dist/index.d.ts +104 -5
- package/dist/index.js +209 -17
- package/dist/index.js.map +1 -1
- package/models/registry.json +12 -1
- package/package.json +1 -1
- package/src/index.ts +20 -0
- package/src/pack.ts +155 -0
- package/src/packkit.ts +147 -0
- package/src/providers/cascade.ts +33 -22
- package/src/providers/nvidia.ts +3 -2
package/src/pack.ts
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import { existsSync } from "node:fs";
|
|
2
|
+
import { readFile } from "node:fs/promises";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { builtinTools } from "./builtins.ts";
|
|
5
|
+
import { runLoop } from "./loop.ts";
|
|
6
|
+
import { matchesAllow, PermissionEngine } from "./permissions.ts";
|
|
7
|
+
import { createCatalogServer, type CatalogItem } from "./packkit.ts";
|
|
8
|
+
import { createDefaultProvider } from "./providers/cascade.ts";
|
|
9
|
+
import { ToolRegistry } from "./tools.ts";
|
|
10
|
+
import type {
|
|
11
|
+
AgentEvent,
|
|
12
|
+
CanUseTool,
|
|
13
|
+
ContentBlock,
|
|
14
|
+
Message,
|
|
15
|
+
ModelProvider,
|
|
16
|
+
SdkMcpServer,
|
|
17
|
+
} from "./types.ts";
|
|
18
|
+
|
|
19
|
+
// Specialize the generic engine to a product by loading a *pack* (an adapter):
|
|
20
|
+
// persona + sales playbook + policy + domain tools + catalog grounding + guardrails.
|
|
21
|
+
// One engine, many packs — don't fork the SDK per vertical.
|
|
22
|
+
|
|
23
|
+
/** Grounding material a pack may ship alongside its persona. */
export interface PackKnowledge {
  /**
   * Product list. When non-empty, `createSpecializedAgent` exposes it to the
   * agent through a `search_catalog` tool.
   */
  catalog?: CatalogItem[];
  /** Brief reference text (policies, FAQs); added to the end of the system prompt. */
  faqs?: string;
}
|
|
29
|
+
|
|
30
|
+
/** Limits a pack places on what the agent may do and say. */
export interface PackGuardrails {
  /** Tool names the agent is permitted to call (namespaced; wildcards are accepted). */
  allowedTools?: string[];
  /** Namespaced tool names that need explicit confirmation before they run. */
  confirm?: string[];
  /** Additional custom gate, consulted after the allow/confirm checks. */
  canUseTool?: CanUseTool;
  /** Rule text (discount authority, no-overpromise, escalation) included in the prompt. */
  policy?: string;
}
|
|
40
|
+
|
|
41
|
+
/** Everything needed to specialize the generic engine to one product. */
export interface AgentPack {
  name: string;
  /** Who the agent is and how it speaks; the core of the system prompt. */
  persona: string;
  /** Sales stages and goal. */
  playbook?: string;
  /** Domain actions (quote, reserve, invoice, handoff, ...). */
  tools?: SdkMcpServer[];
  knowledge?: PackKnowledge;
  guardrails?: PackGuardrails;
  /** Model provider; when omitted the free-first cascade is used. */
  model?: ModelProvider;
}
|
|
50
|
+
|
|
51
|
+
/** Runtime options applied when turning a pack into an agent. */
export interface SpecializeOptions {
  /** Model provider to use; takes precedence over the pack's own `model`. */
  provider?: ModelProvider;
  /** Invoked when a tool listed under `confirm` is about to run; resolve to true to let it proceed. */
  onConfirm?: (toolName: string, input: Record<string, unknown>) => boolean | Promise<boolean>;
  /** Expose the built-in file tools (read/write/list). Packs leave these off unless asked. */
  includeBuiltins?: boolean;
  /** Turn limit handed to the agent loop; a per-query value overrides it. */
  maxTurns?: number;
}
|
|
59
|
+
|
|
60
|
+
/** A pack bound to a provider and tool set, ready to answer prompts. */
export interface SpecializedAgent {
  /** The pack this agent was built from. */
  pack: AgentPack;
  /** The assembled system prompt. */
  system: string;
  /** Tool servers registered for every query (catalog server first, when present). */
  servers: SdkMcpServer[];
  /**
   * Run the agent loop on a single user prompt, streaming its events and
   * ending with a `result` event.
   */
  query(prompt: string | ContentBlock[], extra?: { maxTurns?: number }): AsyncGenerator<AgentEvent>;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Build a ready-to-run specialized agent from a pack.
 *
 * The system prompt is assembled, in order, from: persona, playbook, policy,
 * a catalog-grounding instruction (only when some server exposes a
 * `search_catalog` tool), and the reference/FAQ text. Sections that are
 * missing or blank after trimming are left out entirely.
 *
 * Tool calls are gated in this order: pack allowlist, then confirmation, then
 * the pack's own `canUseTool`. A tool listed under `confirm` is denied when no
 * `onConfirm` callback is supplied.
 *
 * @param pack - The pack to specialize the engine with.
 * @param opts - Provider override, confirmation callback, built-in tool opt-in, turn limit.
 * @returns The pack, the assembled system prompt, the tool servers, and a `query` generator.
 */
export function createSpecializedAgent(pack: AgentPack, opts: SpecializeOptions = {}): SpecializedAgent {
  // Domain tool servers; the auto-wired catalog server (if any) goes first.
  const servers: SdkMcpServer[] = [...(pack.tools ?? [])];
  if (pack.knowledge?.catalog?.length) servers.unshift(createCatalogServer(pack.knowledge.catalog));

  // Assemble the system prompt from persona + playbook + policy + grounding + faqs.
  const parts: string[] = [];
  const addSection = (text: string | undefined, heading?: string): void => {
    const body = text?.trim();
    // Skip blank sections so the prompt never starts with empty lines or
    // carries a heading with nothing under it.
    if (!body) return;
    parts.push(heading ? `## ${heading}\n${body}` : body);
  };
  addSection(pack.persona);
  addSection(pack.playbook, "Playbook");
  addSection(pack.guardrails?.policy, "Policy");
  if (servers.some((s) => s.tools.some((t) => t.name === "search_catalog"))) {
    parts.push(
      "Use the `search_catalog` tool for every product, price, or availability question. Never invent a price or claim availability you did not look up.",
    );
  }
  addSection(pack.knowledge?.faqs, "Reference");
  const system = parts.join("\n\n");

  const confirmTools = pack.guardrails?.confirm ?? [];
  const allow = pack.guardrails?.allowedTools;
  const base = pack.guardrails?.canUseTool;

  const canUseTool: CanUseTool = async (name, input) => {
    // Allowlist first: never ask a human to confirm a call that would be
    // refused by the pack's guardrails anyway.
    if (allow && !matchesAllow(name, allow)) {
      return { behavior: "deny", message: `${name} is not allowed by this pack's guardrails.` };
    }
    if (confirmTools.includes(name)) {
      // Fail closed: without an onConfirm callback the tool cannot run.
      const ok = opts.onConfirm ? await opts.onConfirm(name, input) : false;
      if (!ok) return { behavior: "deny", message: `${name} requires confirmation and it was not granted.` };
    }
    return base ? base(name, input) : { behavior: "allow" };
  };

  const provider = opts.provider ?? pack.model ?? createDefaultProvider();
  const includeBuiltins = opts.includeBuiltins ?? false;

  return {
    pack,
    system,
    servers,
    async *query(prompt, extra) {
      // A fresh registry per query; the servers themselves are shared across queries.
      const registry = new ToolRegistry();
      if (includeBuiltins) registry.addBuiltins(builtinTools);
      for (const s of servers) registry.addServer(s);

      const content: ContentBlock[] =
        typeof prompt === "string" ? [{ type: "text", text: prompt }] : prompt;
      const messages: Message[] = [{ role: "user", content }];

      const result = yield* runLoop({
        provider,
        registry,
        permissions: new PermissionEngine({ canUseTool }),
        system,
        messages,
        toolContext: { workspaceDir: process.cwd() },
        // A per-query limit wins over the agent-wide one.
        maxTurns: extra?.maxTurns ?? opts.maxTurns,
      });
      yield { type: "result", finalText: result.finalText, turns: result.turns };
    },
  };
}
|
|
128
|
+
|
|
129
|
+
/**
 * Load a pack's content from a folder (so non-devs can edit it):
 *   persona.md · playbook.md · policy.md · catalog.json · faqs.md
 * Code tools (quote/reserve/invoice/...) are passed via `opts.tools`.
 *
 * Every file is optional. A missing `persona.md` yields an empty persona; a
 * missing or blank `catalog.json` yields no catalog. The pack name is the last
 * path segment of `dir`.
 *
 * @param dir - Folder containing the pack files.
 * @param opts - `tools`: code-defined tool servers to attach to the pack.
 * @returns The assembled pack.
 * @throws SyntaxError when `catalog.json` is not valid JSON.
 * @throws TypeError when `catalog.json` parses to something other than an array.
 */
export async function loadPack(
  dir: string,
  opts: { tools?: SdkMcpServer[] } = {},
): Promise<AgentPack> {
  const read = async (f: string): Promise<string | undefined> => {
    const p = join(dir, f);
    return existsSync(p) ? await readFile(p, "utf8") : undefined;
  };

  // The files are independent of each other, so read them concurrently.
  const [persona, playbook, policy, faqs, catalogRaw] = await Promise.all([
    read("persona.md"),
    read("playbook.md"),
    read("policy.md"),
    read("faqs.md"),
    read("catalog.json"),
  ]);

  let catalog: CatalogItem[] | undefined;
  // Editors on some platforms prepend a UTF-8 BOM, which JSON.parse rejects.
  const catalogText = catalogRaw?.replace(/^\uFEFF/, "").trim();
  if (catalogText) {
    const catalogPath = join(dir, "catalog.json");
    let parsed: unknown;
    try {
      parsed = JSON.parse(catalogText);
    } catch (err: unknown) {
      const detail = err instanceof Error ? err.message : String(err);
      // Name the file so a hand-edited catalog is easy to track down.
      throw new SyntaxError(`Invalid JSON in ${catalogPath}: ${detail}`);
    }
    // The catalog search tool filters this value as an array; reject other
    // shapes here rather than failing on the first search.
    if (!Array.isArray(parsed)) {
      throw new TypeError(`${catalogPath} must contain a JSON array of catalog items.`);
    }
    catalog = parsed;
  }

  return {
    name: dir.split(/[\\/]/).filter(Boolean).pop() ?? "pack",
    persona: persona ?? "",
    playbook,
    knowledge: { catalog, faqs },
    guardrails: policy ? { policy } : undefined,
    tools: opts.tools,
  };
}
|
package/src/packkit.ts
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import { createSdkMcpServer, tool } from "./tools.ts";
|
|
2
|
+
import type { SdkMcpServer } from "./types.ts";
|
|
3
|
+
|
|
4
|
+
// Reusable tool patterns for specializing an agent to a vertical (a "pack").
|
|
5
|
+
// Compose these into a pack's `tools`. They are deliberately backend-agnostic —
|
|
6
|
+
// swap the in-memory stubs for real catalog/CRM/payment integrations later.
|
|
7
|
+
|
|
8
|
+
/**
 * One catalog entry. Any extra fields are allowed; the ones below are the
 * fields `search_catalog` inspects by name.
 */
export type CatalogItem = Record<string, unknown> & {
  id?: string;
  name?: string;
  /** Compared against the search's `maxPrice`; items without a numeric price are never price-filtered. */
  price?: number;
  /** Matched against the search's `tags`. */
  tags?: string[];
  /** Items with `available: false` are left out of search results. */
  available?: boolean;
};
|
|
15
|
+
|
|
16
|
+
/**
 * A `search_catalog` tool over an in-memory product list (text + price + tags).
 *
 * Matching rules:
 * - items with `available: false` are never returned;
 * - `query` is split on whitespace and every word must occur (case-insensitive)
 *   in the item's JSON serialization, which includes field names as well as values;
 * - `maxPrice` drops items whose numeric `price` exceeds it; items without a
 *   numeric price are kept;
 * - `tags` keeps items sharing at least one tag (case-insensitive);
 * - at most `limit` items are returned (default 5).
 *
 * Tool input comes from the model, so malformed fields (non-string query,
 * NaN price, non-positive or fractional limit) are ignored or normalized
 * instead of throwing or silently emptying the result.
 *
 * @param items - The catalog to search.
 * @param opts - `serverName`: name of the created server (default "catalog").
 * @returns A server exposing the single `search_catalog` tool.
 */
export function createCatalogServer(
  items: CatalogItem[],
  opts: { serverName?: string } = {},
): SdkMcpServer {
  const defaultLimit = 5;
  const search = tool(
    "search_catalog",
    "Search the product catalog by text, max price, and tags. Returns matching items with prices and availability. Use this for every product/price/availability question — never guess.",
    {
      type: "object",
      properties: {
        query: { type: "string" },
        maxPrice: { type: "number" },
        tags: { type: "array", items: { type: "string" } },
        limit: { type: "number" },
      },
    },
    (input: { query?: string; maxPrice?: number; tags?: string[]; limit?: number }) => {
      const words =
        typeof input.query === "string" ? input.query.toLowerCase().split(/\s+/).filter(Boolean) : [];
      // A NaN cap would compare false against every price and wipe the result.
      const priceCap =
        typeof input.maxPrice === "number" && !Number.isNaN(input.maxPrice) ? input.maxPrice : undefined;
      const wantedTags = Array.isArray(input.tags)
        ? input.tags.filter((t): t is string => typeof t === "string").map((t) => t.toLowerCase())
        : [];
      // slice(0, 0) returns nothing and a negative end drops trailing matches,
      // so anything that is not a count of at least 1 falls back to the default.
      const max =
        typeof input.limit === "number" && Number.isFinite(input.limit) && input.limit >= 1
          ? Math.floor(input.limit)
          : defaultLimit;

      const out = items
        .filter((it) => {
          if (it.available === false) return false;
          if (words.length) {
            const hay = JSON.stringify(it).toLowerCase();
            if (!words.every((w) => hay.includes(w))) return false;
          }
          if (priceCap !== undefined && typeof it.price === "number" && !(it.price <= priceCap)) {
            return false;
          }
          if (wantedTags.length) {
            if (!Array.isArray(it.tags)) return false;
            const itemTags = it.tags.map((t) => String(t).toLowerCase());
            if (!wantedTags.some((t) => itemTags.includes(t))) return false;
          }
          return true;
        })
        .slice(0, max);

      return { content: out.length ? JSON.stringify(out, null, 2) : "No matching items." };
    },
  );
  return createSdkMcpServer({ name: opts.serverName ?? "catalog", tools: [search] });
}
|
|
54
|
+
|
|
55
|
+
/**
 * `get_lead` / `update_lead` over an in-memory customer profile (the funnel state).
 *
 * The profile starts as a shallow copy of `initial` and is also exposed as the
 * `lead` property of the returned server, so the host can read it directly.
 *
 * `update_lead` merges the top-level keys of `fields` into the profile. Since
 * the input comes from the model, anything that is not a plain object (string,
 * array, null) is treated as "no fields" rather than being spread into the
 * profile as index keys, and a `__proto__` key is ignored.
 *
 * @param initial - Seed values for the profile.
 * @returns A server named "lead" with both tools, plus the live `lead` object.
 */
export function createLeadMemoryServer(
  initial: Record<string, unknown> = {},
): SdkMcpServer & { lead: Record<string, unknown> } {
  const lead: Record<string, unknown> = { ...initial };
  const get = tool(
    "get_lead",
    "Get what we know about the current customer (name, date, budget, stage, items seen).",
    { type: "object", properties: {} },
    () => ({ content: JSON.stringify(lead, null, 2) }),
  );
  const update = tool(
    "update_lead",
    "Merge fields into the customer profile, e.g. { budget: 2000, stage: 'recommend' }.",
    { type: "object", properties: { fields: { type: "object" } }, required: ["fields"] },
    (input: { fields: Record<string, unknown> }) => {
      const raw: unknown = input.fields;
      const fields: Record<string, unknown> =
        typeof raw === "object" && raw !== null && !Array.isArray(raw)
          ? (raw as Record<string, unknown>)
          : {};
      const applied: string[] = [];
      for (const [key, value] of Object.entries(fields)) {
        // Assigning `__proto__` would swap the profile's prototype instead of storing a field.
        if (key === "__proto__") continue;
        lead[key] = value;
        applied.push(key);
      }
      return { content: `updated: ${applied.join(", ") || "(none)"}` };
    },
  );
  return Object.assign(createSdkMcpServer({ name: "lead", tools: [get, update] }), { lead });
}
|
|
77
|
+
|
|
78
|
+
/** An order recorded by the `create_invoice` tool. */
export interface Invoice {
  /** Identifier assigned when the invoice is created. */
  id: string;
  amount: number;
  currency: string;
  /** Free-form order contents, stored as supplied by the caller. */
  items?: unknown;
  /** Free-form customer details, stored as supplied by the caller. */
  customer?: unknown;
  /** The only status this type allows is "pending". */
  status: "pending";
}
|
|
86
|
+
|
|
87
|
+
/**
 * A generic `create_invoice` settle tool: records an order + amount as pending
 * and returns an invoice id. Provider-agnostic — wire your processor via
 * `onCreate` (e.g. create a real payment link, then confirm via webhook).
 *
 * Invoices are kept in memory and exposed as the `invoices` property of the
 * returned server. Ids are sequential (`inv_1`, `inv_2`, ...) per server.
 *
 * The amount comes from the model, so it is validated before anything is
 * recorded: a value that is not a finite, non-negative number produces an
 * explanatory tool result and no invoice. A missing or blank currency
 * defaults to "USD".
 *
 * @param opts - `onCreate`: called synchronously with each invoice right after it is recorded.
 * @returns A server named "billing" with the tool, plus the live `invoices` array.
 */
export function createInvoiceServer(
  opts: { onCreate?: (inv: Invoice) => void } = {},
): SdkMcpServer & { invoices: Invoice[] } {
  const invoices: Invoice[] = [];
  let n = 0;
  const create = tool(
    "create_invoice",
    "Record an order and amount as a pending invoice to settle, returning an invoice id. Call this only after the customer has agreed to buy.",
    {
      type: "object",
      properties: {
        amount: { type: "number" },
        currency: { type: "string" },
        items: {},
        customer: {},
      },
      required: ["amount"],
    },
    (input: { amount: number; currency?: string; items?: unknown; customer?: unknown }) => {
      const amount: unknown = input.amount;
      if (typeof amount !== "number" || !Number.isFinite(amount) || amount < 0) {
        // Refuse before consuming an id or notifying the payment hook.
        return { content: "Invoice not created: `amount` must be a finite, non-negative number." };
      }
      const currency =
        typeof input.currency === "string" && input.currency.trim() ? input.currency.trim() : "USD";

      const inv: Invoice = {
        id: `inv_${++n}`,
        amount,
        currency,
        items: input.items,
        customer: input.customer,
        status: "pending",
      };
      invoices.push(inv);
      opts.onCreate?.(inv);
      return {
        content: JSON.stringify({ invoiceId: inv.id, status: inv.status, amount: inv.amount, currency: inv.currency }),
      };
    },
  );
  return Object.assign(createSdkMcpServer({ name: "billing", tools: [create] }), { invoices });
}
|
|
128
|
+
|
|
129
|
+
/**
 * Create a server named "support" whose single tool, `handoff_human`,
 * escalates the conversation to a person.
 *
 * @param opts - `onHandoff`: invoked with the reason and summary on every
 *   escalation; a missing summary is passed as an empty string.
 * @returns The server exposing `handoff_human`.
 */
export function createHandoffServer(
  opts: { onHandoff?: (info: { reason: string; summary: string }) => void } = {},
): SdkMcpServer {
  const description =
    "Escalate to a human agent with a reason and a short summary of the conversation so far. Use when you're stuck, the request is high-value, or the customer asks for a person.";

  // Notify the host (if it registered a hook), then acknowledge to the model.
  const escalate = ({ reason, summary }: { reason: string; summary?: string }) => {
    opts.onHandoff?.({ reason, summary: summary ?? "" });
    return { content: "Escalated to a human; they will take over shortly." };
  };

  const handoff = tool(
    "handoff_human",
    description,
    {
      type: "object",
      properties: { reason: { type: "string" }, summary: { type: "string" } },
      required: ["reason"],
    },
    escalate,
  );
  return createSdkMcpServer({ name: "support", tools: [handoff] });
}
|
package/src/providers/cascade.ts
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
|
-
import { hasMedia } from "../types.ts";
|
|
2
1
|
import type { ModelProvider, ModelRequest, ModelResponse } from "../types.ts";
|
|
3
|
-
import { DEEPSEEK_V4_PRO, KIMI_K2_6, NvidiaProvider } from "./nvidia.ts";
|
|
2
|
+
import { DEEPSEEK_V4_PRO, KIMI_K2_6, LLAMA_VISION, NvidiaProvider } from "./nvidia.ts";
|
|
4
3
|
import { GeminiProvider } from "./gemini.ts";
|
|
5
4
|
|
|
6
5
|
// Orchestration: try a prioritized list of (provider, model) steps, falling
|
|
7
6
|
// through to the next on failure (rate limit, error, or capability mismatch).
|
|
8
|
-
// Capability-aware:
|
|
9
|
-
//
|
|
7
|
+
// Capability-aware: image requests only go to vision steps; video requests only
|
|
8
|
+
// to video steps — text-only models (Kimi, DeepSeek) are skipped for those.
|
|
10
9
|
|
|
11
10
|
export interface CascadeStep {
|
|
12
11
|
provider: ModelProvider;
|
|
13
12
|
label: string; // e.g. "nvidia:kimi-k2.6"
|
|
14
|
-
vision: boolean; //
|
|
13
|
+
vision: boolean; // accepts image input?
|
|
14
|
+
video?: boolean; // accepts video input? (default false)
|
|
15
15
|
}
|
|
16
16
|
|
|
17
17
|
export interface CascadeOptions {
|
|
@@ -32,11 +32,22 @@ export class CascadeProvider implements ModelProvider {
|
|
|
32
32
|
}
|
|
33
33
|
|
|
34
34
|
async generate(req: ModelRequest): Promise<ModelResponse> {
|
|
35
|
-
const
|
|
36
|
-
|
|
35
|
+
const has = (t: "image" | "video") =>
|
|
36
|
+
req.messages.some((m) => m.content.some((b) => b.type === t));
|
|
37
|
+
const needsVideo = has("video");
|
|
38
|
+
const needsImage = has("image");
|
|
39
|
+
const needsVision = needsImage || needsVideo;
|
|
40
|
+
|
|
41
|
+
const eligible = needsVideo
|
|
42
|
+
? this.steps.filter((s) => s.video)
|
|
43
|
+
: needsImage
|
|
44
|
+
? this.steps.filter((s) => s.vision)
|
|
45
|
+
: this.steps;
|
|
37
46
|
|
|
38
47
|
if (!eligible.length) {
|
|
39
|
-
throw new Error(
|
|
48
|
+
throw new Error(
|
|
49
|
+
`Cascade: request needs ${needsVideo ? "video" : "image"} input but no step supports it.`,
|
|
50
|
+
);
|
|
40
51
|
}
|
|
41
52
|
|
|
42
53
|
let lastErr: unknown;
|
|
@@ -63,6 +74,8 @@ export interface DefaultProviderOptions {
|
|
|
63
74
|
mainModel?: string;
|
|
64
75
|
/** Override the secondary NVIDIA model (default DeepSeek V4 Pro). */
|
|
65
76
|
secondaryModel?: string;
|
|
77
|
+
/** NVIDIA vision model for image requests (default llama-3.2-90b-vision). */
|
|
78
|
+
visionModel?: string;
|
|
66
79
|
/** Gemini model used as the final fallback option (default gemini-2.5-flash). */
|
|
67
80
|
geminiModel?: string;
|
|
68
81
|
onFallback?: CascadeOptions["onFallback"];
|
|
@@ -70,16 +83,20 @@ export interface DefaultProviderOptions {
|
|
|
70
83
|
|
|
71
84
|
/**
|
|
72
85
|
* The SDK's recommended default: free NVIDIA endpoints first, Google as one
|
|
73
|
-
* fallback option.
|
|
86
|
+
* fallback option. Capability-aware — image/video requests skip the text-only
|
|
87
|
+
* steps automatically.
|
|
74
88
|
*
|
|
75
|
-
* 1. NVIDIA Kimi K2.6
|
|
76
|
-
* 2. NVIDIA DeepSeek V4 Pro
|
|
77
|
-
* 3.
|
|
89
|
+
* 1. NVIDIA Kimi K2.6 — main; agentic + tools (text)
|
|
90
|
+
* 2. NVIDIA DeepSeek V4 Pro — 1M-ctx text; skipped for media
|
|
91
|
+
* 3. NVIDIA Llama-3.2-90B-Vision — image requests
|
|
92
|
+
* 4. Gemini 2.5 Flash — final fallback; image + video
|
|
78
93
|
*/
|
|
79
94
|
export function createDefaultProvider(opts: DefaultProviderOptions = {}): CascadeProvider {
|
|
80
95
|
const main = opts.mainModel ?? KIMI_K2_6;
|
|
81
96
|
const secondary = opts.secondaryModel ?? DEEPSEEK_V4_PRO;
|
|
97
|
+
const vision = opts.visionModel ?? LLAMA_VISION;
|
|
82
98
|
const gemini = opts.geminiModel ?? "gemini-2.5-flash";
|
|
99
|
+
const nv = (model: string) => new NvidiaProvider({ model, apiKey: opts.nvidiaApiKey });
|
|
83
100
|
|
|
84
101
|
return new CascadeProvider({
|
|
85
102
|
onFallback:
|
|
@@ -87,20 +104,14 @@ export function createDefaultProvider(opts: DefaultProviderOptions = {}): Cascad
|
|
|
87
104
|
((info) =>
|
|
88
105
|
console.warn(`[cascade] ${info.from} failed (${info.reason}); trying next`)),
|
|
89
106
|
steps: [
|
|
90
|
-
{
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
vision: true, // Kimi K2.6 accepts image + video
|
|
94
|
-
},
|
|
95
|
-
{
|
|
96
|
-
provider: new NvidiaProvider({ model: secondary, apiKey: opts.nvidiaApiKey }),
|
|
97
|
-
label: `nvidia:${secondary}`,
|
|
98
|
-
vision: false, // DeepSeek V4 is text-only
|
|
99
|
-
},
|
|
107
|
+
{ provider: nv(main), label: `nvidia:${main}`, vision: false, video: false },
|
|
108
|
+
{ provider: nv(secondary), label: `nvidia:${secondary}`, vision: false, video: false },
|
|
109
|
+
{ provider: nv(vision), label: `nvidia:${vision}`, vision: true, video: false },
|
|
100
110
|
{
|
|
101
111
|
provider: new GeminiProvider({ model: gemini, apiKey: opts.googleApiKey }),
|
|
102
112
|
label: `gemini:${gemini}`,
|
|
103
113
|
vision: true,
|
|
114
|
+
video: true,
|
|
104
115
|
},
|
|
105
116
|
],
|
|
106
117
|
});
|
package/src/providers/nvidia.ts
CHANGED
|
@@ -13,9 +13,10 @@ import type {
|
|
|
13
13
|
export const NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";
|
|
14
14
|
|
|
15
15
|
// Exact IDs confirmed against GET /v1/models.
|
|
16
|
-
export const KIMI_K2_6 = "moonshotai/kimi-k2.6"; // 256K ctx, tools,
|
|
16
|
+
export const KIMI_K2_6 = "moonshotai/kimi-k2.6"; // 256K ctx, tools, agentic — text-only on NIM (verified)
|
|
17
17
|
export const DEEPSEEK_V4_PRO = "deepseek-ai/deepseek-v4-pro"; // 1M ctx, tools, text-only
|
|
18
18
|
export const DEEPSEEK_V4_FLASH = "deepseek-ai/deepseek-v4-flash"; // faster/cheaper, text-only
|
|
19
|
+
export const LLAMA_VISION = "meta/llama-3.2-90b-vision-instruct"; // free NVIDIA vision model (image), verified
|
|
19
20
|
|
|
20
21
|
export interface NvidiaOptions {
|
|
21
22
|
model?: string;
|
|
@@ -38,7 +39,7 @@ export class NvidiaProvider implements ModelProvider {
|
|
|
38
39
|
this.apiKey = opts.apiKey ?? process.env.NVIDIA_API_KEY;
|
|
39
40
|
this.baseURL = opts.baseURL ?? NVIDIA_BASE_URL;
|
|
40
41
|
this.maxTokens = opts.maxTokens ?? 2048;
|
|
41
|
-
this.temperature = opts.temperature ?? 0.
|
|
42
|
+
this.temperature = opts.temperature ?? 0.2; // low default for deterministic agent behavior
|
|
42
43
|
}
|
|
43
44
|
|
|
44
45
|
async generate(req: ModelRequest): Promise<ModelResponse> {
|