torus-ai 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -92,9 +92,10 @@ into `query()`, `runPipeline()`, or `runLoop()` interchangeably:
92
92
  **The default is a free-first cascade.** If you don't pass a provider, `query()`
93
93
  uses `createDefaultProvider()` — it tries each step and falls through on failure:
94
94
 
95
- 1. **NVIDIA Kimi K2.6** — main; agentic + multimodal (image/video), free NIM endpoint
96
- 2. **NVIDIA DeepSeek V4 Pro** — 1M-context text model, free; *skipped for image/video*
97
- 3. **Gemini 2.5 Flash** — final fallback, different provider for resilience
95
+ 1. **NVIDIA Kimi K2.6** — main; agentic + tools (text), free NIM endpoint
96
+ 2. **NVIDIA DeepSeek V4 Pro** — 1M-context text model, free; *skipped for media*
97
+ 3. **NVIDIA Llama-3.2-90B-Vision** — image requests, free
98
+ 4. **Gemini 2.5 Flash** — final fallback (image + video), different provider for resilience
98
99
 
99
100
  ```ts
100
101
  import { query } from "torus-ai"; // NVIDIA_API_KEY in env → cascade default
@@ -104,20 +105,25 @@ import { createDefaultProvider } from "torus-ai";
104
105
  const provider = createDefaultProvider({ mainModel: "moonshotai/kimi-k2.6" });
105
106
  ```
106
107
 
107
- It's **capability-aware**: image/video requests automatically skip text-only steps.
108
+ It's **capability-aware**: image requests skip text-only steps and route to a
109
+ vision model; video requests route only to a video-capable step.
108
110
 
109
- ### Multimodal (image now, video experimental)
111
+ ### Multimodal (image verified, video experimental)
110
112
 
111
- Pass content blocks instead of a string. Images route to a vision-capable step
112
- (Kimi / Gemini / Claude); video is best-effort to Kimi.
113
+ Pass content blocks instead of a string. Images route to a vision step
114
+ (NVIDIA Llama-Vision / Gemini); video routes to Gemini.
113
115
 
114
116
  ```ts
115
117
  await query([
116
- { type: "text", text: "What's in this image?" },
117
- { type: "image", url: "https://example.com/cat.png" }, // or { data, mimeType }
118
+ { type: "text", text: "What animal is this?" },
119
+ { type: "image", url: "https://example.com/cat.jpg" }, // or { data: base64, mimeType }
118
120
  ]);
119
121
  ```
120
122
 
123
+ > Note: Kimi K2.6's docs claim vision, but its NIM endpoint is **text-only in
124
+ > practice** (verified) — so the cascade sends images to a real vision model
125
+ > instead. Video is experimental and currently served only by Gemini.
126
+
121
127
  ### Cost routing (per provider)
122
128
 
123
129
  Each model provider also supports `route: true` — fast heuristics, then a
@@ -138,6 +144,52 @@ A weekly GitHub Action ([model-watch.yml](./.github/workflows/model-watch.yml))
138
144
  pulls NVIDIA's live `/v1/models`, flags new free endpoints as candidates, and opens
139
145
  a PR for human review against the policy. Run it locally with `npm run model-watch`.
140
146
 
147
+ ## Specializing for a product (packs)
148
+
149
+ Don't fork the SDK per product — load a **pack**. A pack is an adapter that turns
150
+ the generic engine into a vertical specialist (a bridal consultant, a mortgage
151
+ advisor, a support agent): persona + sales playbook + policy + domain tools +
152
+ catalog grounding + guardrails.
153
+
154
+ ```ts
155
+ import { createSpecializedAgent, createCatalogServer, createInvoiceServer,
156
+ createHandoffServer } from "torus-ai";
157
+
158
+ const agent = createSpecializedAgent({
159
+ name: "bridal",
160
+ persona: "You are a warm bridal consultant for Aurora Bridal.",
161
+ playbook: "discover needs → recommend → handle objections → close → settle → confirm",
162
+ knowledge: { catalog: dresses, faqs: "Alterations take 3 weeks. ..." }, // → search_catalog
163
+ tools: [createInvoiceServer(), createHandoffServer()],
164
+ guardrails: {
165
+ policy: "Never invent a price or availability. Max 10% discount. Escalate over $5,000.",
166
+ confirm: ["mcp__billing__create_invoice"], // money step needs a yes
167
+ },
168
+ }, {
169
+ onConfirm: async (tool, input) => askHuman(tool, input), // your confirmation UI
170
+ });
171
+
172
+ for await (const ev of agent.query("Anything under $2k for an August wedding?")) { /* ... */ }
173
+ ```
174
+
175
+ What the pack gives you, mapped to the engine:
176
+
177
+ | Pack part | Effect |
178
+ |---|---|
179
+ | `persona` + `playbook` + `policy` | assembled into the system prompt |
180
+ | `knowledge.catalog` | auto-wired `search_catalog` tool + a "never guess prices" instruction |
181
+ | `tools` | your domain actions (compose the [toolkit](./src/packkit.ts): catalog, lead memory, invoice, handoff) |
182
+ | `guardrails.allowedTools` / `confirm` / `canUseTool` | the safety gate — irreversible steps (billing) pause for confirmation |
183
+ | `model` | defaults to the free-first cascade |
184
+
185
+ Edit content as files (`persona.md`, `playbook.md`, `policy.md`, `catalog.json`,
186
+ `faqs.md`) and `loadPack("packs/bridal", { tools })` assembles the pack — so a shop
187
+ owner edits the catalog and tone while devs write the few action tools.
188
+
189
+ The reusable toolkit ([`src/packkit.ts`](./src/packkit.ts)): `createCatalogServer`,
190
+ `createLeadMemoryServer`, `createInvoiceServer` (generic settle stub),
191
+ `createHandoffServer`.
192
+
141
193
  ## The stage contract (Layer 2)
142
194
 
143
195
  Each `stages/NN_verb/CONTEXT.md` is both the agent's instructions and human docs:
package/dist/index.d.ts CHANGED
@@ -329,6 +329,7 @@ declare const NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";
329
329
  declare const KIMI_K2_6 = "moonshotai/kimi-k2.6";
330
330
  declare const DEEPSEEK_V4_PRO = "deepseek-ai/deepseek-v4-pro";
331
331
  declare const DEEPSEEK_V4_FLASH = "deepseek-ai/deepseek-v4-flash";
332
+ declare const LLAMA_VISION = "meta/llama-3.2-90b-vision-instruct";
332
333
  interface NvidiaOptions {
333
334
  model?: string;
334
335
  apiKey?: string;
@@ -351,6 +352,7 @@ interface CascadeStep {
351
352
  provider: ModelProvider;
352
353
  label: string;
353
354
  vision: boolean;
355
+ video?: boolean;
354
356
  }
355
357
  interface CascadeOptions {
356
358
  steps: CascadeStep[];
@@ -375,20 +377,117 @@ interface DefaultProviderOptions {
375
377
  mainModel?: string;
376
378
  /** Override the secondary NVIDIA model (default DeepSeek V4 Pro). */
377
379
  secondaryModel?: string;
380
+ /** NVIDIA vision model for image requests (default llama-3.2-90b-vision). */
381
+ visionModel?: string;
378
382
  /** Gemini model used as the final fallback option (default gemini-2.5-flash). */
379
383
  geminiModel?: string;
380
384
  onFallback?: CascadeOptions["onFallback"];
381
385
  }
382
386
  /**
383
387
  * The SDK's recommended default: free NVIDIA endpoints first, Google as one
384
- * fallback option.
388
+ * fallback option. Capability-aware — image/video requests skip the text-only
389
+ * steps automatically.
385
390
  *
386
- * 1. NVIDIA Kimi K2.6 — main; agentic + multimodal (image/video)
387
- * 2. NVIDIA DeepSeek V4 Pro — text-only; skipped for image/video requests
388
- * 3. Gemini 2.5 Flash — final fallback; multimodal
391
+ * 1. NVIDIA Kimi K2.6 — main; agentic + tools (text)
392
+ * 2. NVIDIA DeepSeek V4 Pro — 1M-ctx text; skipped for media
393
+ * 3. NVIDIA Llama-3.2-90B-Vision — image requests
394
+ * 4. Gemini 2.5 Flash — final fallback; image + video
389
395
  */
390
396
  declare function createDefaultProvider(opts?: DefaultProviderOptions): CascadeProvider;
391
397
 
398
+ type CatalogItem = Record<string, unknown> & {
399
+ id?: string;
400
+ name?: string;
401
+ price?: number;
402
+ tags?: string[];
403
+ available?: boolean;
404
+ };
405
+ /** A `search_catalog` tool over an in-memory product list (text + price + tags). */
406
+ declare function createCatalogServer(items: CatalogItem[], opts?: {
407
+ serverName?: string;
408
+ }): SdkMcpServer;
409
+ /** `get_lead` / `update_lead` over an in-memory customer profile (the funnel state). */
410
+ declare function createLeadMemoryServer(initial?: Record<string, unknown>): SdkMcpServer & {
411
+ lead: Record<string, unknown>;
412
+ };
413
+ interface Invoice {
414
+ id: string;
415
+ amount: number;
416
+ currency: string;
417
+ items?: unknown;
418
+ customer?: unknown;
419
+ status: "pending";
420
+ }
421
+ /**
422
+ * A generic `create_invoice` settle tool: records an order + amount as pending
423
+ * and returns an invoice id. Provider-agnostic — wire your processor via
424
+ * `onCreate` (e.g. create a real payment link, then confirm via webhook).
425
+ */
426
+ declare function createInvoiceServer(opts?: {
427
+ onCreate?: (inv: Invoice) => void;
428
+ }): SdkMcpServer & {
429
+ invoices: Invoice[];
430
+ };
431
+ /** A `handoff_human` escalation tool. Wire `onHandoff` to notify a real agent. */
432
+ declare function createHandoffServer(opts?: {
433
+ onHandoff?: (info: {
434
+ reason: string;
435
+ summary: string;
436
+ }) => void;
437
+ }): SdkMcpServer;
438
+
439
+ interface PackKnowledge {
440
+ /** Product catalog — auto-wired into a `search_catalog` tool for grounding. */
441
+ catalog?: CatalogItem[];
442
+ /** Short reference text (policies, FAQs) appended to the system prompt. */
443
+ faqs?: string;
444
+ }
445
+ interface PackGuardrails {
446
+ /** Allowlist of tool names the agent may call (namespaced, wildcards ok). */
447
+ allowedTools?: string[];
448
+ /** Tools that require explicit confirmation before running (namespaced names). */
449
+ confirm?: string[];
450
+ /** Extra custom gate, evaluated after allow/confirm. */
451
+ canUseTool?: CanUseTool;
452
+ /** Rules text (discount authority, no-overpromise, escalation) added to the prompt. */
453
+ policy?: string;
454
+ }
455
+ interface AgentPack {
456
+ name: string;
457
+ persona: string;
458
+ playbook?: string;
459
+ tools?: SdkMcpServer[];
460
+ knowledge?: PackKnowledge;
461
+ guardrails?: PackGuardrails;
462
+ model?: ModelProvider;
463
+ }
464
+ interface SpecializeOptions {
465
+ provider?: ModelProvider;
466
+ /** Called when a `confirm` tool wants to run; return true to allow. */
467
+ onConfirm?: (toolName: string, input: Record<string, unknown>) => boolean | Promise<boolean>;
468
+ /** Allow built-in file tools (read/write/list). Off by default for packs. */
469
+ includeBuiltins?: boolean;
470
+ maxTurns?: number;
471
+ }
472
+ interface SpecializedAgent {
473
+ pack: AgentPack;
474
+ system: string;
475
+ servers: SdkMcpServer[];
476
+ query(prompt: string | ContentBlock[], extra?: {
477
+ maxTurns?: number;
478
+ }): AsyncGenerator<AgentEvent>;
479
+ }
480
+ /** Build a ready-to-run specialized agent from a pack. */
481
+ declare function createSpecializedAgent(pack: AgentPack, opts?: SpecializeOptions): SpecializedAgent;
482
+ /**
483
+ * Load a pack's content from a folder (so non-devs can edit it):
484
+ * persona.md · playbook.md · policy.md · catalog.json · faqs.md
485
+ * Code tools (quote/reserve/invoice/...) are passed via `opts.tools`.
486
+ */
487
+ declare function loadPack(dir: string, opts?: {
488
+ tools?: SdkMcpServer[];
489
+ }): Promise<AgentPack>;
490
+
392
491
  declare const CHEAP_MODEL = "claude-haiku-4-5";
393
492
  declare const EXPENSIVE_MODEL = "claude-sonnet-4-6";
394
493
  declare const GEMINI_CHEAP_MODEL = "gemini-2.5-flash-lite";
@@ -451,4 +550,4 @@ interface QueryOptions {
451
550
  */
452
551
  declare function query(prompt: string | ContentBlock[], options?: QueryOptions): AsyncGenerator<AgentEvent>;
453
552
 
454
- export { type AgentEvent, type AnthropicOptions, AnthropicProvider, CHEAP_MODEL, type CanUseTool, type CascadeOptions, CascadeProvider, type CascadeStep, type Complexity, type ContentBlock, DEEPSEEK_V4_FLASH, DEEPSEEK_V4_PRO, type DefaultProviderOptions, EXPENSIVE_MODEL, GEMINI_CHEAP_MODEL, GEMINI_EXPENSIVE_MODEL, type GeminiOptions, GeminiProvider, type JSONSchema, KIMI_K2_6, type LoadedContext, type LoopOptions, type LoopResult, type MediaBlock, type Message, type MockOptions, MockProvider, type ModelProvider, type ModelRequest, type ModelResponse, NVIDIA_BASE_URL, type NvidiaOptions, NvidiaProvider, type PermissionConfig, type PermissionDecision, PermissionEngine, type PipelineOptions, type QueryOptions, type RegisteredTool, type Role, type RouterOptions, type RoutingStats, type SdkMcpServer, type StageContract, type StageInput, type StopReason, type TextBlock, type ToolContext, type ToolDefinition, ToolRegistry, type ToolResultBlock, type ToolResultPayload, type ToolSchema, type ToolUseBlock, builtinTools, classifyComplexity, classifyComplexityGemini, createDefaultProvider, createSdkMcpServer, fastHeuristic, getRoutingStats, hasMedia, judgeComplexity, judgeComplexityGemini, latestUserText, listDirTool, loadStageContext, loadStages, matchesAllow, parseContract, query, readFileTool, runLoop, runPipeline, selectGeminiModel, selectModel, tool, writeFileTool };
553
+ export { type AgentEvent, type AgentPack, type AnthropicOptions, AnthropicProvider, CHEAP_MODEL, type CanUseTool, type CascadeOptions, CascadeProvider, type CascadeStep, type CatalogItem, type Complexity, type ContentBlock, DEEPSEEK_V4_FLASH, DEEPSEEK_V4_PRO, type DefaultProviderOptions, EXPENSIVE_MODEL, GEMINI_CHEAP_MODEL, GEMINI_EXPENSIVE_MODEL, type GeminiOptions, GeminiProvider, type Invoice, type JSONSchema, KIMI_K2_6, LLAMA_VISION, type LoadedContext, type LoopOptions, type LoopResult, type MediaBlock, type Message, type MockOptions, MockProvider, type ModelProvider, type ModelRequest, type ModelResponse, NVIDIA_BASE_URL, type NvidiaOptions, NvidiaProvider, type PackGuardrails, type PackKnowledge, type PermissionConfig, type PermissionDecision, PermissionEngine, type PipelineOptions, type QueryOptions, type RegisteredTool, type Role, type RouterOptions, type RoutingStats, type SdkMcpServer, type SpecializeOptions, type SpecializedAgent, type StageContract, type StageInput, type StopReason, type TextBlock, type ToolContext, type ToolDefinition, ToolRegistry, type ToolResultBlock, type ToolResultPayload, type ToolSchema, type ToolUseBlock, builtinTools, classifyComplexity, classifyComplexityGemini, createCatalogServer, createDefaultProvider, createHandoffServer, createInvoiceServer, createLeadMemoryServer, createSdkMcpServer, createSpecializedAgent, fastHeuristic, getRoutingStats, hasMedia, judgeComplexity, judgeComplexityGemini, latestUserText, listDirTool, loadPack, loadStageContext, loadStages, matchesAllow, parseContract, query, readFileTool, runLoop, runPipeline, selectGeminiModel, selectModel, tool, writeFileTool };
package/dist/index.js CHANGED
@@ -705,6 +705,7 @@ var NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";
705
705
  var KIMI_K2_6 = "moonshotai/kimi-k2.6";
706
706
  var DEEPSEEK_V4_PRO = "deepseek-ai/deepseek-v4-pro";
707
707
  var DEEPSEEK_V4_FLASH = "deepseek-ai/deepseek-v4-flash";
708
+ var LLAMA_VISION = "meta/llama-3.2-90b-vision-instruct";
708
709
  var NvidiaProvider = class {
709
710
  name = "nvidia";
710
711
  model;
@@ -717,7 +718,7 @@ var NvidiaProvider = class {
717
718
  this.apiKey = opts.apiKey ?? process.env.NVIDIA_API_KEY;
718
719
  this.baseURL = opts.baseURL ?? NVIDIA_BASE_URL;
719
720
  this.maxTokens = opts.maxTokens ?? 2048;
720
- this.temperature = opts.temperature ?? 0.6;
721
+ this.temperature = opts.temperature ?? 0.2;
721
722
  }
722
723
  async generate(req) {
723
724
  if (!this.apiKey) throw new Error("NvidiaProvider needs NVIDIA_API_KEY (nvapi-...).");
@@ -830,10 +831,15 @@ var CascadeProvider = class {
830
831
  this.onFallback = opts.onFallback;
831
832
  }
832
833
  async generate(req) {
833
- const needsVision = hasMedia(req.messages);
834
- const eligible = this.steps.filter((s) => !needsVision || s.vision);
834
+ const has = (t) => req.messages.some((m) => m.content.some((b) => b.type === t));
835
+ const needsVideo = has("video");
836
+ const needsImage = has("image");
837
+ const needsVision = needsImage || needsVideo;
838
+ const eligible = needsVideo ? this.steps.filter((s) => s.video) : needsImage ? this.steps.filter((s) => s.vision) : this.steps;
835
839
  if (!eligible.length) {
836
- throw new Error("Cascade: request needs vision but no step supports image/video input.");
840
+ throw new Error(
841
+ `Cascade: request needs ${needsVideo ? "video" : "image"} input but no step supports it.`
842
+ );
837
843
  }
838
844
  let lastErr;
839
845
  for (const step of eligible) {
@@ -854,31 +860,210 @@ var CascadeProvider = class {
854
860
  function createDefaultProvider(opts = {}) {
855
861
  const main = opts.mainModel ?? KIMI_K2_6;
856
862
  const secondary = opts.secondaryModel ?? DEEPSEEK_V4_PRO;
863
+ const vision = opts.visionModel ?? LLAMA_VISION;
857
864
  const gemini = opts.geminiModel ?? "gemini-2.5-flash";
865
+ const nv = (model) => new NvidiaProvider({ model, apiKey: opts.nvidiaApiKey });
858
866
  return new CascadeProvider({
859
867
  onFallback: opts.onFallback ?? ((info) => console.warn(`[cascade] ${info.from} failed (${info.reason}); trying next`)),
860
868
  steps: [
861
- {
862
- provider: new NvidiaProvider({ model: main, apiKey: opts.nvidiaApiKey }),
863
- label: `nvidia:${main}`,
864
- vision: true
865
- // Kimi K2.6 accepts image + video
866
- },
867
- {
868
- provider: new NvidiaProvider({ model: secondary, apiKey: opts.nvidiaApiKey }),
869
- label: `nvidia:${secondary}`,
870
- vision: false
871
- // DeepSeek V4 is text-only
872
- },
869
+ { provider: nv(main), label: `nvidia:${main}`, vision: false, video: false },
870
+ { provider: nv(secondary), label: `nvidia:${secondary}`, vision: false, video: false },
871
+ { provider: nv(vision), label: `nvidia:${vision}`, vision: true, video: false },
873
872
  {
874
873
  provider: new GeminiProvider({ model: gemini, apiKey: opts.googleApiKey }),
875
874
  label: `gemini:${gemini}`,
876
- vision: true
875
+ vision: true,
876
+ video: true
877
877
  }
878
878
  ]
879
879
  });
880
880
  }
881
881
 
882
+ // src/pack.ts
883
+ import { existsSync as existsSync2 } from "fs";
884
+ import { readFile as readFile4 } from "fs/promises";
885
+ import { join as join4 } from "path";
886
+
887
+ // src/packkit.ts
888
+ function createCatalogServer(items, opts = {}) {
889
+ const search = tool(
890
+ "search_catalog",
891
+ "Search the product catalog by text, max price, and tags. Returns matching items with prices and availability. Use this for every product/price/availability question \u2014 never guess.",
892
+ {
893
+ type: "object",
894
+ properties: {
895
+ query: { type: "string" },
896
+ maxPrice: { type: "number" },
897
+ tags: { type: "array", items: { type: "string" } },
898
+ limit: { type: "number" }
899
+ }
900
+ },
901
+ (input) => {
902
+ let res = items.filter((it) => it.available !== false);
903
+ if (input.query) {
904
+ const words = input.query.toLowerCase().split(/\s+/).filter(Boolean);
905
+ res = res.filter((it) => {
906
+ const hay = JSON.stringify(it).toLowerCase();
907
+ return words.every((w) => hay.includes(w));
908
+ });
909
+ }
910
+ if (typeof input.maxPrice === "number") {
911
+ res = res.filter((it) => typeof it.price !== "number" || it.price <= input.maxPrice);
912
+ }
913
+ if (Array.isArray(input.tags) && input.tags.length) {
914
+ res = res.filter((it) => Array.isArray(it.tags) && input.tags.some((t) => it.tags.includes(t)));
915
+ }
916
+ const out = res.slice(0, input.limit ?? 5);
917
+ return { content: out.length ? JSON.stringify(out, null, 2) : "No matching items." };
918
+ }
919
+ );
920
+ return createSdkMcpServer({ name: opts.serverName ?? "catalog", tools: [search] });
921
+ }
922
+ function createLeadMemoryServer(initial = {}) {
923
+ const lead = { ...initial };
924
+ const get = tool(
925
+ "get_lead",
926
+ "Get what we know about the current customer (name, date, budget, stage, items seen).",
927
+ { type: "object", properties: {} },
928
+ () => ({ content: JSON.stringify(lead, null, 2) })
929
+ );
930
+ const update = tool(
931
+ "update_lead",
932
+ "Merge fields into the customer profile, e.g. { budget: 2000, stage: 'recommend' }.",
933
+ { type: "object", properties: { fields: { type: "object" } }, required: ["fields"] },
934
+ (input) => {
935
+ Object.assign(lead, input.fields ?? {});
936
+ return { content: `updated: ${Object.keys(input.fields ?? {}).join(", ") || "(none)"}` };
937
+ }
938
+ );
939
+ return Object.assign(createSdkMcpServer({ name: "lead", tools: [get, update] }), { lead });
940
+ }
941
+ function createInvoiceServer(opts = {}) {
942
+ const invoices = [];
943
+ let n = 0;
944
+ const create = tool(
945
+ "create_invoice",
946
+ "Record an order and amount as a pending invoice to settle, returning an invoice id. Call this only after the customer has agreed to buy.",
947
+ {
948
+ type: "object",
949
+ properties: {
950
+ amount: { type: "number" },
951
+ currency: { type: "string" },
952
+ items: {},
953
+ customer: {}
954
+ },
955
+ required: ["amount"]
956
+ },
957
+ (input) => {
958
+ const inv = {
959
+ id: `inv_${++n}`,
960
+ amount: input.amount,
961
+ currency: input.currency ?? "USD",
962
+ items: input.items,
963
+ customer: input.customer,
964
+ status: "pending"
965
+ };
966
+ invoices.push(inv);
967
+ opts.onCreate?.(inv);
968
+ return {
969
+ content: JSON.stringify({ invoiceId: inv.id, status: inv.status, amount: inv.amount, currency: inv.currency })
970
+ };
971
+ }
972
+ );
973
+ return Object.assign(createSdkMcpServer({ name: "billing", tools: [create] }), { invoices });
974
+ }
975
+ function createHandoffServer(opts = {}) {
976
+ const handoff = tool(
977
+ "handoff_human",
978
+ "Escalate to a human agent with a reason and a short summary of the conversation so far. Use when you're stuck, the request is high-value, or the customer asks for a person.",
979
+ {
980
+ type: "object",
981
+ properties: { reason: { type: "string" }, summary: { type: "string" } },
982
+ required: ["reason"]
983
+ },
984
+ (input) => {
985
+ opts.onHandoff?.({ reason: input.reason, summary: input.summary ?? "" });
986
+ return { content: "Escalated to a human; they will take over shortly." };
987
+ }
988
+ );
989
+ return createSdkMcpServer({ name: "support", tools: [handoff] });
990
+ }
991
+
992
+ // src/pack.ts
993
+ function createSpecializedAgent(pack, opts = {}) {
994
+ const servers = [...pack.tools ?? []];
995
+ if (pack.knowledge?.catalog?.length) servers.unshift(createCatalogServer(pack.knowledge.catalog));
996
+ const parts = [pack.persona.trim()];
997
+ if (pack.playbook) parts.push(`## Playbook
998
+ ${pack.playbook.trim()}`);
999
+ if (pack.guardrails?.policy) parts.push(`## Policy
1000
+ ${pack.guardrails.policy.trim()}`);
1001
+ if (servers.some((s) => s.tools.some((t) => t.name === "search_catalog"))) {
1002
+ parts.push(
1003
+ "Use the `search_catalog` tool for every product, price, or availability question. Never invent a price or claim availability you did not look up."
1004
+ );
1005
+ }
1006
+ if (pack.knowledge?.faqs) parts.push(`## Reference
1007
+ ${pack.knowledge.faqs.trim()}`);
1008
+ const system = parts.join("\n\n");
1009
+ const confirmTools = pack.guardrails?.confirm ?? [];
1010
+ const allow = pack.guardrails?.allowedTools;
1011
+ const base = pack.guardrails?.canUseTool;
1012
+ const canUseTool = async (name, input) => {
1013
+ if (confirmTools.includes(name)) {
1014
+ const ok = opts.onConfirm ? await opts.onConfirm(name, input) : false;
1015
+ if (!ok) return { behavior: "deny", message: `${name} requires confirmation and it was not granted.` };
1016
+ }
1017
+ if (allow && !matchesAllow(name, allow)) {
1018
+ return { behavior: "deny", message: `${name} is not allowed by this pack's guardrails.` };
1019
+ }
1020
+ return base ? base(name, input) : { behavior: "allow" };
1021
+ };
1022
+ const provider = opts.provider ?? pack.model ?? createDefaultProvider();
1023
+ const includeBuiltins = opts.includeBuiltins ?? false;
1024
+ return {
1025
+ pack,
1026
+ system,
1027
+ servers,
1028
+ async *query(prompt, extra) {
1029
+ const registry = new ToolRegistry();
1030
+ if (includeBuiltins) registry.addBuiltins(builtinTools);
1031
+ for (const s of servers) registry.addServer(s);
1032
+ const content = typeof prompt === "string" ? [{ type: "text", text: prompt }] : prompt;
1033
+ const messages = [{ role: "user", content }];
1034
+ const result = yield* runLoop({
1035
+ provider,
1036
+ registry,
1037
+ permissions: new PermissionEngine({ canUseTool }),
1038
+ system,
1039
+ messages,
1040
+ toolContext: { workspaceDir: process.cwd() },
1041
+ maxTurns: extra?.maxTurns ?? opts.maxTurns
1042
+ });
1043
+ yield { type: "result", finalText: result.finalText, turns: result.turns };
1044
+ }
1045
+ };
1046
+ }
1047
+ async function loadPack(dir, opts = {}) {
1048
+ const read = async (f) => {
1049
+ const p = join4(dir, f);
1050
+ return existsSync2(p) ? await readFile4(p, "utf8") : void 0;
1051
+ };
1052
+ const catalogRaw = await read("catalog.json");
1053
+ const policy = await read("policy.md");
1054
+ return {
1055
+ name: dir.split(/[\\/]/).filter(Boolean).pop() ?? "pack",
1056
+ persona: await read("persona.md") ?? "",
1057
+ playbook: await read("playbook.md"),
1058
+ knowledge: {
1059
+ catalog: catalogRaw ? JSON.parse(catalogRaw) : void 0,
1060
+ faqs: await read("faqs.md")
1061
+ },
1062
+ guardrails: policy ? { policy } : void 0,
1063
+ tools: opts.tools
1064
+ };
1065
+ }
1066
+
882
1067
  // src/index.ts
883
1068
  async function* query(prompt, options = {}) {
884
1069
  const registry = new ToolRegistry();
@@ -908,6 +1093,7 @@ export {
908
1093
  GEMINI_EXPENSIVE_MODEL,
909
1094
  GeminiProvider,
910
1095
  KIMI_K2_6,
1096
+ LLAMA_VISION,
911
1097
  MockProvider,
912
1098
  NVIDIA_BASE_URL,
913
1099
  NvidiaProvider,
@@ -916,8 +1102,13 @@ export {
916
1102
  builtinTools,
917
1103
  classifyComplexity,
918
1104
  classifyComplexityGemini,
1105
+ createCatalogServer,
919
1106
  createDefaultProvider,
1107
+ createHandoffServer,
1108
+ createInvoiceServer,
1109
+ createLeadMemoryServer,
920
1110
  createSdkMcpServer,
1111
+ createSpecializedAgent,
921
1112
  fastHeuristic,
922
1113
  getRoutingStats,
923
1114
  hasMedia,
@@ -925,6 +1116,7 @@ export {
925
1116
  judgeComplexityGemini,
926
1117
  latestUserText,
927
1118
  listDirTool,
1119
+ loadPack,
928
1120
  loadStageContext,
929
1121
  loadStages,
930
1122
  matchesAllow,