torus-ai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT.md +47 -0
- package/CONTEXT.md +26 -0
- package/LICENSE +21 -0
- package/README.md +143 -0
- package/dist/index.d.ts +373 -0
- package/dist/index.js +733 -0
- package/dist/index.js.map +1 -0
- package/package.json +39 -0
- package/src/builtins.ts +55 -0
- package/src/context.ts +61 -0
- package/src/index.ts +77 -0
- package/src/loop.ts +106 -0
- package/src/permissions.ts +51 -0
- package/src/pipeline.ts +92 -0
- package/src/providers/anthropic.ts +94 -0
- package/src/providers/gemini.ts +120 -0
- package/src/providers/mock.ts +93 -0
- package/src/router.ts +260 -0
- package/src/subagents.ts +92 -0
- package/src/tools.ts +84 -0
- package/src/types.ts +111 -0
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { latestUserText, selectModel } from "../router.ts";
|
|
2
|
+
import type {
|
|
3
|
+
ContentBlock,
|
|
4
|
+
Message,
|
|
5
|
+
ModelProvider,
|
|
6
|
+
ModelRequest,
|
|
7
|
+
ModelResponse,
|
|
8
|
+
} from "../types.ts";
|
|
9
|
+
|
|
10
|
+
/** Construction options for {@link AnthropicProvider}. */
export interface AnthropicOptions {
  /** Model id used when `route` is off (the constructor defaults it to "claude-sonnet-4-6"). */
  model?: string;
  /** API key; the constructor falls back to `process.env.ANTHROPIC_API_KEY`. */
  apiKey?: string;
  /** Value sent as `max_tokens` on every request (the constructor defaults it to 2048). */
  maxTokens?: number;
  /**
   * When true, the model is chosen per-request by the cost router (cheap vs
   * expensive) based on query complexity, instead of using a fixed `model`.
   */
  route?: boolean;
}

/**
 * Real provider backed by the Anthropic Messages API. Requires the optional
 * `@anthropic-ai/sdk` dependency and an ANTHROPIC_API_KEY. The SDK is imported
 * lazily so the package (and the mock demo) work without it installed.
 */
export class AnthropicProvider implements ModelProvider {
  readonly name = "anthropic";
  private client: any;
  private model: string;
  private maxTokens: number;
  private apiKey?: string;
  private route: boolean;

  constructor(opts: AnthropicOptions = {}) {
    this.model = opts.model ?? "claude-sonnet-4-6";
    this.maxTokens = opts.maxTokens ?? 2048;
    this.apiKey = opts.apiKey ?? process.env.ANTHROPIC_API_KEY;
    this.route = opts.route ?? false;
  }

  /**
   * Create the SDK client on first use.
   *
   * @throws Error with an install hint when `@anthropic-ai/sdk` cannot be imported.
   */
  private async ensureClient(): Promise<void> {
    if (this.client) return;
    const mod = await import("@anthropic-ai/sdk").catch(() => {
      throw new Error(
        "AnthropicProvider needs the @anthropic-ai/sdk package: run `npm i @anthropic-ai/sdk`.",
      );
    });
    // Accept either the default export or the named `Anthropic` export.
    const Anthropic = (mod as any).default ?? (mod as any).Anthropic;
    this.client = new Anthropic({ apiKey: this.apiKey });
  }

  /**
   * Send one request to the Messages API and translate the reply into the
   * SDK-neutral `ModelResponse` shape.
   *
   * Only `text` and `tool_use` blocks are forwarded. Any other block type, and
   * text blocks with no text, are dropped rather than converted to empty text
   * blocks: the returned content is typically replayed as history on the next
   * turn, and an empty text block next to real content would be sent back to
   * the API. When nothing survives, a single empty text block is returned so
   * `content` is never an empty array.
   *
   * @param req System prompt, tool schemas and conversation so far.
   * @returns The reply content plus `"tool_use"` when the API stopped for a
   *   tool call, and `"end_turn"` for every other stop reason.
   */
  async generate(req: ModelRequest): Promise<ModelResponse> {
    await this.ensureClient();

    // Cost routing: pick cheap vs expensive per request from the latest user
    // turn. selectModel never throws — it falls back to the expensive model.
    const model = this.route
      ? await selectModel(latestUserText(req.messages), {
          client: this.client,
          apiKey: this.apiKey,
        })
      : this.model;

    const res = await this.client.messages.create({
      model,
      max_tokens: this.maxTokens,
      system: req.system,
      tools: req.tools.map((t) => ({
        name: t.name,
        description: t.description,
        input_schema: t.inputSchema,
      })),
      messages: req.messages.map(toApiMessage),
    });

    const content: ContentBlock[] = [];
    for (const b of res.content as any[]) {
      if (b.type === "tool_use") {
        content.push({ type: "tool_use", id: b.id, name: b.name, input: b.input });
      } else if (b.type === "text" && b.text) {
        content.push({ type: "text", text: b.text });
      }
      // Anything else has no SDK-neutral representation and is skipped.
    }
    if (content.length === 0) content.push({ type: "text", text: "" });

    const stopReason = res.stop_reason === "tool_use" ? "tool_use" : "end_turn";
    return { content, stopReason };
  }
}

/** Translate one SDK-neutral message into the Messages API wire shape. */
function toApiMessage(m: Message): any {
  return {
    role: m.role,
    content: m.content.map((b) => {
      if (b.type === "text") return { type: "text", text: b.text };
      if (b.type === "tool_use") return { type: "tool_use", id: b.id, name: b.name, input: b.input };
      // Remaining variant is a tool result.
      return { type: "tool_result", tool_use_id: b.toolUseId, content: b.content, is_error: b.isError };
    }),
  };
}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import { latestUserText, selectGeminiModel } from "../router.ts";
|
|
2
|
+
import type {
|
|
3
|
+
ContentBlock,
|
|
4
|
+
Message,
|
|
5
|
+
ModelProvider,
|
|
6
|
+
ModelRequest,
|
|
7
|
+
ModelResponse,
|
|
8
|
+
} from "../types.ts";
|
|
9
|
+
|
|
10
|
+
/** Construction options for {@link GeminiProvider}. */
export interface GeminiOptions {
  /** Model id used when `route` is off (the constructor defaults it to "gemini-2.5-flash"). */
  model?: string;
  /** API key; the constructor falls back to GOOGLE_API_KEY, then GEMINI_API_KEY. */
  apiKey?: string;
  /**
   * When true, the model is chosen per-request by the cost router (cheap vs
   * expensive Gemini) based on query complexity, instead of a fixed `model`.
   */
  route?: boolean;
}

/** Prefix of tool_use ids minted locally because the response carried none. */
const SYNTHETIC_CALL_ID_PREFIX = "torus-gemini-call-";
let syntheticCallSeq = 0;

/** Mint a process-unique id for a function call that arrived without one. */
function nextSyntheticCallId(): string {
  syntheticCallSeq += 1;
  return `${SYNTHETIC_CALL_ID_PREFIX}${Date.now().toString(36)}-${syntheticCallSeq}`;
}

/** Ids minted locally are bookkeeping only and are never sent back to the API. */
function wireCallId(id: string): string | undefined {
  return id.startsWith(SYNTHETIC_CALL_ID_PREFIX) ? undefined : id;
}

/**
 * Provider backed by the Google Gemini API (@google/genai). Requires the
 * optional `@google/genai` dependency and a GOOGLE_API_KEY (or GEMINI_API_KEY).
 * The SDK is imported lazily so the package works without it installed.
 */
export class GeminiProvider implements ModelProvider {
  readonly name = "gemini";
  private client: any;
  private model: string;
  private apiKey?: string;
  private route: boolean;

  constructor(opts: GeminiOptions = {}) {
    this.model = opts.model ?? "gemini-2.5-flash";
    this.apiKey = opts.apiKey ?? process.env.GOOGLE_API_KEY ?? process.env.GEMINI_API_KEY;
    this.route = opts.route ?? false;
  }

  /**
   * Create the SDK client on first use.
   *
   * @throws Error with an install hint when `@google/genai` cannot be imported.
   */
  private async ensureClient(): Promise<void> {
    if (this.client) return;
    const mod = await import("@google/genai").catch(() => {
      throw new Error("GeminiProvider needs the @google/genai package: run `npm i @google/genai`.");
    });
    const GoogleGenAI = (mod as any).GoogleGenAI;
    this.client = new GoogleGenAI({ apiKey: this.apiKey });
  }

  /**
   * Send one request to Gemini and translate the reply into the SDK-neutral
   * `ModelResponse` shape.
   *
   * Every returned `tool_use` block gets a non-empty, unique id. When the
   * response supplies no id for a function call, one is minted locally;
   * otherwise several calls would share the id `""` and their tool results
   * could no longer be matched back to the right function name.
   *
   * @param req System prompt, tool schemas and conversation so far.
   * @returns Reply content (never empty) and `"tool_use"` when at least one
   *   function call was returned, `"end_turn"` otherwise.
   */
  async generate(req: ModelRequest): Promise<ModelResponse> {
    await this.ensureClient();

    const model = this.route
      ? await selectGeminiModel(latestUserText(req.messages), {
          client: this.client,
          apiKey: this.apiKey,
        })
      : this.model;

    const idToName = toolUseNames(req.messages);

    // Only set the config keys that have content.
    const config: any = {};
    if (req.system) config.systemInstruction = req.system;
    if (req.tools.length) {
      config.tools = [
        {
          functionDeclarations: req.tools.map((t) => ({
            name: t.name,
            description: t.description,
            parameters: t.inputSchema,
          })),
        },
      ];
    }

    const res = await this.client.models.generateContent({
      model,
      contents: req.messages.map((m) => toGeminiContent(m, idToName)),
      config,
    });

    const content: ContentBlock[] = [];
    const text: string | undefined = res.text;
    if (text && text.trim()) content.push({ type: "text", text });

    const calls: any[] = res.functionCalls ?? [];
    for (const fc of calls) {
      content.push({
        type: "tool_use",
        id: fc.id || nextSyntheticCallId(),
        name: fc.name,
        input: fc.args ?? {},
      });
    }
    // Keep the "content is never empty" contract for callers.
    if (content.length === 0) content.push({ type: "text", text: "" });

    return { content, stopReason: calls.length ? "tool_use" : "end_turn" };
  }
}

/** Map tool_use id -> tool name (Gemini matches function responses by name). */
function toolUseNames(messages: Message[]): Map<string, string> {
  const map = new Map<string, string>();
  for (const m of messages) {
    for (const b of m.content) {
      if (b.type === "tool_use") map.set(b.id, b.name);
    }
  }
  return map;
}

/**
 * Translate one of our Messages into a Gemini `Content` (role + parts).
 *
 * Assistant turns become role "model"; everything else becomes "user". Tool
 * results are named via `idToName`, falling back to the raw tool-use id when
 * the originating call is not in the history. Locally minted ids are left off
 * the wire.
 */
function toGeminiContent(m: Message, idToName: Map<string, string>): any {
  const role = m.role === "assistant" ? "model" : "user";
  const parts = m.content.map((b) => {
    if (b.type === "text") return { text: b.text };
    if (b.type === "tool_use") {
      return { functionCall: { id: wireCallId(b.id), name: b.name, args: b.input } };
    }
    // tool_result -> functionResponse
    return {
      functionResponse: {
        id: wireCallId(b.toolUseId),
        name: idToName.get(b.toolUseId) ?? b.toolUseId,
        response: b.isError ? { error: b.content } : { result: b.content },
      },
    };
  });
  return { role, parts };
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
ModelProvider,
|
|
3
|
+
ModelRequest,
|
|
4
|
+
ModelResponse,
|
|
5
|
+
ToolSchema,
|
|
6
|
+
} from "../types.ts";
|
|
7
|
+
|
|
8
|
+
/** Options accepted by {@link MockProvider}. */
export interface MockOptions {
  /** Label stamped into outputs so mock-generated content is unmistakable. */
  label?: string;
}

/**
 * Offline, deterministic stand-in for a real model. It never calls an API:
 * the first time tools are offered (and no tool_use block exists in the
 * history yet) it requests the first tool with canned arguments; on every
 * other call it returns a templated markdown answer assembled from tool
 * results and the Layer 2 contract found in the system prompt. Its purpose is
 * to exercise the harness wiring, not to produce meaningful prose — use
 * AnthropicProvider for real output.
 */
export class MockProvider implements ModelProvider {
  readonly name = "mock";
  private opts: MockOptions;

  constructor(opts: MockOptions = {}) {
    this.opts = opts;
  }

  /** Either request the first offered tool once, or emit the final answer. */
  async generate(req: ModelRequest): Promise<ModelResponse> {
    const wantsToolCall = req.tools.length > 0 && !this.historyHasToolUse(req);
    if (!wantsToolCall) {
      return { stopReason: "end_turn", content: [{ type: "text", text: this.synthesize(req) }] };
    }

    const firstTool = req.tools[0];
    return {
      stopReason: "tool_use",
      content: [
        { type: "tool_use", id: "", name: firstTool.name, input: this.sampleInput(firstTool) },
      ],
    };
  }

  /** True when any message in the request already contains a tool_use block. */
  private historyHasToolUse(req: ModelRequest): boolean {
    for (const message of req.messages) {
      for (const block of message.content) {
        if (block.type === "tool_use") return true;
      }
    }
    return false;
  }

  /**
   * Build canned arguments for a tool from the `properties` of its input
   * schema: numbers become 3, booleans become true, a non-number/non-boolean
   * property named "path" becomes "shared/notes.md", everything else gets a
   * placeholder topic string.
   */
  private sampleInput(t: ToolSchema): Record<string, unknown> {
    const declared = (t.inputSchema.properties ?? {}) as Record<string, { type?: string }>;
    const placeholder = "the requested topic";
    const args: Record<string, unknown> = {};

    for (const key of Object.keys(declared)) {
      const kind = declared[key].type;
      if (kind === "number") {
        args[key] = 3;
      } else if (kind === "boolean") {
        args[key] = true;
      } else if (key === "path") {
        args[key] = "shared/notes.md";
      } else {
        args[key] = placeholder;
      }
    }
    return args;
  }

  /** Assemble the templated final answer. */
  private synthesize(req: ModelRequest): string {
    // Concatenate every tool result in conversation order.
    const results: string[] = [];
    for (const message of req.messages) {
      for (const block of message.content) {
        if (block.type === "tool_result") results.push((block as { content: string }).content);
      }
    }
    const toolData = results.join("\n");

    const contract = extractLayer(req.system, "2 contract");
    const suffix = this.opts.label ? ` ${this.opts.label}` : "";

    const out: string[] = [];
    out.push(`<!-- mock model output${suffix} -->`);
    out.push("");
    out.push("## Result");
    out.push("");
    out.push("Produced by the Torus MockProvider — proof that layered context →");
    out.push("agent loop → output handoff works end to end. Replace with AnthropicProvider");
    out.push("for real generation.");

    if (toolData) {
      // Cap quoted tool output at 600 characters.
      out.push("", "### Tool-sourced material", "", "```", toolData.slice(0, 600), "```");
    }
    if (contract) {
      out.push("", "### Stage focus (read from the Layer 2 contract)", "", firstLines(contract, 6));
    }
    return out.join("\n");
  }
}
|
|
86
|
+
|
|
87
|
+
/**
 * Return the trimmed body of the first `<context layer="…">…</context>`
 * element in `system` whose `layer` attribute equals `layer`, or "" when no
 * such element exists.
 *
 * `layer` is matched literally: regex metacharacters in it are escaped, so a
 * name such as "a.b" cannot match "aXb" and a name containing "(" cannot make
 * the pattern invalid.
 */
function extractLayer(system: string, layer: string): string {
  const literal = layer.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const m = system.match(new RegExp(`<context layer="${literal}"[^>]*>([\\s\\S]*?)</context>`));
  return m ? m[1].trim() : "";
}
|
|
91
|
+
/** Keep only the first `n` newline-separated lines of `s`. */
function firstLines(s: string, n: number): string {
  const rows = s.split("\n");
  const head = rows.slice(0, n);
  return head.join("\n");
}
|
package/src/router.ts
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
2
|
+
// Intelligent LLM router — picks CHEAP vs EXPENSIVE per query to cut API cost.
|
|
3
|
+
//
|
|
4
|
+
// Hybrid strategy (provider-agnostic):
|
|
5
|
+
// 1. Fast heuristics (no API call) for the obvious cases.
|
|
6
|
+
// 2. Otherwise a structured "judge" call to the CHEAP model that classifies
|
|
7
|
+
// the query as SIMPLE | COMPLEX.
|
|
8
|
+
// SIMPLE → cheap model, COMPLEX → expensive model.
|
|
9
|
+
//
|
|
10
|
+
// The same mechanism is provided for two provider families:
|
|
11
|
+
// - Anthropic: selectModel() (Claude Haiku judge → Haiku / Sonnet)
|
|
12
|
+
// - Gemini: selectGeminiModel() (Gemini Flash-Lite judge → Flash-Lite / Pro)
|
|
13
|
+
//
|
|
14
|
+
// Safety: the select* functions never throw — on any failure they default to
|
|
15
|
+
// the EXPENSIVE model so the user experience never breaks.
|
|
16
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
17
|
+
|
|
18
|
+
import type { Message } from "./types.ts";
|
|
19
|
+
|
|
20
|
+
// ── Target models ──────────────────────────────────────────────────────────
// Change these in one place per provider.
export const CHEAP_MODEL = "claude-haiku-4-5"; // $1 / $5 per MTok
export const EXPENSIVE_MODEL = "claude-sonnet-4-6"; // current default — $3 / $15 per MTok

// Gemini defaults to the stable 2.5 family. Swap to newer IDs (e.g.
// "gemini-3.1-flash-lite" / "gemini-3.1-pro-preview") if your key has access.
export const GEMINI_CHEAP_MODEL = "gemini-2.5-flash-lite";
export const GEMINI_EXPENSIVE_MODEL = "gemini-2.5-pro";

/** Routing verdict: SIMPLE goes to the cheap model, COMPLEX to the expensive one. */
export type Complexity = "SIMPLE" | "COMPLEX";

export interface RouterOptions {
  /** Reuse an existing provider SDK client (avoids a second client init). */
  client?: any;
  /** API key for a lazily-created client (defaults to the provider's env var). */
  apiKey?: string;
  /** Model used as the complexity judge. Defaults to the provider's cheap model. */
  judgeModel?: string;
}

// ── 1. Fast heuristics (no API call, provider-agnostic) ─────────────────────

// Words/phrases that mark a trivial request. Matched as WHOLE words, so "no"
// does not fire on "know" or "not", and "hi" does not fire on "this".
const SIMPLE_KEYWORDS = [
  "hello", "hi", "hey", "thanks", "thank you", "yes", "no",
  "format", "json", "yaml", "uppercase", "lowercase", "capitalize",
  "translate", "spell", "reverse", "echo", "greeting",
];

const escapeRegExp = (s: string): string => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

// Built once at module load; applied to the lower-cased prompt.
const SIMPLE_KEYWORD_RE = new RegExp("\\b(?:" + SIMPLE_KEYWORDS.map(escapeRegExp).join("|") + ")\\b");

const estimateTokens = (s: string) => Math.ceil(s.length / 4); // ~4 chars/token

/**
 * Cheap, deterministic pre-classification. Returns a verdict only when it's
 * confident; otherwise null (defer to the judge).
 *
 * - "SIMPLE": the prompt is at most ~30 estimated tokens AND contains one of
 *   SIMPLE_KEYWORDS as a whole word (case-insensitive). Inflected forms such
 *   as "formatting" are deliberately not matched and defer to the judge.
 * - "COMPLEX": the prompt is at least ~400 estimated tokens.
 * - null: everything else, including the empty string.
 *
 * @param prompt Raw user text to classify.
 */
export function fastHeuristic(prompt: string): Complexity | null {
  const tokens = estimateTokens(prompt);

  // Short prompt that mentions a trivial operation → SIMPLE, route immediately.
  if (tokens <= 30 && SIMPLE_KEYWORD_RE.test(prompt.toLowerCase())) return "SIMPLE";

  // Very long prompts are almost always real work → skip the judge call.
  if (tokens >= 400) return "COMPLEX";

  return null;
}
|
|
67
|
+
|
|
68
|
+
// ── 2. LLM judges (structured output on each provider's cheap model) ─────────
|
|
69
|
+
|
|
70
|
+
// System prompt shared by both judges.
const JUDGE_SYSTEM =
  "You are a routing classifier. Decide whether a user query needs a powerful " +
  "model or can be handled by a small, fast one. Classify as SIMPLE (greetings, " +
  "formatting, short factual lookups, simple rewrites, single-step tasks) or " +
  "COMPLEX (multi-step reasoning, coding, analysis, planning, nuanced judgment). " +
  'Respond ONLY with the required JSON: {"complexity": "SIMPLE" | "COMPLEX"}.';

// JSON Schema for the judge's structured reply.
const COMPLEXITY_SCHEMA = {
  type: "object",
  properties: { complexity: { type: "string", enum: ["SIMPLE", "COMPLEX"] } },
  required: ["complexity"],
  additionalProperties: false,
};

/**
 * Robustly extract SIMPLE/COMPLEX from a judge response.
 *
 * 1. If `text` is JSON with a `complexity` field equal to "SIMPLE" or
 *    "COMPLEX", that value wins.
 * 2. Otherwise the text is scanned case-insensitively for the two labels as
 *    whole words. The scan yields a verdict only when exactly ONE of the
 *    labels appears.
 *
 * @throws Error when no label is found, or when both appear outside valid
 *   JSON. An ambiguous reply such as "SIMPLE or COMPLEX" is treated as
 *   unparseable instead of silently resolving to whichever word came first.
 */
function parseComplexity(text: string): Complexity {
  try {
    const parsed: unknown = JSON.parse(text);
    if (typeof parsed === "object" && parsed !== null) {
      const verdict = (parsed as { complexity?: unknown }).complexity;
      if (verdict === "SIMPLE" || verdict === "COMPLEX") return verdict;
    }
  } catch {
    // Not JSON (e.g. fenced or chatty output) — fall through to the text scan.
  }

  const labels = new Set(text.toUpperCase().match(/\b(?:SIMPLE|COMPLEX)\b/g) ?? []);
  if (labels.size === 1) return [...labels][0] as Complexity;

  throw new Error(`judge returned unparseable complexity: ${text.slice(0, 80)}`);
}
|
|
96
|
+
|
|
97
|
+
// -- Anthropic judge --

// Lazily-created judge clients, one per API key, so a later call with a
// different key does not reuse a client built for the first key.
const sharedAnthropicClients = new Map<string, any>();

/**
 * Resolve the Anthropic client used for judging.
 *
 * Returns `opts.client` when supplied. Otherwise returns a cached client for
 * the effective API key (`opts.apiKey`, else ANTHROPIC_API_KEY), creating it
 * on first use.
 *
 * @throws Error with an install hint when `@anthropic-ai/sdk` cannot be imported.
 */
async function getAnthropic(opts: RouterOptions): Promise<any> {
  if (opts.client) return opts.client;

  const apiKey = opts.apiKey ?? process.env.ANTHROPIC_API_KEY;
  const cacheKey = apiKey ?? "";
  const cached = sharedAnthropicClients.get(cacheKey);
  if (cached) return cached;

  const mod = await import("@anthropic-ai/sdk").catch(() => {
    throw new Error("Anthropic judge needs @anthropic-ai/sdk (npm i @anthropic-ai/sdk).");
  });
  const Anthropic = (mod as any).default ?? (mod as any).Anthropic;
  const created = new Anthropic({ apiKey });
  sharedAnthropicClients.set(cacheKey, created);
  return created;
}
|
|
109
|
+
|
|
110
|
+
/**
 * Ask the Claude judge model to grade `prompt`.
 *
 * Sends a single user message with the shared judge system prompt and the
 * complexity JSON schema, then parses the first text block of the reply.
 * Rejects if the client cannot be created, the API call fails, or the reply
 * cannot be parsed.
 *
 * @param prompt Text to classify.
 * @param opts   Optional client / API key / judge-model overrides.
 */
export async function judgeComplexity(prompt: string, opts: RouterOptions = {}): Promise<Complexity> {
  const anthropic = await getAnthropic(opts);

  const request = {
    model: opts.judgeModel ?? CHEAP_MODEL,
    max_tokens: 64,
    system: JUDGE_SYSTEM,
    output_config: { format: { type: "json_schema", schema: COMPLEXITY_SCHEMA } },
    messages: [{ role: "user", content: prompt }],
  };
  const reply = await anthropic.messages.create(request);

  // Only the first text block is considered; a reply without one parses as "".
  const firstText = reply.content.find((block: any) => block.type === "text");
  const raw: string = firstText?.text ?? "";
  return parseComplexity(raw);
}
|
|
123
|
+
|
|
124
|
+
// -- Gemini judge --

// Lazily-created judge clients, one per API key, so a later call with a
// different key does not reuse a client built for the first key.
const sharedGeminiClients = new Map<string, any>();

/**
 * Resolve the Gemini client used for judging.
 *
 * Returns `opts.client` when supplied. Otherwise returns a cached client for
 * the effective API key (`opts.apiKey`, else GOOGLE_API_KEY, else
 * GEMINI_API_KEY), creating it on first use.
 *
 * @throws Error with an install hint when `@google/genai` cannot be imported.
 */
async function getGemini(opts: RouterOptions): Promise<any> {
  if (opts.client) return opts.client;

  const apiKey = opts.apiKey ?? process.env.GOOGLE_API_KEY ?? process.env.GEMINI_API_KEY;
  const cacheKey = apiKey ?? "";
  const cached = sharedGeminiClients.get(cacheKey);
  if (cached) return cached;

  const mod = await import("@google/genai").catch(() => {
    throw new Error("Gemini judge needs @google/genai (npm i @google/genai).");
  });
  const GoogleGenAI = (mod as any).GoogleGenAI;
  const created = new GoogleGenAI({ apiKey });
  sharedGeminiClients.set(cacheKey, created);
  return created;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Ask the Gemini judge model to grade `prompt`.
 *
 * Requests a JSON reply constrained to `{ complexity: "SIMPLE" | "COMPLEX" }`
 * and parses the response text. Rejects if the client cannot be created, the
 * API call fails, or the reply cannot be parsed.
 *
 * @param prompt Text to classify.
 * @param opts   Optional client / API key / judge-model overrides.
 */
export async function judgeComplexityGemini(prompt: string, opts: RouterOptions = {}): Promise<Complexity> {
  const gemini = await getGemini(opts);

  // Same shape as the Anthropic schema, minus `additionalProperties`.
  const responseSchema = {
    type: "object",
    properties: { complexity: { type: "string", enum: ["SIMPLE", "COMPLEX"] } },
    required: ["complexity"],
  };

  const reply = await gemini.models.generateContent({
    model: opts.judgeModel ?? GEMINI_CHEAP_MODEL,
    contents: prompt,
    config: {
      systemInstruction: JUDGE_SYSTEM,
      responseMimeType: "application/json",
      responseSchema,
    },
  });

  const raw = reply.text ?? "";
  return parseComplexity(raw);
}
|
|
157
|
+
|
|
158
|
+
// ── 3. Classification + routing ─────────────────────────────────────────────
|
|
159
|
+
|
|
160
|
+
/** A provider-specific complexity grader. */
type Judge = (prompt: string, opts: RouterOptions) => Promise<Complexity>;

/**
 * Run the free heuristic first and only call `judge` when the heuristic
 * abstains (returns null). Rejects when the judge rejects.
 */
async function classifyWith(prompt: string, judge: Judge, opts: RouterOptions): Promise<Complexity> {
  const quick = fastHeuristic(prompt);
  if (quick != null) return quick;
  return judge(prompt, opts);
}

/** Classify a prompt: heuristics, then the Claude judge. Rejects on judge failure. */
export function classifyComplexity(prompt: string, opts: RouterOptions = {}): Promise<Complexity> {
  const viaClaude: Judge = judgeComplexity;
  return classifyWith(prompt, viaClaude, opts);
}

/** Classify a prompt: heuristics, then the Gemini judge. Rejects on judge failure. */
export function classifyComplexityGemini(prompt: string, opts: RouterOptions = {}): Promise<Complexity> {
  const viaGemini: Judge = judgeComplexityGemini;
  return classifyWith(prompt, viaGemini, opts);
}
|
|
175
|
+
|
|
176
|
+
/** Provider-specific pieces the generic router needs. */
interface RouteConfig {
  /** Model returned for SIMPLE prompts. */
  cheapModel: string;
  /** Model returned for COMPLEX prompts and on any classification failure. */
  expensiveModel: string;
  /** Grader consulted when the heuristic abstains. */
  judge: Judge;
}

/**
 * Choose a model for `prompt` and record the decision in the routing counters.
 *
 * Never rejects because of classification: any error thrown while classifying
 * is logged with `console.warn` and the expensive model is returned.
 *
 * @param prompt Text to classify.
 * @param cfg    Cheap/expensive model ids and the judge to use.
 * @param opts   Options forwarded to the judge.
 * @returns The chosen model id.
 */
async function routeWith(prompt: string, cfg: RouteConfig, opts: RouterOptions): Promise<string> {
  // Start from the safe choice; only a successful SIMPLE verdict downgrades it.
  let model = cfg.expensiveModel;
  try {
    const complexity = await classifyWith(prompt, cfg.judge, opts);
    if (complexity === "SIMPLE") model = cfg.cheapModel;
  } catch (err: unknown) {
    // A thrown value is not guaranteed to be an Error; avoid logging "undefined".
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(
      `[router] classification failed — defaulting to expensive model. Reason: ${reason}`,
    );
  }
  record(model, model === cfg.cheapModel);
  return model;
}
|
|
196
|
+
|
|
197
|
+
/**
 * Route a prompt to a Claude model id: CHEAP_MODEL for SIMPLE prompts,
 * EXPENSIVE_MODEL otherwise. Classification failures fall back to the
 * expensive model instead of rejecting.
 */
export function selectModel(prompt: string, opts: RouterOptions = {}): Promise<string> {
  const claude: RouteConfig = {
    cheapModel: CHEAP_MODEL,
    expensiveModel: EXPENSIVE_MODEL,
    judge: judgeComplexity,
  };
  return routeWith(prompt, claude, opts);
}

/**
 * Route a prompt to a Gemini model id: GEMINI_CHEAP_MODEL for SIMPLE prompts,
 * GEMINI_EXPENSIVE_MODEL otherwise. Classification failures fall back to the
 * expensive model instead of rejecting.
 */
export function selectGeminiModel(prompt: string, opts: RouterOptions = {}): Promise<string> {
  const gemini: RouteConfig = {
    cheapModel: GEMINI_CHEAP_MODEL,
    expensiveModel: GEMINI_EXPENSIVE_MODEL,
    judge: judgeComplexityGemini,
  };
  return routeWith(prompt, gemini, opts);
}
|
|
210
|
+
|
|
211
|
+
// ── 4. Observability ─────────────────────────────────────────────────────────
|
|
212
|
+
|
|
213
|
+
// Module-wide tallies of routing decisions since load.
let cheapCount = 0;
let expensiveCount = 0;

/**
 * Count one routing decision and log a one-line summary with the running
 * cheap/expensive split (percentages rounded to whole numbers).
 */
function record(model: string, isCheap: boolean): void {
  if (isCheap) {
    cheapCount += 1;
  } else {
    expensiveCount += 1;
  }

  // At least one decision has been counted here, so total is never 0.
  const total = cheapCount + expensiveCount;
  const share = (n: number) => ((n / total) * 100).toFixed(0);
  const tier = isCheap ? "CHEAP" : "EXPENSIVE";

  console.log(
    `[router] → ${tier} (${model}) | cheap ${share(cheapCount)}% / expensive ${share(expensiveCount)}% (n=${total})`,
  );
}

/** Snapshot of the routing counters. */
export interface RoutingStats {
  /** Decisions that picked the cheap model. */
  cheap: number;
  /** Decisions that picked the expensive model. */
  expensive: number;
  /** cheap + expensive. */
  total: number;
  /** Cheap share in percent (0–100, unrounded); 0 when nothing was routed. */
  cheapPct: number;
  /** Expensive share in percent (0–100, unrounded); 0 when nothing was routed. */
  expensivePct: number;
}

/** Return the current routing counters and their percentage split. */
export function getRoutingStats(): RoutingStats {
  const total = cheapCount + expensiveCount;
  const asPct = (count: number): number => (total === 0 ? 0 : (count / total) * 100);

  return {
    cheap: cheapCount,
    expensive: expensiveCount,
    total,
    cheapPct: asPct(cheapCount),
    expensivePct: asPct(expensiveCount),
  };
}
|
|
244
|
+
|
|
245
|
+
// ── Shared message util ──────────────────────────────────────────────────────
|
|
246
|
+
|
|
247
|
+
/**
 * Find the text the router should classify: walking the conversation from
 * newest to oldest, return the joined (newline-separated, trimmed) text blocks
 * of the first user message that has any non-blank text. User messages made
 * only of non-text blocks are skipped. Returns "" when no such message exists.
 */
export function latestUserText(messages: Message[]): string {
  let idx = messages.length;
  while (idx-- > 0) {
    const turn = messages[idx];
    if (turn.role === "user") {
      const pieces: string[] = [];
      for (const block of turn.content) {
        if (block.type === "text") pieces.push(block.text);
      }
      const joined = pieces.join("\n").trim();
      if (joined) return joined;
    }
  }
  return "";
}
|
package/src/subagents.ts
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { readFile, readdir } from "node:fs/promises";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
|
|
4
|
+
// A "subagent" in this SDK is a stage: a markdown contract (Layer 2 CONTEXT.md)
|
|
5
|
+
// that declares its Inputs / Process / Outputs / Tools. Parsing the contract turns
|
|
6
|
+
// folder structure into agent architecture — the whole ICM premise.
|
|
7
|
+
|
|
8
|
+
/** One file a stage reads, as declared in its "## Inputs" section. */
export interface StageInput {
  layer: 3 | 4; // 3 = reference (constraints), 4 = working (input to process)
  path: string; // relative to the stage dir, exactly as the contract names it
  note?: string; // trailing "# …" comment on the input line, if any
}

/** Parsed form of a stage's CONTEXT.md contract. */
export interface StageContract {
  name: string; // "01_research"
  order: number; // numeric value of the first two characters of `name`, else 0
  stageDir: string;
  contractPath: string;
  inputs: StageInput[];
  process: string; // body of the "## Process" section
  outputs: string[]; // artifact filenames, written to the stage's output/
  tools: string[]; // allowlist patterns from the optional "## Tools" section
}

/** Pull the body of a "## <Name>" markdown section (up to the next "## " or EOF). */
function section(body: string, name: string): string {
  const re = new RegExp(`(?:^|\\n)##\\s+${name}\\s*\\n([\\s\\S]*?)(?=\\n##\\s|$)`, "i");
  const m = body.match(re);
  return m ? m[1].trim() : "";
}

// Input line, e.g. "- Layer 3 (reference): ../../_config/voice.md # optional note"
const INPUT_LINE = /Layer\s+([34])\b.*?:\s*([^\s#]+)\s*(?:#\s*(.*))?$/i;

// Output line: a list item ("-", "*", "+" or "1." / "1)") whose first filename
// token ends in .md/.json/.txt. The token may be wrapped in backticks or
// quotes and may follow other words. Anchoring on the list marker keeps a
// hyphen INSIDE a filename ("research-output.md") from being mistaken for the
// bullet, which previously truncated such names to their last segment.
const OUTPUT_LINE =
  /^\s*(?:[-*+]|\d+[.)])\s*(?:.*?[\s`"'(])??([A-Za-z0-9._][A-Za-z0-9._-]*\.(?:md|json|txt))(?!\w)/;

/**
 * Parse a stage's markdown contract into a {@link StageContract}.
 *
 * - `order` comes from the first two characters of `name` (0 if not numeric).
 * - `inputs` holds one entry per line of "## Inputs" that names "Layer 3" or
 *   "Layer 4" followed by a colon and a whitespace-free path; a trailing
 *   "# note" is captured as `note`.
 * - `outputs` holds one filename per list item of "## Outputs" that contains a
 *   `.md`, `.json` or `.txt` filename (first one on the line).
 * - `tools` holds the newline- or comma-separated entries of "## Tools" with
 *   leading "-"/"*" bullets removed; empty when the section is absent.
 *
 * @param name         Stage folder name, e.g. "01_research".
 * @param stageDir     Path of the stage folder (stored as-is).
 * @param contractPath Path of the contract file (stored as-is).
 * @param body         Markdown text of the contract.
 */
export function parseContract(
  name: string,
  stageDir: string,
  contractPath: string,
  body: string,
): StageContract {
  const order = parseInt(name.slice(0, 2), 10) || 0;

  const inputs: StageInput[] = [];
  for (const line of section(body, "Inputs").split("\n")) {
    const m = line.match(INPUT_LINE);
    if (m) inputs.push({ layer: Number(m[1]) as 3 | 4, path: m[2], note: m[3]?.trim() });
  }

  const outputs: string[] = [];
  for (const line of section(body, "Outputs").split("\n")) {
    const m = line.match(OUTPUT_LINE);
    if (m) outputs.push(m[1]);
  }

  // Tools (optional): the stage declares exactly which tools it may use.
  const toolsRaw = section(body, "Tools");
  const tools = toolsRaw
    ? toolsRaw
        .split(/[\n,]/)
        .map((s) => s.replace(/^[-*]\s*/, "").trim())
        .filter(Boolean)
    : [];

  return {
    name,
    order,
    stageDir,
    contractPath,
    inputs,
    process: section(body, "Process"),
    outputs,
    tools,
  };
}
|
|
74
|
+
|
|
75
|
+
/**
 * Load every stage contract under `<workspaceDir>/stages`.
 *
 * A stage is a directory whose name starts with two digits and an underscore.
 * Stages are processed in ascending name order; each one's `CONTEXT.md` is
 * read as UTF-8 and parsed. Rejects if the stages directory cannot be listed
 * or any stage's `CONTEXT.md` cannot be read.
 *
 * @param workspaceDir Workspace root containing the `stages` folder.
 * @returns Parsed contracts in execution order.
 */
export async function loadStages(workspaceDir: string): Promise<StageContract[]> {
  const root = join(workspaceDir, "stages");

  const stageNames: string[] = [];
  for (const entry of await readdir(root, { withFileTypes: true })) {
    if (entry.isDirectory() && /^\d{2}_/.test(entry.name)) stageNames.push(entry.name);
  }
  stageNames.sort();

  const loaded: StageContract[] = [];
  for (const stageName of stageNames) {
    const dir = join(root, stageName);
    const file = join(dir, "CONTEXT.md");
    const markdown = await readFile(file, "utf8");
    loaded.push(parseContract(stageName, dir, file, markdown));
  }
  return loaded;
}
|