@rdcahalane/ai-router 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,40 @@
1
+ # @rdcahalane/ai-router
2
+
3
+ Simple multi-provider LLM router with fallback across local and hosted models.
4
+
5
+ ## Features
6
+
7
+ - Routes prompts by task type
8
+ - Supports local Ollama fallback
9
+ - Handles provider errors and quota failures
10
+ - Supports text and image inputs
11
+
12
+ ## Install
13
+
14
+ ```bash
15
+ npm install @rdcahalane/ai-router
16
+ ```
17
+
18
+ ## Usage
19
+
20
+ ```ts
21
+ import { createAIRouter } from "@rdcahalane/ai-router";
22
+
23
+ const router = createAIRouter({
24
+ anthropicApiKey: process.env.ANTHROPIC_API_KEY,
25
+ openaiApiKey: process.env.OPENAI_API_KEY,
26
+ geminiApiKey: process.env.GEMINI_API_KEY,
27
+ ollamaUrl: process.env.OLLAMA_URL,
28
+ });
29
+
30
+ // `best` tries the highest-quality model first (Sonnet → GPT-4o → Gemini Flash)
31
+ const text = await router.best({
32
+ user: "Summarize this report",
33
+ });
34
+ ```
35
+
36
+ ## Notes
37
+
38
+ - Local-first setups can rely on Ollama for cheap fallback
39
+ - Host applications should decide their own provider policy and cost controls
40
+ - Best for apps that want one simple abstraction over several model providers
package/package.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "name": "@rdcahalane/ai-router",
3
+ "version": "1.0.0",
4
+ "description": "Model routing — Sonnet for reasoning, Haiku for extraction, Ollama local fallback",
5
+ "license": "MIT",
6
+ "type": "module",
7
+ "main": "./dist/index.js",
8
+ "types": "./dist/index.d.ts",
9
+ "exports": {
10
+ ".": {
11
+ "import": "./dist/index.js",
12
+ "types": "./dist/index.d.ts"
13
+ }
14
+ },
15
+ "scripts": {
16
+ "build": "tsc",
17
+ "dev": "tsc --watch"
18
+ },
19
+ "keywords": ["anthropic", "claude", "ollama", "model-routing", "haiku", "sonnet"]
20
+ }
package/src/index.ts ADDED
@@ -0,0 +1,339 @@
1
+ /**
2
+ * ai-router
3
+ *
4
+ * Unified model routing with automatic provider fallback:
5
+ * Anthropic (best quality) → OpenAI (good quality) → Gemini Flash
6
+ *
7
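+ * The fallback methods (fast, best, vision, local) try providers in order and move to the next on quota/auth errors or a missing API key; the explicit single-provider methods do not fall back.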
8
+ * Vision methods support image URLs or base64 data.
9
+ *
10
+ * Usage:
11
+ * const ai = createAIRouter({
12
+ * anthropicApiKey: process.env.ANTHROPIC_API_KEY,
13
+ * openaiApiKey: process.env.OPENAI_API_KEY,
14
+ * geminiApiKey: process.env.GEMINI_API_KEY,
15
+ * });
16
+ *
17
+ * // Text — tries Haiku → GPT-4o-mini → Gemini Flash
18
+ * const result = await ai.fast({ user: "Summarize this..." });
19
+ *
20
+ * // Best reasoning — tries Sonnet → GPT-4o → Gemini Flash
21
+ * const analysis = await ai.best({ system: "You are...", user: "Analyze..." });
22
+ *
23
+ * // Vision — tries Claude Sonnet → GPT-4o → Gemini Flash
24
+ * const desc = await ai.vision({ user: "What do you see?", imageUrl: "https://..." });
25
+ *
26
+ * // Explicit providers (no fallback)
27
+ * await ai.haiku({ user: "..." });
28
+ * await ai.sonnet({ user: "..." });
29
+ * await ai.openai({ user: "..." }); // GPT-4o
30
+ * await ai.openaiMini({ user: "..." }); // GPT-4o-mini
31
+ * await ai.gemini({ user: "..." }); // Gemini 2.0 Flash
32
+ */
33
+
34
/**
 * Credentials and endpoints consumed by `createAIRouter`.
 *
 * Every field is optional. A hosted provider whose key is missing is treated
 * as unavailable, so the fallback chains simply move on to the next provider.
 */
export interface AIRouterConfig {
  /** API key for Anthropic (Claude Haiku / Sonnet calls). */
  anthropicApiKey?: string;
  /** API key for OpenAI (chat completions and the embeddings fallback). */
  openaiApiKey?: string;
  /** API key for Google Gemini. */
  geminiApiKey?: string;
  /** Base URL of the Ollama server. Defaults to `http://localhost:11434`. */
  ollamaUrl?: string;
  /** Ollama model used for chat. Defaults to `llama3.2`. */
  ollamaModel?: string;
}
43
+
44
/**
 * Input for a single model call.
 *
 * At most one image is sent: when both `imageUrl` and `imageBase64` are set,
 * `imageUrl` takes precedence. The Ollama path sends text only.
 */
export interface CallOptions {
  /** Optional system / instruction text. */
  system?: string;
  /** The user prompt (required). */
  user: string;
  /** Output token cap for the hosted providers. Defaults to 4096; not forwarded to Ollama. */
  maxTokens?: number;
  /** Image URL for vision calls (must be publicly accessible). */
  imageUrl?: string;
  /** Base64-encoded image bytes for vision calls. */
  imageBase64?: string;
  /** MIME type of the image. Defaults to `image/jpeg`. */
  imageMimeType?: string;
}
55
+
56
/**
 * The object returned by `createAIRouter`.
 *
 * Methods named after a single model call that provider only. The "smart"
 * methods (`fast`, `best`, `vision`, `local`) walk a provider chain and move
 * on when a provider is unavailable.
 */
export interface AIRouter {
  /** Anthropic Claude Haiku only — fast, cheap extraction/classification. No fallback. */
  haiku(opts: CallOptions): Promise<string>;
  /** Anthropic Claude Sonnet only — strongest reasoning. No fallback. */
  sonnet(opts: CallOptions): Promise<string>;
  /** OpenAI GPT-4o only — strong reasoning, vision. No fallback. */
  openai(opts: CallOptions): Promise<string>;
  /** OpenAI GPT-4o-mini only — cheap, fast. No fallback. */
  openaiMini(opts: CallOptions): Promise<string>;
  /** Google Gemini 2.0 Flash only — text and vision. No fallback. */
  gemini(opts: CallOptions): Promise<string>;
  /**
   * Local first: Ollama → Haiku → GPT-4o-mini → Gemini Flash.
   * The hosted chain is used only when the Ollama call yields no result.
   */
  local(opts: CallOptions): Promise<string>;
  /**
   * Smart fast: Haiku → GPT-4o-mini → Gemini Flash.
   * Use for: extraction, classification, summarization, batch tasks.
   */
  fast(opts: CallOptions): Promise<string>;
  /**
   * Smart best: Sonnet → GPT-4o → Gemini Flash.
   * Use for: reasoning, analysis, drafting, complex tasks.
   */
  best(opts: CallOptions): Promise<string>;
  /**
   * Smart vision: Claude Sonnet → GPT-4o → Gemini Flash.
   * Use for: photo analysis, image description, visual extraction.
   * Pass `imageUrl`, or `imageBase64` together with `imageMimeType`.
   */
  vision(opts: CallOptions): Promise<string>;
  /**
   * Embed text: Ollama `nomic-embed-text` → OpenAI `text-embedding-3-small`.
   * Rejects when neither provider produces an embedding.
   */
  embed(text: string): Promise<number[]>;
}
88
+
89
/** Anthropic model identifiers used for the Haiku and Sonnet calls. */
const ANTHROPIC_MODELS = {
  HAIKU: "claude-haiku-4-5-20251001",
  SONNET: "claude-sonnet-4-6",
} as const;
93
+
94
/** Substrings that mark a 400 response as a billing/quota problem rather than a malformed request. */
const QUOTA_BODY_HINTS = ["credit", "quota", "balance", "exhausted"];

/**
 * Decides whether a failed HTTP response means "this provider is unavailable,
 * try the next one" as opposed to "this request is wrong, stop".
 *
 * Fall-through cases:
 *  - 401 / 403: missing, invalid or unauthorized key
 *  - 402: payment required
 *  - 429: rate limit or quota exceeded
 *  - 5xx: provider-side outage or overload (e.g. 500, 503, Anthropic's 529)
 *  - 400 whose body mentions credit / quota / balance / exhausted
 *
 * @param status - HTTP status code of the failed response.
 * @param bodyText - Raw response body; only inspected for status 400.
 * @returns `true` when the caller should fall through to the next provider.
 */
function isProviderError(status: number, bodyText: string): boolean {
  if (status === 401 || status === 402 || status === 403 || status === 429) return true;
  // A provider that is down or overloaded is unavailable, not a caller mistake.
  if (status >= 500) return true;
  if (status === 400) {
    // Some providers report exhausted credit as a plain 400; detect it from the body.
    const lower = bodyText.toLowerCase();
    return QUOTA_BODY_HINTS.some((hint) => lower.includes(hint));
  }
  return false;
}
102
+
103
/**
 * Signals that one provider could not serve the request (for example a
 * missing key, or a quota/auth failure). The waterfall helper catches this
 * type and moves on to the next provider; any other error is rethrown.
 */
class ProviderError extends Error {
  constructor(msg: string) {
    super(msg);
    // Distinguishes these errors from plain `Error` in logs and stack traces.
    this.name = "ProviderError";
  }
}
107
+
108
+ export function createAIRouter(config: AIRouterConfig): AIRouter {
109
+ const ollamaUrl = config.ollamaUrl ?? "http://localhost:11434";
110
+ const ollamaModel = config.ollamaModel ?? "llama3.2";
111
+
112
// ── Anthropic ──────────────────────────────────────────────────────────────
/**
 * Sends one user turn to the Anthropic Messages endpoint and returns the reply text.
 *
 * @param model - Anthropic model identifier placed in the request body.
 * @param opts - Prompt, optional system text, optional image and token cap.
 * @returns Trimmed text of the first content block, or "" when it is absent.
 * @throws ProviderError when no Anthropic key is configured or `isProviderError` flags the response.
 * @throws Error for any other non-2xx response.
 */
async function callAnthropic(model: string, opts: CallOptions): Promise<string> {
  const apiKey = config.anthropicApiKey;
  if (!apiKey) throw new ProviderError("No Anthropic API key");

  // At most one image is attached, ahead of the text part; a URL wins over inline base64.
  const imageSource = opts.imageUrl
    ? { type: "url", url: opts.imageUrl }
    : opts.imageBase64
      ? { type: "base64", media_type: opts.imageMimeType ?? "image/jpeg", data: opts.imageBase64 }
      : undefined;
  const content: unknown[] = [
    ...(imageSource ? [{ type: "image", source: imageSource }] : []),
    { type: "text", text: opts.user },
  ];

  // `system` is only included when provided.
  const payload: Record<string, unknown> = { model, max_tokens: opts.maxTokens ?? 4096 };
  if (opts.system) payload.system = opts.system;
  payload.messages = [{ role: "user", content }];

  const response = await fetch("https://api.anthropic.com/v1/messages", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-api-key": apiKey,
      "anthropic-version": "2023-06-01",
    },
    body: JSON.stringify(payload),
  });

  // Read the body as text first so it can be quoted in error messages.
  const raw = await response.text();
  if (response.ok) {
    const parsed = JSON.parse(raw) as { content?: { text?: string }[] };
    return parsed.content?.[0]?.text?.trim() ?? "";
  }
  if (isProviderError(response.status, raw)) {
    throw new ProviderError(`Anthropic: ${raw.slice(0, 120)}`);
  }
  throw new Error(`Anthropic error ${response.status}: ${raw.slice(0, 200)}`);
}
156
+
157
// ── OpenAI ─────────────────────────────────────────────────────────────────
/**
 * Sends one chat-completions request to OpenAI and returns the reply text.
 *
 * @param model - OpenAI model identifier placed in the request body.
 * @param opts - Prompt, optional system text, optional image and token cap.
 * @returns Trimmed content of the first choice, or "" when it is absent.
 * @throws ProviderError when no OpenAI key is configured or `isProviderError` flags the response.
 * @throws Error for any other non-2xx response.
 */
async function callOpenAI(model: string, opts: CallOptions): Promise<string> {
  const apiKey = config.openaiApiKey;
  if (!apiKey) throw new ProviderError("No OpenAI API key");

  // Both image forms travel as an `image_url` part; base64 data is wrapped in a data: URL.
  let imageRef: string | undefined;
  if (opts.imageUrl) {
    imageRef = opts.imageUrl;
  } else if (opts.imageBase64) {
    imageRef = `data:${opts.imageMimeType ?? "image/jpeg"};base64,${opts.imageBase64}`;
  }

  const textPart = { type: "text", text: opts.user };
  const userParts: unknown[] =
    imageRef === undefined
      ? [textPart]
      : [{ type: "image_url", image_url: { url: imageRef, detail: "high" } }, textPart];

  // The system message, when present, precedes the user message.
  const messages: unknown[] = [
    ...(opts.system ? [{ role: "system", content: opts.system }] : []),
    { role: "user", content: userParts },
  ];

  const response = await fetch("https://api.openai.com/v1/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`,
    },
    body: JSON.stringify({ model, messages, max_tokens: opts.maxTokens ?? 4096 }),
  });

  // Read the body as text first so it can be quoted in error messages.
  const raw = await response.text();
  if (response.ok) {
    const parsed = JSON.parse(raw) as { choices?: { message?: { content?: string } }[] };
    return parsed.choices?.[0]?.message?.content?.trim() ?? "";
  }
  if (isProviderError(response.status, raw)) {
    throw new ProviderError(`OpenAI: ${raw.slice(0, 120)}`);
  }
  throw new Error(`OpenAI error ${response.status}: ${raw.slice(0, 200)}`);
}
191
+
192
// ── Gemini ─────────────────────────────────────────────────────────────────
/**
 * Sends one `generateContent` request to Gemini 2.0 Flash and returns the reply text.
 *
 * @param opts - Prompt, optional system text, optional image and token cap.
 * @returns The concatenated, trimmed text parts of the first candidate, or "" when there are none.
 * @throws ProviderError when no Gemini key is configured or `isProviderError` flags the response.
 * @throws Error for any other non-2xx response.
 */
async function callGemini(opts: CallOptions): Promise<string> {
  const apiKey = config.geminiApiKey;
  if (!apiKey) throw new ProviderError("No Gemini API key");

  const parts: unknown[] = [];
  if (opts.imageUrl) {
    // The image is sent as inline data, so the URL is downloaded and base64-encoded here.
    // Best-effort by design: if the download fails the prompt is still sent, without the image.
    try {
      const imgRes = await fetch(opts.imageUrl);
      if (imgRes.ok) {
        const data = Buffer.from(await imgRes.arrayBuffer()).toString("base64");
        // Content-Type may carry parameters ("image/png; charset=binary") or be empty;
        // only the bare media type is passed on as mimeType.
        const headerMime = imgRes.headers.get("content-type")?.split(";")[0]?.trim();
        const mimeType = opts.imageMimeType ?? (headerMime || "image/jpeg");
        parts.push({ inlineData: { mimeType, data } });
      }
    } catch {
      /* skip image if fetch fails */
    }
  } else if (opts.imageBase64) {
    parts.push({ inlineData: { mimeType: opts.imageMimeType ?? "image/jpeg", data: opts.imageBase64 } });
  }
  parts.push({ text: opts.user });

  const body: Record<string, unknown> = {
    contents: [{ role: "user", parts }],
    generationConfig: { maxOutputTokens: opts.maxTokens ?? 4096 },
  };
  if (opts.system) {
    body.systemInstruction = { parts: [{ text: opts.system }] };
  }

  const model = "gemini-2.0-flash";
  // The key goes in a header rather than the query string so it does not end up
  // in URLs recorded by proxies, access logs or error reports.
  const r = await fetch(
    `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`,
    {
      method: "POST",
      headers: { "Content-Type": "application/json", "x-goog-api-key": apiKey },
      body: JSON.stringify(body),
    }
  );

  // Read the body as text first so it can be quoted in error messages.
  const bodyText = await r.text();
  if (!r.ok) {
    if (isProviderError(r.status, bodyText)) throw new ProviderError(`Gemini: ${bodyText.slice(0, 120)}`);
    throw new Error(`Gemini error ${r.status}: ${bodyText.slice(0, 200)}`);
  }
  const d = JSON.parse(bodyText) as { candidates?: { content?: { parts?: { text?: string }[] } }[] };
  // A candidate may split its answer over several parts; join them instead of keeping only the first.
  const replyParts = d.candidates?.[0]?.content?.parts ?? [];
  return replyParts.map((part) => part.text ?? "").join("").trim();
}
235
+
236
// ── Ollama ─────────────────────────────────────────────────────────────────
/**
 * Asks the local Ollama server for a non-streaming chat reply.
 *
 * Never throws: any failure (network error, 5-second timeout, non-2xx status,
 * missing content) is reported as `null` so the caller can fall back.
 * Only `system` and `user` are sent; image fields are not used here.
 *
 * @param opts - Prompt and optional system text.
 * @returns The trimmed reply text, or `null` when Ollama produced no result.
 */
async function callOllama(opts: CallOptions): Promise<string | null> {
  try {
    const messages = opts.system
      ? [{ role: "system", content: opts.system }, { role: "user", content: opts.user }]
      : [{ role: "user", content: opts.user }];

    const response = await fetch(`${ollamaUrl}/api/chat`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ model: ollamaModel, stream: false, messages }),
      // Give up after 5 s so the caller can fall back.
      signal: AbortSignal.timeout(5000),
    });
    if (!response.ok) return null;

    const reply = (await response.json()) as { message?: { content?: string } };
    const text = reply.message?.content;
    return text == null ? null : text.trim();
  } catch {
    return null;
  }
}
259
+
260
// ── Waterfall helper ───────────────────────────────────────────────────────
/**
 * Runs the given provider thunks one after another and resolves with the
 * first successful result.
 *
 * A `ProviderError` means "this provider is unavailable": its message is
 * recorded and the next thunk is tried. Any other error is rethrown
 * immediately without trying further providers.
 *
 * @param providers - Thunks to try, in priority order.
 * @returns The first successful provider's result.
 * @throws Error listing every recorded message when all thunks raised `ProviderError`.
 */
async function waterfall(providers: (() => Promise<string>)[]): Promise<string> {
  const skipped: string[] = [];
  for (const attempt of providers) {
    let outcome: string;
    try {
      outcome = await attempt();
    } catch (err) {
      if (!(err instanceof ProviderError)) throw err;
      skipped.push(err.message);
      continue;
    }
    return outcome;
  }
  throw new Error(`All providers failed: ${skipped.join(" | ")}`);
}
276
+
277
/** Cheap chain shared by `fast` and the hosted fallback of `local`: Haiku → GPT-4o-mini → Gemini Flash. */
const cheapChain = (opts: CallOptions): Promise<string> =>
  waterfall([
    () => callAnthropic(ANTHROPIC_MODELS.HAIKU, opts),
    () => callOpenAI("gpt-4o-mini", opts),
    () => callGemini(opts),
  ]);

/** Quality chain shared by `best` and `vision`: Sonnet → GPT-4o → Gemini Flash. */
const qualityChain = (opts: CallOptions): Promise<string> =>
  waterfall([
    () => callAnthropic(ANTHROPIC_MODELS.SONNET, opts),
    () => callOpenAI("gpt-4o", opts),
    () => callGemini(opts),
  ]);

return {
  // Explicit single-provider calls — no fallback.
  haiku: (opts) => callAnthropic(ANTHROPIC_MODELS.HAIKU, opts),
  sonnet: (opts) => callAnthropic(ANTHROPIC_MODELS.SONNET, opts),
  openai: (opts) => callOpenAI("gpt-4o", opts),
  openaiMini: (opts) => callOpenAI("gpt-4o-mini", opts),
  gemini: (opts) => callGemini(opts),

  // Local first; the hosted cheap chain runs only when Ollama yields nothing (null).
  async local(opts) {
    const result = await callOllama(opts);
    return result ?? cheapChain(opts);
  },

  fast: (opts) => cheapChain(opts),
  best: (opts) => qualityChain(opts),
  vision: (opts) => qualityChain(opts),

  /**
   * Embeds `text` with Ollama's `nomic-embed-text`, falling back to OpenAI's
   * `text-embedding-3-small` when Ollama is unreachable or returns no vector.
   *
   * @throws Error with the HTTP status and body when the OpenAI fallback request fails.
   * @throws Error "No embedding provider available" when neither provider yields a vector.
   */
  async embed(text) {
    try {
      const r = await fetch(`${ollamaUrl}/api/embeddings`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ model: "nomic-embed-text", prompt: text }),
        signal: AbortSignal.timeout(5000),
      });
      if (r.ok) {
        const d = (await r.json()) as { embedding?: number[] };
        if (d.embedding) return d.embedding;
      }
    } catch {
      /* Ollama unreachable or timed out — fall through to OpenAI */
    }

    if (config.openaiApiKey) {
      const r = await fetch("https://api.openai.com/v1/embeddings", {
        method: "POST",
        headers: { "Content-Type": "application/json", Authorization: `Bearer ${config.openaiApiKey}` },
        body: JSON.stringify({ model: "text-embedding-3-small", input: text }),
      });
      const bodyText = await r.text();
      // Surface the real API failure instead of the misleading
      // "No embedding provider available" message.
      if (!r.ok) {
        throw new Error(`OpenAI embeddings error ${r.status}: ${bodyText.slice(0, 200)}`);
      }
      const d = JSON.parse(bodyText) as { data?: { embedding: number[] }[] };
      const vector = d.data?.[0]?.embedding;
      if (vector) return vector;
    }

    throw new Error("No embedding provider available");
  },
};
339
+ }
package/tsconfig.json ADDED
@@ -0,0 +1,15 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "NodeNext",
5
+ "moduleResolution": "NodeNext",
6
+ "outDir": "./dist",
7
+ "declaration": true,
8
+ "declarationMap": true,
9
+ "sourceMap": true,
10
+ "strict": true,
11
+ "esModuleInterop": true,
12
+ "skipLibCheck": true
13
+ },
14
+ "include": ["src"]
15
+ }