@jz92/ai-provider 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +367 -0
- package/dist/index.cjs +582 -0
- package/dist/index.d.cts +109 -0
- package/dist/index.d.ts +109 -0
- package/dist/index.js +541 -0
- package/package.json +72 -0
- package/scripts/postinstall.js +38 -0
- package/scripts/setup-local.sh +73 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import * as zod from 'zod';
|
|
2
|
+
import { ZodSchema } from 'zod';
|
|
3
|
+
|
|
4
|
+
type AIProviderName = 'ollama' | 'anthropic' | 'openai' | 'google' | 'groq' | 'mistral';
|
|
5
|
+
type AIEnvironment = 'development' | 'test' | 'production';
|
|
6
|
+
type ProviderConfig = {
|
|
7
|
+
provider: AIProviderName;
|
|
8
|
+
model: string;
|
|
9
|
+
baseURL?: string;
|
|
10
|
+
maxTokens: number;
|
|
11
|
+
usePromptCache: boolean;
|
|
12
|
+
env: AIEnvironment;
|
|
13
|
+
};
|
|
14
|
+
type AIRequestOptions<T = string> = {
|
|
15
|
+
/** The user-facing prompt — the dynamic part of every request */
|
|
16
|
+
prompt: string;
|
|
17
|
+
/** Stable system instructions — cached in production (Anthropic), baked into Modelfile locally */
|
|
18
|
+
systemPrompt: string;
|
|
19
|
+
/** Zod schema for structured output. Required for generateStructured(). */
|
|
20
|
+
schema?: zod.ZodSchema<T>;
|
|
21
|
+
/** Deterministic key — same key returns cached result without hitting the API */
|
|
22
|
+
cacheKey?: string;
|
|
23
|
+
/** Rough token ceiling on input. Throws before the API call if exceeded. Default 8000. */
|
|
24
|
+
maxInputTokens?: number;
|
|
25
|
+
};
|
|
26
|
+
type AIResponse<T> = {
|
|
27
|
+
data: T;
|
|
28
|
+
usage?: {
|
|
29
|
+
inputTokens: number;
|
|
30
|
+
outputTokens: number;
|
|
31
|
+
/** Provider-side cache token count. Only populated if the provider returns
|
|
32
|
+
* cache metadata in the response (currently Anthropic via the Vercel AI SDK).
|
|
33
|
+
* Defaults to 0 for providers that do not yet expose this. */
|
|
34
|
+
cachedTokens: number;
|
|
35
|
+
};
|
|
36
|
+
provider: AIProviderName;
|
|
37
|
+
model: string;
|
|
38
|
+
fromCache: boolean;
|
|
39
|
+
};
|
|
40
|
+
|
|
41
|
+
/**
|
|
42
|
+
* generateStructured — typed, validated JSON output.
|
|
43
|
+
*
|
|
44
|
+
* @example
|
|
45
|
+
* const result = await generateStructured({
|
|
46
|
+
* systemPrompt: MY_SYSTEM_PROMPT,
|
|
47
|
+
* prompt: userInput,
|
|
48
|
+
* schema: z.object({ name: z.string() }),
|
|
49
|
+
* cacheKey: `parse:${userInput}`,
|
|
50
|
+
* })
|
|
51
|
+
*/
|
|
52
|
+
declare function generateStructured<T>(options: AIRequestOptions<T> & {
|
|
53
|
+
schema: ZodSchema<T>;
|
|
54
|
+
}): Promise<AIResponse<T>>;
|
|
55
|
+
/**
|
|
56
|
+
* generatePlainText — unstructured text output.
|
|
57
|
+
*/
|
|
58
|
+
declare function generatePlainText(options: Omit<AIRequestOptions, 'schema'>): Promise<AIResponse<string>>;
|
|
59
|
+
|
|
60
|
+
/**
|
|
61
|
+
* Resolves which AI provider + model to use based on NODE_ENV.
|
|
62
|
+
*
|
|
63
|
+
* Default routing:
|
|
64
|
+
* development → Ollama (local, free, no API key)
|
|
65
|
+
* test → Anthropic Haiku (cheap, real API, for CI)
|
|
66
|
+
* production → Anthropic Sonnet with prompt caching
|
|
67
|
+
*
|
|
68
|
+
* Override anything via env vars:
|
|
69
|
+
* AI_PROVIDER=openai|anthropic|google|groq|mistral|ollama
|
|
70
|
+
* AI_MODEL=<model string>
|
|
71
|
+
* OLLAMA_MODEL=<named variant>
|
|
72
|
+
* OLLAMA_BASE_URL=<url>
|
|
73
|
+
*
|
|
74
|
+
* Provider-specific defaults when AI_PROVIDER is set:
|
|
75
|
+
* openai → gpt-4o-mini (test) / gpt-4o (prod)
|
|
76
|
+
* google → gemini-1.5-flash (test) / gemini-1.5-pro (prod)
|
|
77
|
+
* groq → llama-3.1-8b-instant (test) / llama-3.1-70b-versatile (prod)
|
|
78
|
+
* mistral → mistral-small-latest (test) / mistral-large-latest (prod)
|
|
79
|
+
*/
|
|
80
|
+
declare function resolveProvider(): ProviderConfig;
|
|
81
|
+
|
|
82
|
+
declare class BoundedCache {
|
|
83
|
+
private store;
|
|
84
|
+
private readonly maxSize;
|
|
85
|
+
private readonly ttlMs;
|
|
86
|
+
constructor(maxSize?: number, ttlMs?: number);
|
|
87
|
+
get<T>(key: string): T | null;
|
|
88
|
+
set<T>(key: string, value: T): void;
|
|
89
|
+
delete(key: string): void;
|
|
90
|
+
/** Clear all entries — useful between tests */
|
|
91
|
+
clear(): void;
|
|
92
|
+
get size(): number;
|
|
93
|
+
}
|
|
94
|
+
declare const responseCache: BoundedCache;
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Error classification for the retry logic.
|
|
98
|
+
* We only retry transient errors — never auth, billing, or validation failures.
|
|
99
|
+
*/
|
|
100
|
+
declare class AIProviderError extends Error {
|
|
101
|
+
readonly code: AIErrorCode;
|
|
102
|
+
readonly provider?: string | undefined;
|
|
103
|
+
readonly status?: number | undefined;
|
|
104
|
+
readonly cause?: unknown | undefined;
|
|
105
|
+
constructor(message: string, code: AIErrorCode, provider?: string | undefined, status?: number | undefined, cause?: unknown | undefined);
|
|
106
|
+
}
|
|
107
|
+
type AIErrorCode = 'AUTH_ERROR' | 'BILLING_ERROR' | 'RATE_LIMIT' | 'SERVER_ERROR' | 'TIMEOUT' | 'MODEL_NOT_FOUND' | 'TOKEN_BUDGET' | 'SCHEMA_VALIDATION' | 'UNKNOWN';
|
|
108
|
+
|
|
109
|
+
export { type AIEnvironment, type AIErrorCode, AIProviderError, type AIProviderName, type AIRequestOptions, type AIResponse, type ProviderConfig, generatePlainText, generateStructured, resolveProvider, responseCache };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,541 @@
|
|
|
1
|
+
// src/gateway.ts
|
|
2
|
+
import { generateObject, generateText } from "ai";
|
|
3
|
+
|
|
4
|
+
// src/provider.ts
|
|
5
|
+
/**
 * Resolves the provider configuration for the current process.
 *
 * An explicit AI_PROVIDER wins; otherwise the environment derived from
 * NODE_ENV selects the default (development -> Ollama, test/production ->
 * Anthropic). AI_MODEL, when set, overrides the model in every branch.
 *
 * @returns The configuration produced by buildOllamaConfig or buildCloudConfig.
 * @throws AIProviderError (code "UNKNOWN") when AI_PROVIDER names a provider
 *         this package has no defaults or model builder for.
 */
function resolveProvider() {
  const env = resolveEnvironment();
  const modelOverride = process.env.AI_MODEL;
  // Normalise so values like "OpenAI" or " groq " resolve to the canonical name.
  const providerOverride = (process.env.AI_PROVIDER ?? "").trim().toLowerCase();

  if (providerOverride) {
    const supported = ["ollama", "anthropic", "openai", "google", "groq", "mistral"];
    if (!supported.includes(providerOverride)) {
      // Fail here with an actionable message instead of crashing later on a
      // missing defaults entry or an undefined model.
      throw new AIProviderError(
        `[ai-provider] Unsupported AI_PROVIDER "${process.env.AI_PROVIDER}".\nSupported providers: ${supported.join(", ")}`,
        "UNKNOWN",
        providerOverride
      );
    }
    return providerOverride === "ollama"
      ? buildOllamaConfig(modelOverride)
      : buildCloudConfig(providerOverride, env, modelOverride);
  }

  switch (env) {
    case "development":
      return buildOllamaConfig(modelOverride);
    case "test":
      return buildCloudConfig("anthropic", "test", modelOverride);
    case "production":
      return buildCloudConfig("anthropic", "production", modelOverride);
  }
}
|
|
24
|
+
/**
 * Maps NODE_ENV onto one of the three supported environments.
 * Anything other than "test" or "production" (including unset) is "development".
 */
function resolveEnvironment() {
  switch (process.env.NODE_ENV) {
    case "test":
      return "test";
    case "production":
      return "production";
    default:
      return "development";
  }
}
|
|
30
|
+
/**
 * Builds the configuration for a local Ollama server.
 *
 * Model precedence: explicit override, then OLLAMA_MODEL, then the bundled
 * default. The base URL comes from OLLAMA_BASE_URL or the local default port.
 * The returned `env` is always "development", whatever NODE_ENV says.
 *
 * @param modelOverride Model name to use instead of the env/default model.
 */
function buildOllamaConfig(modelOverride) {
  const model = modelOverride ?? process.env.OLLAMA_MODEL ?? "qwen2.5-coder:14b";
  const baseURL = process.env.OLLAMA_BASE_URL ?? "http://localhost:11434";
  return {
    provider: "ollama",
    model,
    baseURL,
    maxTokens: 2048,
    usePromptCache: false,
    env: "development"
  };
}
|
|
40
|
+
/**
 * Builds the configuration for a hosted provider.
 *
 * Only two tiers exist: "test" uses the cheaper model and a 512-token output
 * cap; every other environment uses the production model and a 1024-token cap.
 *
 * @param provider Cloud provider name; must have an entry in the catalog
 *                 below unless `modelOverride` is supplied.
 * @param env Resolved environment, copied onto the result unchanged.
 * @param modelOverride Model name to use instead of the catalog default.
 */
function buildCloudConfig(provider, env, modelOverride) {
  const tier = env === "test" ? "test" : "production";
  const catalog = {
    anthropic: { test: "claude-haiku-4-5-20251001", production: "claude-sonnet-4-6" },
    openai: { test: "gpt-4o-mini", production: "gpt-4o" },
    google: { test: "gemini-1.5-flash", production: "gemini-1.5-pro" },
    groq: { test: "llama-3.1-8b-instant", production: "llama-3.1-70b-versatile" },
    mistral: { test: "mistral-small-latest", production: "mistral-large-latest" }
  };
  const model = modelOverride ?? catalog[provider][tier];
  const maxTokens = tier === "test" ? 512 : 1024;
  // Only Anthropic supports prompt caching via this SDK
  const usePromptCache = tier !== "test" && provider === "anthropic";
  return { provider, model, maxTokens, usePromptCache, env };
}
|
|
73
|
+
|
|
74
|
+
// src/errors.ts
|
|
75
|
+
var AIProviderError = class extends Error {
|
|
76
|
+
constructor(message, code, provider, status, cause) {
|
|
77
|
+
super(message);
|
|
78
|
+
this.code = code;
|
|
79
|
+
this.provider = provider;
|
|
80
|
+
this.status = status;
|
|
81
|
+
this.cause = cause;
|
|
82
|
+
this.name = "AIProviderError";
|
|
83
|
+
}
|
|
84
|
+
};
|
|
85
|
+
/**
 * Tells whether an error code is a transient failure worth retrying.
 * Only rate limits, server errors and timeouts qualify.
 */
function isRetryable(code) {
  switch (code) {
    case "RATE_LIMIT":
    case "SERVER_ERROR":
    case "TIMEOUT":
      return true;
    default:
      return false;
  }
}
|
|
88
|
+
/**
 * Maps an arbitrary thrown value onto one of the package's error codes.
 *
 * A recognised HTTP status decides first; otherwise the lower-cased message
 * is matched against keyword heuristics in a fixed order, and the first match
 * wins. Falls back to "UNKNOWN".
 *
 * @param err The thrown value (Error, string, SDK error object, ...).
 */
function classifyError(err) {
  const text = errorMessage(err);
  const httpStatus = extractStatus(err);

  const codeByStatus = new Map([
    [401, "AUTH_ERROR"],
    [402, "BILLING_ERROR"],
    [403, "BILLING_ERROR"],
    [404, "MODEL_NOT_FOUND"],
    [429, "RATE_LIMIT"]
  ]);
  const fromStatus = codeByStatus.get(httpStatus);
  if (fromStatus !== undefined) return fromStatus;
  if (httpStatus >= 500 && httpStatus < 600) return "SERVER_ERROR";

  const haystack = text.toLowerCase();
  const mentions = (...needles) => needles.some((needle) => haystack.includes(needle));
  // Order matters: earlier entries take precedence over later ones.
  const heuristics = [
    ["AUTH_ERROR", () => mentions("api key", "authentication", "unauthorized")],
    ["BILLING_ERROR", () => mentions("credit", "billing", "quota")],
    ["RATE_LIMIT", () => mentions("rate limit", "too many requests")],
    ["TIMEOUT", () => mentions("timeout", "timed out", "etimedout")],
    ["MODEL_NOT_FOUND", () => haystack.includes("model") && mentions("not found", "pull")],
    ["SCHEMA_VALIDATION", () => mentions("schema", "validation", "parse")]
  ];
  for (const [code, matches] of heuristics) {
    if (matches()) return code;
  }
  return "UNKNOWN";
}
|
|
112
|
+
/**
 * Converts any thrown value into an AIProviderError.
 *
 * The result carries the classified code, the extracted HTTP status, a
 * user-facing message for that code, and the original value as `cause`.
 *
 * @param err The thrown value to wrap.
 * @param provider Name of the provider the failing call targeted.
 */
function wrapError(err, provider) {
  const code = classifyError(err);
  const httpStatus = extractStatus(err);
  const rawText = errorMessage(err);
  return new AIProviderError(buildMessage(code, provider, rawText), code, provider, httpStatus, err);
}
|
|
118
|
+
/**
 * Produces the user-facing message for a classified error.
 *
 * @param code Error code chosen by the classifier.
 * @param provider Provider name interpolated into the message.
 * @param raw Original error text, appended where it helps diagnosis.
 * @returns A multi-line message prefixed with "[ai-provider]".
 */
function buildMessage(code, provider, raw) {
  if (code === "AUTH_ERROR") {
    return [
      `[ai-provider] Authentication failed for "${provider}".`,
      "Check your API key is set correctly and hasn't expired.",
      `Env var: ${providerKeyName(provider)}`
    ].join("\n");
  }
  if (code === "BILLING_ERROR") {
    return [
      `[ai-provider] Billing or quota issue for "${provider}".`,
      "Check your account has active credits at the provider dashboard."
    ].join("\n");
  }
  if (code === "RATE_LIMIT") {
    return [
      `[ai-provider] Rate limit hit for "${provider}".`,
      "Request will be retried with exponential backoff."
    ].join("\n");
  }
  if (code === "MODEL_NOT_FOUND") {
    return [
      `[ai-provider] Model not found for "${provider}".`,
      "If using Ollama locally, run: ollama pull <model-name>",
      `Raw: ${raw}`
    ].join("\n");
  }
  if (code === "TIMEOUT") {
    return [
      `[ai-provider] Request timed out for "${provider}".`,
      "Check your network connection and provider status."
    ].join("\n");
  }
  if (code === "SCHEMA_VALIDATION") {
    return [
      `[ai-provider] Response from "${provider}" did not match the expected schema.`,
      "Try making your system prompt more explicit about the output format.",
      `Raw: ${raw}`
    ].join("\n");
  }
  // SERVER_ERROR, TOKEN_BUDGET, UNKNOWN and anything unrecognised.
  return `[ai-provider] Unexpected error from "${provider}": ${raw}`;
}
|
|
145
|
+
/**
 * Returns the environment variable that holds the API key for a provider.
 * Providers without a table entry fall back to `<PROVIDER>_API_KEY`.
 */
function providerKeyName(provider) {
  const envVarByProvider = {
    anthropic: "ANTHROPIC_API_KEY",
    openai: "OPENAI_API_KEY",
    google: "GOOGLE_GENERATIVE_AI_API_KEY",
    groq: "GROQ_API_KEY",
    mistral: "MISTRAL_API_KEY"
  };
  const known = envVarByProvider[provider];
  return known ?? `${provider.toUpperCase()}_API_KEY`;
}
|
|
155
|
+
/**
 * Pulls a numeric HTTP status off an error-like object.
 *
 * Checks `status` first, then `statusCode`; returns 0 when neither holds a
 * number or when the value is not an object.
 */
function extractStatus(err) {
  if (!err || typeof err !== "object") return 0;
  for (const field of ["status", "statusCode"]) {
    if (field in err && typeof err[field] === "number") {
      return err[field];
    }
  }
  return 0;
}
|
|
164
|
+
/**
 * Extracts readable text from any thrown value: an Error's `message`,
 * otherwise the value's string form.
 */
function errorMessage(err) {
  return err instanceof Error ? err.message : String(err);
}
|
|
169
|
+
|
|
170
|
+
// src/client.ts
|
|
171
|
+
/**
 * Creates the SDK model instance for the configured provider by delegating
 * to the matching provider-specific builder.
 *
 * @param config Provider configuration; `config.provider` selects the builder.
 * @returns The model object returned by the provider's SDK factory.
 * @throws AIProviderError (code "UNKNOWN") when `config.provider` has no
 *         builder, so the request fails here rather than inside the SDK call
 *         with an undefined model.
 */
async function buildModel(config) {
  switch (config.provider) {
    case "ollama":
      return buildOllamaModel(config);
    case "anthropic":
      return buildAnthropicModel(config);
    case "openai":
      return buildOpenAIModel(config);
    case "google":
      return buildGoogleModel(config);
    case "groq":
      return buildGroqModel(config);
    case "mistral":
      return buildMistralModel(config);
    default:
      throw new AIProviderError(
        `[ai-provider] Unsupported provider "${config.provider}".\nSupported providers: ollama, anthropic, openai, google, groq, mistral`,
        "UNKNOWN",
        config.provider
      );
  }
}
|
|
187
|
+
/**
 * Creates an Ollama model after confirming the server is reachable.
 *
 * The same resolved base URL is used for the reachability probe and for the
 * client, so a config without `baseURL` no longer yields "undefined/api".
 * Trailing slashes are dropped before "/api" is appended.
 *
 * @param config Provider configuration; reads `baseURL` and `model`.
 * @throws AIProviderError when Ollama is unreachable or the optional
 *         `ollama-ai-provider` package is not installed.
 */
async function buildOllamaModel(config) {
  const baseURL = config.baseURL ?? "http://localhost:11434";
  await assertOllamaReachable(baseURL);
  try {
    const { createOllama } = await import("ollama-ai-provider");
    const apiURL = `${baseURL.replace(/\/+$/, "")}/api`;
    const ollama = createOllama({ baseURL: apiURL });
    return ollama(config.model);
  } catch (err) {
    if (isModuleNotFound(err)) {
      throw new AIProviderError(
        "[ai-provider] ollama-ai-provider is not installed.\nRun: npm install ollama-ai-provider",
        "UNKNOWN",
        "ollama"
      );
    }
    throw err;
  }
}
|
|
204
|
+
/**
 * Verifies that an Ollama server answers at `baseURL` within three seconds.
 *
 * @param baseURL Root URL of the Ollama server to probe.
 * @throws AIProviderError (code "UNKNOWN") with setup instructions when the
 *         request fails, times out, or returns a non-2xx status. The
 *         underlying failure is kept as `cause` for diagnosis.
 */
async function assertOllamaReachable(baseURL) {
  let failure;
  try {
    const res = await fetch(baseURL, { signal: AbortSignal.timeout(3e3) });
    if (res.ok) return;
    failure = new Error(`status ${res.status}`);
  } catch (err) {
    // Network error, DNS failure, or the 3s abort signal firing.
    failure = err;
  }
  throw new AIProviderError(
    `[ai-provider] Ollama is not reachable at ${baseURL}.

Start Ollama: brew services start ollama
Or (foreground): ollama serve

To use a cloud provider instead:
Set AI_PROVIDER=anthropic (and ANTHROPIC_API_KEY) in your .env
Or: AI_PROVIDER=openai (and OPENAI_API_KEY)
Or: AI_PROVIDER=groq (and GROQ_API_KEY \u2014 free tier available)`,
    "UNKNOWN",
    "ollama",
    void 0,
    failure
  );
}
|
|
224
|
+
/**
 * Creates an Anthropic model via the optional `@ai-sdk/anthropic` package.
 * Requires ANTHROPIC_API_KEY; a missing SDK is reported with install steps.
 */
async function buildAnthropicModel(config) {
  const sdkPackage = "@ai-sdk/anthropic";
  assertKey("ANTHROPIC_API_KEY", "anthropic", sdkPackage);
  try {
    const { createAnthropic } = await import("@ai-sdk/anthropic");
    return createAnthropic({ apiKey: process.env.ANTHROPIC_API_KEY })(config.model);
  } catch (failure) {
    if (!isModuleNotFound(failure)) throw failure;
    notInstalled(sdkPackage);
    throw failure;
  }
}
|
|
235
|
+
/**
 * Creates an OpenAI model via the optional `@ai-sdk/openai` package.
 * Requires OPENAI_API_KEY; a missing SDK is reported with install steps.
 */
async function buildOpenAIModel(config) {
  const sdkPackage = "@ai-sdk/openai";
  assertKey("OPENAI_API_KEY", "openai", sdkPackage);
  try {
    const { createOpenAI } = await import("@ai-sdk/openai");
    return createOpenAI({ apiKey: process.env.OPENAI_API_KEY })(config.model);
  } catch (failure) {
    if (!isModuleNotFound(failure)) throw failure;
    notInstalled(sdkPackage);
    throw failure;
  }
}
|
|
246
|
+
/**
 * Creates a Google Generative AI model via the optional `@ai-sdk/google`
 * package. Requires GOOGLE_GENERATIVE_AI_API_KEY; a missing SDK is reported
 * with install steps.
 */
async function buildGoogleModel(config) {
  const sdkPackage = "@ai-sdk/google";
  assertKey("GOOGLE_GENERATIVE_AI_API_KEY", "google", sdkPackage);
  try {
    const { createGoogleGenerativeAI } = await import("@ai-sdk/google");
    return createGoogleGenerativeAI({ apiKey: process.env.GOOGLE_GENERATIVE_AI_API_KEY })(config.model);
  } catch (failure) {
    if (!isModuleNotFound(failure)) throw failure;
    notInstalled(sdkPackage);
    throw failure;
  }
}
|
|
257
|
+
/**
 * Creates a Groq model via the optional `@ai-sdk/groq` package.
 * Requires GROQ_API_KEY; a missing SDK is reported with install steps.
 */
async function buildGroqModel(config) {
  const sdkPackage = "@ai-sdk/groq";
  assertKey("GROQ_API_KEY", "groq", sdkPackage);
  try {
    const { createGroq } = await import("@ai-sdk/groq");
    return createGroq({ apiKey: process.env.GROQ_API_KEY })(config.model);
  } catch (failure) {
    if (!isModuleNotFound(failure)) throw failure;
    notInstalled(sdkPackage);
    throw failure;
  }
}
|
|
268
|
+
/**
 * Creates a Mistral model via the optional `@ai-sdk/mistral` package.
 * Requires MISTRAL_API_KEY; a missing SDK is reported with install steps.
 */
async function buildMistralModel(config) {
  const sdkPackage = "@ai-sdk/mistral";
  assertKey("MISTRAL_API_KEY", "mistral", sdkPackage);
  try {
    const { createMistral } = await import("@ai-sdk/mistral");
    return createMistral({ apiKey: process.env.MISTRAL_API_KEY })(config.model);
  } catch (failure) {
    if (!isModuleNotFound(failure)) throw failure;
    notInstalled(sdkPackage);
    throw failure;
  }
}
|
|
279
|
+
/**
 * Ensures the API-key environment variable for a provider is set.
 *
 * @param envVar Name of the environment variable to check.
 * @param provider Provider the key belongs to (used in the message).
 * @param pkg SDK package name suggested in the install hint.
 * @throws AIProviderError (code "AUTH_ERROR") with setup instructions when
 *         the variable is unset or empty.
 */
function assertKey(envVar, provider, pkg) {
  if (process.env[envVar]) return;
  const instructions = `[ai-provider] ${envVar} is not set.

This is required for the "${provider}" provider.

1. Install the SDK: npm install ${pkg}
2. Set the key:
Local: add ${envVar}=<your-key> to .env.local
Vercel: add it in Project Settings \u2192 Environment Variables
AWS: add it to your task definition or Secrets Manager
GitHub CI: add it to repo secrets and reference as \${{ secrets.${envVar} }}

Get a key at: ${providerDashboard(provider)}`;
  throw new AIProviderError(instructions, "AUTH_ERROR", provider);
}
|
|
299
|
+
/**
 * Always throws: reports that an optional SDK package is missing and how to
 * install it.
 *
 * @param pkg npm package name to mention in the message.
 * @throws AIProviderError (code "UNKNOWN").
 */
function notInstalled(pkg) {
  const lines = [`[ai-provider] ${pkg} is not installed.`, `Run: npm install ${pkg}`];
  throw new AIProviderError(lines.join("\n"), "UNKNOWN");
}
|
|
306
|
+
/**
 * Tells whether an error means "the requested module could not be resolved".
 *
 * Accepts both Node error codes: ERR_MODULE_NOT_FOUND (ESM loader) and
 * MODULE_NOT_FOUND (require-based resolution, also emitted by some bundlers),
 * so a missing optional SDK is recognised however the import was resolved.
 *
 * @param err Any thrown value.
 * @returns true only for Error instances carrying one of those codes.
 */
function isModuleNotFound(err) {
  if (!(err instanceof Error) || !("code" in err)) return false;
  return err.code === "ERR_MODULE_NOT_FOUND" || err.code === "MODULE_NOT_FOUND";
}
|
|
309
|
+
/**
 * Returns the URL where an API key for the provider can be created.
 * Providers without a table entry fall back to `https://<provider>.com`.
 */
function providerDashboard(provider) {
  const urlByProvider = {
    anthropic: "https://console.anthropic.com",
    openai: "https://platform.openai.com/api-keys",
    google: "https://aistudio.google.com/app/apikey",
    groq: "https://console.groq.com/keys",
    mistral: "https://console.mistral.ai/api-keys"
  };
  const known = urlByProvider[provider];
  return known ?? `https://${provider}.com`;
}
|
|
319
|
+
|
|
320
|
+
// src/cache.ts
|
|
321
|
+
/**
 * In-memory cache with a maximum entry count and a per-entry time-to-live.
 *
 * Entries are evicted oldest-first once the size limit is reached, and are
 * dropped lazily on read once their TTL has passed.
 */
var BoundedCache = class {
  /**
   * @param maxSize Maximum number of entries. Defaults to AI_CACHE_MAX_SIZE, else 500.
   * @param ttlMs Entry lifetime in milliseconds. Defaults to AI_CACHE_TTL_MS, else 5 minutes.
   */
  constructor(maxSize, ttlMs) {
    // A malformed env value would parse to NaN, and every comparison against
    // NaN is false, which would silently disable the size bound and expiry.
    const fromEnv = (raw, fallback) => {
      const parsed = parseInt(raw ?? "", 10);
      return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
    };
    this.store = /* @__PURE__ */ new Map();
    this.maxSize = maxSize ?? fromEnv(process.env.AI_CACHE_MAX_SIZE, 500);
    this.ttlMs = ttlMs ?? fromEnv(process.env.AI_CACHE_TTL_MS, 5 * 60 * 1e3);
  }
  /**
   * Returns the cached value, or null when the key is absent or expired.
   * An expired entry is removed as a side effect.
   */
  get(key) {
    const entry = this.store.get(key);
    if (!entry) return null;
    if (Date.now() > entry.expiresAt) {
      this.store.delete(key);
      return null;
    }
    return entry.value;
  }
  /**
   * Stores a value under `key` with a fresh TTL.
   *
   * Overwriting an existing key never evicts another entry, and moves the key
   * to the newest position. Inserting a new key at capacity evicts the oldest.
   */
  set(key, value) {
    if (this.store.has(key)) {
      // Delete first so the re-insert lands at the end of the Map's insertion order.
      this.store.delete(key);
    } else if (this.store.size >= this.maxSize) {
      // Use the iterator's `done` flag rather than truthiness of the key:
      // "" is a valid key and must still be evictable.
      const oldest = this.store.keys().next();
      if (!oldest.done) this.store.delete(oldest.value);
    }
    this.store.set(key, {
      value,
      expiresAt: Date.now() + this.ttlMs
    });
  }
  /** Removes a single entry; a no-op when the key is absent. */
  delete(key) {
    this.store.delete(key);
  }
  /** Clear all entries — useful between tests */
  clear() {
    this.store.clear();
  }
  /** Number of stored entries, including expired ones not yet read. */
  get size() {
    return this.store.size;
  }
};
var responseCache = new BoundedCache();
|
|
358
|
+
|
|
359
|
+
// src/gateway.ts
|
|
360
|
+
var TIMEOUT_MS = {
|
|
361
|
+
ollama: parseInt(process.env.AI_TIMEOUT_MS ?? "60000", 10),
|
|
362
|
+
anthropic: parseInt(process.env.AI_TIMEOUT_MS ?? "30000", 10),
|
|
363
|
+
openai: parseInt(process.env.AI_TIMEOUT_MS ?? "30000", 10),
|
|
364
|
+
google: parseInt(process.env.AI_TIMEOUT_MS ?? "30000", 10),
|
|
365
|
+
groq: parseInt(process.env.AI_TIMEOUT_MS ?? "30000", 10),
|
|
366
|
+
mistral: parseInt(process.env.AI_TIMEOUT_MS ?? "30000", 10)
|
|
367
|
+
};
|
|
368
|
+
/**
 * generateStructured — typed, validated JSON output.
 *
 * Serves a response-cache hit when `options.cacheKey` matches; otherwise
 * enforces the input token budget and calls `generateObject` with the
 * provider's timeout and retry policy.
 *
 * @param options Request options; `schema` is forwarded to `generateObject`.
 * @returns Response envelope: `data`, `usage`, `provider`, `model`, `fromCache`.
 * @throws AIProviderError on token-budget overrun, timeout, or a classified
 *         provider failure.
 */
async function generateStructured(options) {
  return runGeneration(
    options,
    (model, messages, maxTokens) => generateObject({ model, messages, schema: options.schema, maxTokens }),
    (result) => result.object
  );
}
/**
 * generatePlainText — unstructured text output.
 *
 * Same pipeline as generateStructured, but calls `generateText` and returns
 * the generated text as `data`.
 *
 * @param options Request options (no schema).
 * @returns Response envelope whose `data` is the generated text.
 * @throws AIProviderError on token-budget overrun, timeout, or a classified
 *         provider failure.
 */
async function generatePlainText(options) {
  return runGeneration(
    options,
    (model, messages, maxTokens) => generateText({ model, messages, maxTokens }),
    (result) => result.text
  );
}
/**
 * Shared request pipeline: cache lookup, token budget, model construction,
 * then the SDK call wrapped in timeout + retry.
 *
 * The response cache is keyed by `cacheKey` alone, so callers must not reuse
 * one key for different prompts, schemas or output kinds.
 *
 * @param options Caller's request options.
 * @param invoke Performs the SDK call; receives (model, messages, maxTokens).
 * @param pickData Extracts the payload from the SDK result.
 */
async function runGeneration(options, invoke, pickData) {
  const config = resolveProvider();
  const cacheKey = options.cacheKey;
  if (cacheKey) {
    const cached = responseCache.get(cacheKey);
    // Compare against null/undefined rather than truthiness so legitimately
    // falsy payloads ("" text, 0, false) are still served from the cache.
    if (cached !== null && cached !== void 0) {
      return { data: cached, provider: config.provider, model: config.model, fromCache: true };
    }
  }
  guardTokenBudget(options.systemPrompt + options.prompt, options.maxInputTokens ?? 8e3);
  const model = await buildModel(config);
  const messages = buildMessages(config, options.systemPrompt, options.prompt);
  const timeout = TIMEOUT_MS[config.provider];
  const result = await withRetry(
    () => withTimeout(() => invoke(model, messages, config.maxTokens), timeout),
    config.provider
  );
  // The AI SDK reports provider metadata on the result object, not on `usage`.
  const providerMetadata = result.experimental_providerMetadata ?? result.providerMetadata;
  return buildResponse(pickData(result), result.usage, config, cacheKey, providerMetadata);
}
/**
 * Builds the message list for the SDK call.
 *
 * With prompt caching enabled, the Anthropic cache breakpoint goes on the
 * system message: that is the stable prefix worth caching. The user prompt
 * changes on every request, so a breakpoint there would write a cache entry
 * that later requests can never read.
 *
 * @param config Provider configuration; reads `usePromptCache`.
 * @param systemPrompt Stable system instructions.
 * @param userPrompt Dynamic user prompt.
 * @returns Two messages: system, then user.
 */
function buildMessages(config, systemPrompt, userPrompt) {
  const systemMessage = { role: "system", content: systemPrompt };
  if (config.usePromptCache) {
    systemMessage.experimental_providerMetadata = {
      anthropic: { cacheControl: { type: "ephemeral" } }
    };
  }
  return [systemMessage, { role: "user", content: userPrompt }];
}
/**
 * Wraps an SDK result into the public response envelope, stores the payload
 * in the response cache when a key was given, and logs usage.
 *
 * @param data Payload to return (object or text).
 * @param usage SDK usage object (`promptTokens`, `completionTokens`), if any.
 * @param config Provider configuration used for the call.
 * @param cacheKey Cache key to store `data` under; skipped when falsy.
 * @param providerMetadata Provider metadata from the SDK result. Optional;
 *        when omitted, metadata attached to `usage` is used if present.
 */
function buildResponse(data, usage, config, cacheKey, providerMetadata) {
  if (cacheKey) responseCache.set(cacheKey, data);
  const anthropic = (providerMetadata ?? usage?.experimental_providerMetadata)?.anthropic;
  const cachedTokens = (anthropic?.cacheReadInputTokens ?? 0) + (anthropic?.cacheCreationInputTokens ?? 0);
  const response = {
    data,
    usage: usage ? {
      inputTokens: usage.promptTokens,
      outputTokens: usage.completionTokens,
      cachedTokens
    } : void 0,
    provider: config.provider,
    model: config.model,
    fromCache: false
  };
  logUsage(config, response);
  return response;
}
|
|
451
|
+
/**
 * Prints a boxed usage summary (provider, model, token counts) to the console.
 *
 * Runs only when `config.env` is "development" or AI_LOG_USAGE is "true".
 *
 * @param config Provider configuration; reads `env`, `provider`, `model`.
 * @param response Response envelope; reads `usage` and `fromCache`.
 */
function logUsage(config, response) {
  const isDev = config.env === "development";
  const forceLog = process.env.AI_LOG_USAGE === "true";
  if (!isDev && !forceLog) return;
  const u = response.usage;
  const width = 41;
  const prefix = "\x1B[2m[ai-provider]\x1B[0m ";
  // Colour escapes occupy no terminal columns, so measure the text without
  // them; otherwise the right border drifts left on coloured rows.
  const visibleLength = (text) => text.replace(/\x1B\[[0-9;]*m/g, "").length;
  const line = (label, value) => {
    const content = ` ${label.padEnd(10)} ${value}`;
    // Pad to the full inner width so rows line up with the top/bottom bars.
    const pad = Math.max(0, width - visibleLength(content));
    return `${prefix}\u2502${content}${" ".repeat(pad)}\u2502`;
  };
  const bar = (char) => `${prefix}${char}${"\u2500".repeat(width)}${char === "\u250C" ? "\u2510" : "\u2518"}`;
  const lines = [bar("\u250C")];
  lines.push(line("provider", `\x1B[36m${config.provider}\x1B[0m \x1B[2m(${config.env})\x1B[0m`));
  lines.push(line("model", `\x1B[36m${config.model}\x1B[0m`));
  if (response.fromCache) {
    lines.push(line("tokens", "\x1B[2mskipped \u2014 response cache hit\x1B[0m"));
  } else if (u) {
    const tokenStr = `in: \x1B[33m${u.inputTokens}\x1B[0m out: \x1B[33m${u.outputTokens}\x1B[0m`;
    lines.push(line("tokens", tokenStr));
    if (u.cachedTokens > 0) {
      // Skip the percentage when there is no input count to divide by.
      const share = u.inputTokens > 0 ? ` (${Math.round(u.cachedTokens / u.inputTokens * 100)}% of input)` : "";
      lines.push(line("cached", `\x1B[32m${u.cachedTokens} tokens${share}\x1B[0m`));
    }
  } else {
    lines.push(line("tokens", "\x1B[2munavailable\x1B[0m"));
  }
  lines.push(bar("\u2514"));
  lines.forEach((l) => console.log(l));
}
|
|
481
|
+
/**
 * Rejects inputs whose rough token estimate exceeds the budget.
 *
 * The estimate is one token per four characters, rounded up.
 *
 * @param text Combined prompt text to measure.
 * @param maxTokens Maximum allowed estimated tokens.
 * @throws AIProviderError (code "TOKEN_BUDGET") when the estimate is over budget.
 */
function guardTokenBudget(text, maxTokens) {
  const approxTokens = Math.ceil(text.length / 4);
  if (!(approxTokens > maxTokens)) return;
  const explanation = `[ai-provider] Input exceeds token budget.
Estimated ~${approxTokens} tokens, limit is ${maxTokens}.
Trim your prompt or raise maxInputTokens in your call options.`;
  throw new AIProviderError(explanation, "TOKEN_BUDGET");
}
|
|
492
|
+
/**
 * Runs `fn` and rejects with a TIMEOUT error if it has not settled within
 * `timeoutMs`. The timer is always cleared once the race settles. The
 * operation started by `fn` is not cancelled; only its result is abandoned.
 *
 * @param fn Function returning the promise to race against the timer.
 * @param timeoutMs Time limit in milliseconds.
 * @returns Whatever `fn` resolves to.
 * @throws AIProviderError (code "TIMEOUT") on expiry, or whatever `fn` rejects with.
 */
async function withTimeout(fn, timeoutMs) {
  let handle;
  const deadline = new Promise((_resolve, reject) => {
    handle = setTimeout(
      () => reject(new AIProviderError(
        `[ai-provider] Request timed out after ${timeoutMs}ms.
If using Ollama locally, the model may still be loading \u2014 try again in a few seconds.
Override timeout: AI_TIMEOUT_MS=90000`,
        "TIMEOUT"
      )),
      timeoutMs
    );
  });
  try {
    return await Promise.race([fn(), deadline]);
  } finally {
    clearTimeout(handle);
  }
}
|
|
513
|
+
/**
 * Calls `fn`, retrying transient failures with exponential backoff
 * (500ms, 1000ms, ...).
 *
 * Every failure is normalised to an AIProviderError. Non-retryable codes are
 * rethrown immediately; retryable ones are retried up to `maxRetries` times,
 * after which the last error is thrown.
 *
 * @param fn Operation to attempt.
 * @param provider Provider name used when wrapping foreign errors.
 * @param maxRetries Number of retries after the first attempt (default 2).
 * @returns Whatever `fn` resolves to.
 */
async function withRetry(fn, provider, maxRetries = 2) {
  let failure;
  let attempt = 0;
  while (attempt <= maxRetries) {
    try {
      return await fn();
    } catch (caught) {
      const normalised = caught instanceof AIProviderError ? caught : wrapError(caught, provider);
      if (!isRetryable(normalised.code)) throw normalised;
      failure = normalised;
    }
    // No wait after the final attempt: there is nothing left to retry.
    if (attempt < maxRetries) {
      const delayMs = 500 * 2 ** attempt;
      if (process.env.AI_LOG_USAGE === "true") {
        console.warn(`[ai-provider] Retrying (attempt ${attempt + 1}/${maxRetries}) after ${delayMs}ms \u2014 ${failure.code}`);
      }
      await new Promise((resume) => setTimeout(resume, delayMs));
    }
    attempt += 1;
  }
  throw failure;
}
|
|
535
|
+
export {
|
|
536
|
+
AIProviderError,
|
|
537
|
+
generatePlainText,
|
|
538
|
+
generateStructured,
|
|
539
|
+
resolveProvider,
|
|
540
|
+
responseCache
|
|
541
|
+
};
|