@almadar/llm 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +72 -0
- package/dist/chunk-KH4JNOLT.js +174 -0
- package/dist/chunk-KH4JNOLT.js.map +1 -0
- package/dist/chunk-MJS33AAS.js +234 -0
- package/dist/chunk-MJS33AAS.js.map +1 -0
- package/dist/chunk-PV3G5PJS.js +633 -0
- package/dist/chunk-PV3G5PJS.js.map +1 -0
- package/dist/chunk-WM7QVK2Z.js +192 -0
- package/dist/chunk-WM7QVK2Z.js.map +1 -0
- package/dist/client.d.ts +136 -0
- package/dist/client.js +39 -0
- package/dist/client.js.map +1 -0
- package/dist/index.d.ts +67 -0
- package/dist/index.js +477 -0
- package/dist/index.js.map +1 -0
- package/dist/json-parser.d.ts +43 -0
- package/dist/json-parser.js +15 -0
- package/dist/json-parser.js.map +1 -0
- package/dist/rate-limiter-9XAWfHwe.d.ts +98 -0
- package/dist/structured-output.d.ts +113 -0
- package/dist/structured-output.js +16 -0
- package/dist/structured-output.js.map +1 -0
- package/package.json +55 -0
- package/src/client.ts +967 -0
- package/src/continuation.ts +290 -0
- package/src/index.ts +87 -0
- package/src/json-parser.ts +273 -0
- package/src/rate-limiter.ts +237 -0
- package/src/structured-output.ts +330 -0
- package/src/token-tracker.ts +116 -0
- package/src/truncation-detector.ts +308 -0
|
@@ -0,0 +1,633 @@
|
|
|
1
|
+
import {
|
|
2
|
+
parseJsonResponse
|
|
3
|
+
} from "./chunk-WM7QVK2Z.js";
|
|
4
|
+
import {
|
|
5
|
+
RateLimiter,
|
|
6
|
+
getGlobalRateLimiter,
|
|
7
|
+
getGlobalTokenTracker
|
|
8
|
+
} from "./chunk-MJS33AAS.js";
|
|
9
|
+
|
|
10
|
+
// src/client.ts
|
|
11
|
+
import { ChatOpenAI } from "@langchain/openai";
|
|
12
|
+
import {
|
|
13
|
+
ChatAnthropic
|
|
14
|
+
} from "@langchain/anthropic";
|
|
15
|
+
import Anthropic from "@anthropic-ai/sdk";
|
|
16
|
+
// ChatAnthropic subclass that transparently opts system prompts into
// Anthropic prompt caching by attaching `cache_control: { type: "ephemeral" }`
// to system-message content before delegating to the base implementation.
var CachingChatAnthropic = class extends ChatAnthropic {
  // invoke accepts either a bare string (treated as a single user message)
  // or an array of messages, mirroring the base class contract.
  async invoke(input, options) {
    let messages;
    if (typeof input === "string") {
      messages = [{ role: "user", content: input }];
    } else {
      messages = input;
    }
    const transformedMessages = messages.map((msg) => {
      // Messages may be LangChain message objects (_getType) or plain
      // role/content records; only system messages are transformed.
      const msgType = msg._getType?.() || msg.role || "unknown";
      const isSystem = msgType === "system";
      if (!isSystem) return msg;
      if (typeof msg.content === "string") {
        // String content: wrap in a single cacheable text block.
        return {
          ...msg,
          content: [
            {
              type: "text",
              text: msg.content,
              cache_control: { type: "ephemeral" }
            }
          ]
        };
      }
      if (Array.isArray(msg.content)) {
        const blocks = msg.content;
        // Respect any caller-provided cache_control: only auto-annotate
        // when no block carries one already.
        const hasAnyCacheControl = blocks.some((b) => b.cache_control);
        if (!hasAnyCacheControl) {
          // Mark only the final text block as the cache breakpoint.
          const transformedBlocks = blocks.map((block, idx) => {
            if (block.type === "text" && idx === blocks.length - 1) {
              return {
                ...block,
                cache_control: { type: "ephemeral" }
              };
            }
            return block;
          });
          return { ...msg, content: transformedBlocks };
        }
      }
      // Non-string, non-array content (or already-annotated blocks): pass through.
      return msg;
    });
    return super.invoke(transformedMessages, options);
  }
};
|
|
61
|
+
/**
 * Reads a provider API key from the environment.
 * @param {string} envVar - Name of the environment variable holding the key.
 * @returns {string} The key value.
 * @throws {Error} When the variable is unset or empty.
 */
function requireApiKey(envVar) {
  const apiKey = process.env[envVar];
  if (!apiKey) {
    // Error text matches the original per-provider messages exactly.
    throw new Error(
      `${envVar} environment variable is not set. Please set it in your .env file or environment.`
    );
  }
  return apiKey;
}

// Per-provider configuration factories. Each returns
// { apiKey, baseUrl, defaultModel }; baseUrl is undefined for providers
// reached through their SDK's default endpoint. The key lookup is evaluated
// lazily (at call time), preserving the original behavior of only requiring
// the key for the provider actually used.
var PROVIDER_CONFIGS = {
  openai: () => ({
    apiKey: requireApiKey("OPENAI_API_KEY"),
    baseUrl: void 0,
    defaultModel: "gpt-4o"
  }),
  deepseek: () => ({
    apiKey: requireApiKey("DEEPSEEK_API_KEY"),
    baseUrl: "https://api.deepseek.com/v1",
    defaultModel: "deepseek-chat"
  }),
  anthropic: () => ({
    apiKey: requireApiKey("ANTHROPIC_API_KEY"),
    baseUrl: void 0,
    defaultModel: "claude-sonnet-4-5-20250929"
  }),
  kimi: () => ({
    apiKey: requireApiKey("KIMI_API_KEY"),
    baseUrl: "https://api.moonshot.cn/v1",
    defaultModel: "kimi-k2.5"
  })
};
|
|
111
|
+
// Known model identifiers per provider. Plain string constants passed as the
// `model` option when constructing an LLMClient.
var DEEPSEEK_MODELS = {
  CHAT: "deepseek-chat",
  CODER: "deepseek-coder",
  REASONER: "deepseek-reasoner"
};
var OPENAI_MODELS = {
  GPT4O: "gpt-4o",
  GPT4O_MINI: "gpt-4o-mini",
  GPT4_TURBO: "gpt-4-turbo",
  GPT35_TURBO: "gpt-3.5-turbo",
  GPT_5_1: "gpt-5.1"
};
var ANTHROPIC_MODELS = {
  CLAUDE_SONNET_4_5: "claude-sonnet-4-5-20250929",
  CLAUDE_SONNET_4: "claude-sonnet-4-20250514",
  CLAUDE_OPUS_4_5: "claude-opus-4-5-20250929",
  CLAUDE_3_5_HAIKU: "claude-3-5-haiku-20241022"
};
var KIMI_MODELS = {
  K2_5: "kimi-k2.5"
};
// Sampling temperature used when neither the constructor options nor a
// per-call override supplies one.
var DEFAULT_TEMPERATURE = 0.3;
|
|
133
|
+
// Provider-agnostic LLM client wrapping LangChain chat models (OpenAI-compatible
// providers) and CachingChatAnthropic (Anthropic). Adds rate limiting, token
// usage tracking, JSON parsing with schema validation, and retry-with-context.
var LLMClient = class {
  /**
   * @param {object} [options]
   * @param {string} [options.provider="openai"] - One of the PROVIDER_CONFIGS keys.
   * @param {string} [options.model] - Model name; defaults to the provider's default.
   * @param {number} [options.temperature] - Defaults to DEFAULT_TEMPERATURE.
   * @param {boolean} [options.streaming=false]
   * @param {boolean} [options.useGlobalRateLimiter=true] - false creates a private RateLimiter.
   * @param {boolean} [options.trackTokens=true] - false disables token tracking.
   * @throws {Error} When the selected provider's API key env var is unset.
   */
  constructor(options = {}) {
    this.provider = options.provider || "openai";
    this.temperature = options.temperature ?? DEFAULT_TEMPERATURE;
    this.streaming = options.streaming ?? false;
    // Throws if the provider's API key is missing.
    this.providerConfig = PROVIDER_CONFIGS[this.provider]();
    this.modelName = options.model || this.providerConfig.defaultModel;
    // Only the last 4 chars of the key are ever logged.
    const keyPreview = this.providerConfig.apiKey.slice(-4);
    console.log(
      `[LLMClient] Provider: ${this.provider}, Model: ${this.modelName}, Key: ****${keyPreview}`
    );
    if (this.providerConfig.baseUrl) {
      console.log(
        `[LLMClient] Using custom base URL: ${this.providerConfig.baseUrl}`
      );
    }
    this.model = this.createModel();
    this.rateLimiter = options.useGlobalRateLimiter !== false ? getGlobalRateLimiter(options.rateLimiter) : new RateLimiter(options.rateLimiter);
    this.tokenTracker = options.trackTokens !== false ? getGlobalTokenTracker(this.modelName) : null;
  }
  // True for model families (o1/o3/gpt-5*) that take `max_completion_tokens`
  // instead of `max_tokens` and reject a temperature parameter.
  usesMaxCompletionTokens() {
    const model = this.modelName.toLowerCase();
    return model.startsWith("o1") || model.startsWith("gpt-5") || model.includes("o1-") || model.includes("o3");
  }
  // Builds a fresh chat-model instance; `options` may override maxTokens and
  // temperature for a single call without mutating this.model.
  createModel(options) {
    const maxTokens = options?.maxTokens;
    const temperature = options?.temperature ?? this.temperature;
    if (this.provider === "anthropic") {
      return new CachingChatAnthropic({
        anthropicApiKey: this.providerConfig.apiKey,
        modelName: this.modelName,
        temperature,
        streaming: this.streaming,
        maxTokens: maxTokens || 8192,
        callbacks: [
          {
            // Logs Anthropic prompt-cache activity after each completion.
            handleLLMEnd: (output) => {
              const generation = output.generations?.[0]?.[0];
              const usage = generation?.message?.usage_metadata;
              if (usage) {
                const cacheCreated = usage.cache_creation_input_tokens ?? 0;
                const cacheRead = usage.cache_read_input_tokens ?? 0;
                const inputTokens = usage.input_tokens ?? 0;
                const outputTokens = usage.output_tokens ?? 0;
                if (cacheCreated > 0) {
                  console.log(
                    `[LLMClient:Anthropic] Cache WRITE: ${cacheCreated} tokens cached`
                  );
                }
                if (cacheRead > 0) {
                  const savingsPercent = Math.round(
                    cacheRead / (cacheRead + inputTokens) * 100
                  );
                  console.log(
                    `[LLMClient:Anthropic] Cache HIT: ${cacheRead} tokens (~${savingsPercent}% of prompt)`
                  );
                }
                if (cacheCreated === 0 && cacheRead === 0 && inputTokens > 0) {
                  // Heuristic: a very small input count with no cache fields
                  // presumably means the prompt was served from cache upstream.
                  if (inputTokens < 500) {
                    console.log(
                      `[LLMClient:Anthropic] ${inputTokens} input, ${outputTokens} output tokens (likely cached)`
                    );
                  } else {
                    console.log(
                      `[LLMClient:Anthropic] ${inputTokens} input, ${outputTokens} output tokens`
                    );
                  }
                }
              }
            }
          }
        ]
      });
    }
    const useCompletionTokens = this.usesMaxCompletionTokens();
    // Newer OpenAI families need max_completion_tokens via modelKwargs.
    const tokenConfig = maxTokens ? useCompletionTokens ? { modelKwargs: { max_completion_tokens: maxTokens } } : { maxTokens } : {};
    // DeepSeek gets a 10-minute timeout (6e5 ms); others use the SDK default.
    const timeout = this.provider === "deepseek" ? 6e5 : void 0;
    return new ChatOpenAI({
      openAIApiKey: this.providerConfig.apiKey,
      modelName: this.modelName,
      // Temperature is omitted for families that reject it.
      temperature: useCompletionTokens ? void 0 : temperature,
      streaming: this.streaming,
      timeout,
      ...tokenConfig,
      configuration: {
        apiKey: this.providerConfig.apiKey,
        ...this.providerConfig.baseUrl ? { baseURL: this.providerConfig.baseUrl } : {}
      }
    });
  }
  // Returns a per-call model instance with overridden options.
  getModelWithOptions(options) {
    return this.createModel(options);
  }
  getProvider() {
    return this.provider;
  }
  getModelName() {
    return this.modelName;
  }
  getModel() {
    return this.model;
  }
  getRateLimiterStatus() {
    return this.rateLimiter.getStatus();
  }
  // Null when token tracking was disabled at construction.
  getTokenUsage() {
    return this.tokenTracker?.getSummary() ?? null;
  }
  // Convenience wrapper: parsed data only, metadata discarded.
  async call(options) {
    const response = await this.callWithMetadata(options);
    return response.data;
  }
  /**
   * Invokes the model with a system/user prompt pair, parses the response as
   * JSON (optionally schema-validated), and retries on failure. Retries append
   * the previous error to the user prompt when retryWithContext is true.
   * Rate-limit errors are rethrown immediately without retrying.
   * @returns {{data, raw, finishReason, usage}} Parsed data plus call metadata.
   * @throws The last error after all attempts are exhausted.
   */
  async callWithMetadata(options) {
    const {
      systemPrompt,
      userPrompt,
      schema,
      maxRetries = 2,
      retryWithContext = true,
      maxTokens,
      skipSchemaValidation = false,
      temperature
    } = options;
    let currentPrompt = userPrompt;
    let lastError = null;
    console.log(
      `[LLMClient:call] Starting call to ${this.provider}/${this.modelName}`
    );
    console.log(`[LLMClient:call] Prompt length: ${userPrompt.length} chars`);
    if (maxTokens) {
      console.log(`[LLMClient:call] Max tokens: ${maxTokens}`);
    }
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        console.log(
          `[LLMClient:call] Attempt ${attempt + 1}/${maxRetries + 1}...`
        );
        const attemptStartTime = Date.now();
        // All model invocations are funneled through the rate limiter.
        const result = await this.rateLimiter.execute(async () => {
          console.log(`[LLMClient:call] Invoking model...`);
          const invokeStartTime = Date.now();
          // Per-call overrides require a fresh model instance.
          const modelToUse = maxTokens || temperature !== void 0 ? this.getModelWithOptions({ maxTokens, temperature }) : this.model;
          const response = await modelToUse.invoke([
            { role: "system", content: systemPrompt },
            { role: "user", content: currentPrompt }
          ]);
          console.log(
            `[LLMClient:call] Model responded in ${Date.now() - invokeStartTime}ms`
          );
          let usage = null;
          if (response.usage_metadata) {
            const usageMeta = response.usage_metadata;
            usage = {
              promptTokens: usageMeta.input_tokens || 0,
              completionTokens: usageMeta.output_tokens || 0,
              totalTokens: (usageMeta.input_tokens || 0) + (usageMeta.output_tokens || 0)
            };
            console.log(
              `[LLMClient:call] Tokens used: ${usage.promptTokens} in, ${usage.completionTokens} out`
            );
            if (this.tokenTracker) {
              this.tokenTracker.addUsage(
                usage.promptTokens,
                usage.completionTokens
              );
            }
          }
          const finishReason = this.extractFinishReason(response);
          if (finishReason === "length") {
            console.warn(
              `[LLMClient:call] Response truncated (finish_reason=length)`
            );
          }
          // Structured (non-string) content is serialized before parsing.
          const content = typeof response.content === "string" ? response.content : JSON.stringify(response.content);
          console.log(
            `[LLMClient:call] Response length: ${content.length} chars, finish_reason: ${finishReason}`
          );
          return { content, finishReason, usage };
        });
        console.log(
          `[LLMClient:call] Attempt ${attempt + 1} completed in ${Date.now() - attemptStartTime}ms, parsing response...`
        );
        // A parse/validation failure here is caught below and triggers a retry.
        const parsed = skipSchemaValidation ? parseJsonResponse(result.content, void 0) : parseJsonResponse(result.content, schema);
        console.log(
          `[LLMClient:call] Response parsed successfully${skipSchemaValidation ? " (schema validation skipped)" : ""}`
        );
        return {
          data: parsed,
          raw: result.content,
          finishReason: result.finishReason,
          usage: result.usage
        };
      } catch (error) {
        lastError = error instanceof Error ? error : new Error(String(error));
        console.error(
          `[LLMClient:call] Attempt ${attempt + 1} failed:`,
          lastError.message
        );
        if (this.isRateLimitError(lastError)) {
          console.error(`[LLMClient:call] Rate limit error, not retrying`);
          throw lastError;
        }
        if (attempt < maxRetries && retryWithContext) {
          console.log(`[LLMClient:call] Will retry with error context`);
          // Retry prompt restarts from the ORIGINAL userPrompt (error
          // contexts do not accumulate across attempts).
          currentPrompt = `${userPrompt}

[Previous attempt failed with: ${lastError.message}]
Please output valid JSON that matches the expected schema.`;
        }
      }
    }
    console.error(`[LLMClient:call] All attempts exhausted, throwing error`);
    throw lastError;
  }
  // Normalizes the provider's finish reason to one of the four known values,
  // or null when absent/unknown.
  extractFinishReason(response) {
    const metadata = response.response_metadata;
    if (metadata?.finish_reason) {
      const reason = metadata.finish_reason;
      if (reason === "stop" || reason === "length" || reason === "content_filter" || reason === "tool_calls") {
        return reason;
      }
    }
    return null;
  }
  // Raw-text variant of call(): no JSON parsing, no retries.
  async callRaw(options) {
    const response = await this.callRawWithMetadata(options);
    return response.raw;
  }
  // Single rate-limited invocation returning { raw, finishReason, usage }.
  async callRawWithMetadata(options) {
    const { systemPrompt, userPrompt, maxTokens } = options;
    return this.rateLimiter.execute(async () => {
      const modelToUse = maxTokens ? this.getModelWithOptions({ maxTokens }) : this.model;
      const response = await modelToUse.invoke([
        { role: "system", content: systemPrompt },
        { role: "user", content: userPrompt }
      ]);
      let usage = null;
      if (response.usage_metadata) {
        const usageMeta = response.usage_metadata;
        usage = {
          promptTokens: usageMeta.input_tokens || 0,
          completionTokens: usageMeta.output_tokens || 0,
          totalTokens: (usageMeta.input_tokens || 0) + (usageMeta.output_tokens || 0)
        };
        if (this.tokenTracker) {
          this.tokenTracker.addUsage(
            usage.promptTokens,
            usage.completionTokens
          );
        }
      }
      const finishReason = this.extractFinishReason(response);
      const content = typeof response.content === "string" ? response.content : JSON.stringify(response.content);
      return { raw: content, finishReason, usage };
    });
  }
  // Heuristic message-text match; "429" catches HTTP-status-bearing messages.
  isRateLimitError(error) {
    const message = error.message.toLowerCase();
    return message.includes("rate limit") || message.includes("429") || message.includes("quota exceeded");
  }
  // ==========================================================================
  // Anthropic Cache Control Support
  // ==========================================================================
  /**
   * Like callWithMetadata, but sends explicit content blocks (with optional
   * cache_control markers) directly through the Anthropic SDK. Falls back to
   * callWithMetadata for non-Anthropic providers. Retries do NOT append error
   * context; rate-limit errors are rethrown immediately.
   * NOTE(review): `new Anthropic()` reads ANTHROPIC_API_KEY from the
   * environment rather than this.providerConfig.apiKey — confirm intended.
   */
  async callWithCache(options) {
    const {
      systemPrompt,
      userPrompt,
      systemBlocks,
      userBlocks,
      schema,
      maxRetries = 2,
      maxTokens,
      skipSchemaValidation = false,
      temperature,
      rawText = false
    } = options;
    if (this.provider !== "anthropic") {
      console.log(
        `[LLMClient:callWithCache] Provider ${this.provider} doesn't support caching, using regular call`
      );
      return this.callWithMetadata(options);
    }
    const cacheableCount = (systemBlocks || []).filter((b) => b.cache_control).length + (userBlocks || []).filter((b) => b.cache_control).length;
    console.log(
      `[LLMClient:callWithCache] ${cacheableCount} cacheable block(s)`
    );
    let lastError = null;
    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        console.log(
          `[LLMClient:callWithCache] Attempt ${attempt + 1}/${maxRetries + 1}...`
        );
        const result = await this.rateLimiter.execute(async () => {
          const anthropic = new Anthropic();
          // Explicit blocks win over plain prompts; plain prompts become a
          // single uncached text block.
          const systemContent = systemBlocks && systemBlocks.length > 0 ? systemBlocks.map((b) => ({
            type: "text",
            text: b.text,
            ...b.cache_control ? { cache_control: b.cache_control } : {}
          })) : systemPrompt ? [{ type: "text", text: systemPrompt }] : [];
          const userContent = userBlocks && userBlocks.length > 0 ? userBlocks.map((b) => ({
            type: "text",
            text: b.text,
            ...b.cache_control ? { cache_control: b.cache_control } : {}
          })) : userPrompt ? [{ type: "text", text: userPrompt }] : [];
          const response = await anthropic.messages.create({
            model: this.modelName,
            max_tokens: maxTokens || 8192,
            temperature: temperature ?? 0,
            system: systemContent,
            messages: [{ role: "user", content: userContent }]
          });
          // First text block carries the answer; empty string if none.
          const textContent = response.content.find((c) => c.type === "text");
          const content = textContent && "text" in textContent ? textContent.text : "";
          const apiUsage = response.usage;
          const cacheRead = apiUsage.cache_read_input_tokens || 0;
          const cacheCreation = apiUsage.cache_creation_input_tokens || 0;
          if (cacheCreation > 0) {
            console.log(
              `[LLMClient:callWithCache] Cache WRITE: ${cacheCreation} tokens`
            );
          }
          if (cacheRead > 0) {
            const savingsPercent = Math.round(
              cacheRead / (cacheRead + apiUsage.input_tokens) * 100
            );
            console.log(
              `[LLMClient:callWithCache] Cache HIT: ${cacheRead} tokens (~${savingsPercent}% of prompt)`
            );
          }
          if (cacheCreation === 0 && cacheRead === 0) {
            console.log(
              `[LLMClient:callWithCache] No caching: ${apiUsage.input_tokens} input tokens`
            );
          }
          const usage = {
            promptTokens: apiUsage.input_tokens,
            completionTokens: apiUsage.output_tokens,
            totalTokens: apiUsage.input_tokens + apiUsage.output_tokens
          };
          if (this.tokenTracker) {
            this.tokenTracker.addUsage(
              usage.promptTokens,
              usage.completionTokens
            );
          }
          // Normalize Anthropic's "end_turn" to the OpenAI-style "stop".
          const finishReason = response.stop_reason === "end_turn" ? "stop" : response.stop_reason;
          return {
            content,
            finishReason,
            usage
          };
        });
        let parsed;
        if (rawText) {
          parsed = result.content;
        } else if (skipSchemaValidation) {
          parsed = parseJsonResponse(result.content, void 0);
        } else {
          parsed = parseJsonResponse(result.content, schema);
        }
        return {
          data: parsed,
          raw: result.content,
          finishReason: result.finishReason,
          usage: result.usage
        };
      } catch (error) {
        lastError = error instanceof Error ? error : new Error(String(error));
        console.error(
          `[LLMClient:callWithCache] Attempt ${attempt + 1} failed:`,
          lastError.message
        );
        if (this.isRateLimitError(lastError)) {
          throw lastError;
        }
      }
    }
    throw lastError;
  }
  // Helper for building systemBlocks/userBlocks entries; `cache=false`
  // produces a plain (uncached) text block.
  static cacheableBlock(text, cache = true) {
    return cache ? { type: "text", text, cache_control: { type: "ephemeral" } } : { type: "text", text };
  }
};
|
|
516
|
+
// Module-level cache holding at most one shared client per provider name.
var sharedClients = {};
/**
 * Returns the shared client for the requested provider, constructing it on
 * first use. The cache key is the provider alone, so `options` passed on
 * later calls for an already-cached provider are ignored.
 * @param {object} [options] - LLMClient constructor options; options.provider
 *   selects the cache slot (defaults to "openai").
 * @returns {LLMClient} The cached (or newly created) client.
 */
function getSharedLLMClient(options) {
  const provider = options?.provider || "openai";
  let client = sharedClients[provider];
  if (client === undefined) {
    client = new LLMClient(options);
    sharedClients[provider] = client;
  }
  return client;
}
|
|
524
|
+
/**
 * Evicts shared client instances so subsequent getSharedLLMClient calls
 * construct fresh ones.
 * @param {string} [provider] - Evict only this provider's client; when
 *   omitted, every cached client is evicted.
 */
function resetSharedLLMClient(provider) {
  const keysToEvict = provider ? [provider] : Object.keys(sharedClients);
  for (const key of keysToEvict) {
    delete sharedClients[key];
  }
}
|
|
533
|
+
/**
 * Picks the first provider whose API key is present in the environment, in
 * fixed preference order: anthropic, deepseek, kimi, openai.
 * @returns {string} The selected provider name.
 * @throws {Error} When none of the four API-key variables are set.
 */
function getAvailableProvider() {
  const preferenceOrder = [
    ["ANTHROPIC_API_KEY", "anthropic"],
    ["DEEPSEEK_API_KEY", "deepseek"],
    ["KIMI_API_KEY", "kimi"],
    ["OPENAI_API_KEY", "openai"]
  ];
  for (const [envVar, provider] of preferenceOrder) {
    if (process.env[envVar]) {
      return provider;
    }
  }
  throw new Error(
    "No LLM API key found. Please set ANTHROPIC_API_KEY, OPENAI_API_KEY, DEEPSEEK_API_KEY, or KIMI_API_KEY."
  );
}
|
|
542
|
+
/**
 * Reports whether the given provider can be used, i.e. its API-key
 * environment variable is set to a non-empty value.
 * @param {string} provider - "openai" | "deepseek" | "anthropic" | "kimi".
 * @returns {boolean} false for unknown provider names.
 */
function isProviderAvailable(provider) {
  const envVarByProvider = new Map([
    ["openai", "OPENAI_API_KEY"],
    ["deepseek", "DEEPSEEK_API_KEY"],
    ["anthropic", "ANTHROPIC_API_KEY"],
    ["kimi", "KIMI_API_KEY"]
  ]);
  const envVar = envVarByProvider.get(provider);
  return envVar !== undefined && Boolean(process.env[envVar]);
}
|
|
556
|
+
/**
 * Creates a client tuned for requirements-analysis work: low temperature
 * (0.3) and a strong general-purpose model for the resolved provider.
 * Caller-supplied options override every default, including the model.
 * @param {object} [options] - LLMClient constructor options.
 * @returns {LLMClient}
 */
function createRequirementsClient(options) {
  const provider = options?.provider || getAvailableProvider();
  // Pick a default model belonging to the resolved provider. (Previously
  // every non-deepseek provider defaulted to the OpenAI model "gpt-5.1",
  // which the Anthropic/Kimi APIs reject as an unknown model name.)
  const modelByProvider = {
    deepseek: DEEPSEEK_MODELS.CHAT,
    anthropic: ANTHROPIC_MODELS.CLAUDE_SONNET_4_5,
    kimi: KIMI_MODELS.K2_5,
    openai: OPENAI_MODELS.GPT_5_1
  };
  const defaultModel = modelByProvider[provider] ?? OPENAI_MODELS.GPT_5_1;
  return new LLMClient({
    provider,
    model: defaultModel,
    temperature: 0.3,
    ...options
  });
}
|
|
566
|
+
/**
 * Creates a client tuned for creative/generative work: higher temperature
 * (0.7) and a reasoning-capable model for the resolved provider.
 * Caller-supplied options override every default, including the model.
 * @param {object} [options] - LLMClient constructor options.
 * @returns {LLMClient}
 */
function createCreativeClient(options) {
  const provider = options?.provider || getAvailableProvider();
  // Pick a default model belonging to the resolved provider. (Previously
  // every non-deepseek provider defaulted to the OpenAI model "gpt-4o",
  // which the Anthropic/Kimi APIs reject as an unknown model name.)
  const modelByProvider = {
    deepseek: DEEPSEEK_MODELS.REASONER,
    anthropic: ANTHROPIC_MODELS.CLAUDE_SONNET_4_5,
    kimi: KIMI_MODELS.K2_5,
    openai: OPENAI_MODELS.GPT4O
  };
  const defaultModel = modelByProvider[provider] ?? OPENAI_MODELS.GPT4O;
  return new LLMClient({
    provider,
    model: defaultModel,
    temperature: 0.7,
    ...options
  });
}
|
|
576
|
+
/**
 * Creates a client tuned for quick, deterministic fix-up work: very low
 * temperature (0.2) and a fast/cheap model for the resolved provider.
 * Caller-supplied options override every default, including the model.
 * @param {object} [options] - LLMClient constructor options.
 * @returns {LLMClient}
 */
function createFixClient(options) {
  const provider = options?.provider || getAvailableProvider();
  // Pick a default model belonging to the resolved provider. (Previously
  // every non-deepseek provider defaulted to the OpenAI model "gpt-4o-mini",
  // which the Anthropic/Kimi APIs reject as an unknown model name.)
  const modelByProvider = {
    deepseek: DEEPSEEK_MODELS.CHAT,
    anthropic: ANTHROPIC_MODELS.CLAUDE_3_5_HAIKU,
    kimi: KIMI_MODELS.K2_5,
    openai: OPENAI_MODELS.GPT4O_MINI
  };
  const defaultModel = modelByProvider[provider] ?? OPENAI_MODELS.GPT4O_MINI;
  return new LLMClient({
    provider,
    model: defaultModel,
    temperature: 0.2,
    ...options
  });
}
|
|
586
|
+
// Convenience factory: DeepSeek client defaulting to the chat model.
// Caller options override the defaults.
function createDeepSeekClient(options) {
  return new LLMClient({
    provider: "deepseek",
    model: DEEPSEEK_MODELS.CHAT,
    ...options
  });
}
|
|
593
|
+
// Convenience factory: OpenAI client defaulting to GPT-4o.
// Caller options override the defaults.
function createOpenAIClient(options) {
  return new LLMClient({
    provider: "openai",
    model: OPENAI_MODELS.GPT4O,
    ...options
  });
}
|
|
600
|
+
// Convenience factory: Anthropic client defaulting to Claude Sonnet 4.5.
// Caller options override the defaults.
function createAnthropicClient(options) {
  return new LLMClient({
    provider: "anthropic",
    model: ANTHROPIC_MODELS.CLAUDE_SONNET_4_5,
    ...options
  });
}
|
|
607
|
+
// Convenience factory: Kimi (Moonshot) client defaulting to kimi-k2.5.
// Caller options override the defaults.
function createKimiClient(options) {
  return new LLMClient({
    provider: "kimi",
    model: KIMI_MODELS.K2_5,
    ...options
  });
}
|
|
614
|
+
|
|
615
|
+
export {
|
|
616
|
+
DEEPSEEK_MODELS,
|
|
617
|
+
OPENAI_MODELS,
|
|
618
|
+
ANTHROPIC_MODELS,
|
|
619
|
+
KIMI_MODELS,
|
|
620
|
+
LLMClient,
|
|
621
|
+
getSharedLLMClient,
|
|
622
|
+
resetSharedLLMClient,
|
|
623
|
+
getAvailableProvider,
|
|
624
|
+
isProviderAvailable,
|
|
625
|
+
createRequirementsClient,
|
|
626
|
+
createCreativeClient,
|
|
627
|
+
createFixClient,
|
|
628
|
+
createDeepSeekClient,
|
|
629
|
+
createOpenAIClient,
|
|
630
|
+
createAnthropicClient,
|
|
631
|
+
createKimiClient
|
|
632
|
+
};
|
|
633
|
+
//# sourceMappingURL=chunk-PV3G5PJS.js.map
|