@almadar/llm 2.14.1 → 2.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,389 @@
1
+ import { a as RateLimiterOptions, b as TokenUsage } from './rate-limiter-BqWOhaXY.js';
2
+ import { ChatOpenAI } from '@langchain/openai';
3
+ import { ChatAnthropic } from '@langchain/anthropic';
4
+ import { z } from 'zod';
5
+ import { JsonSchema } from '@almadar/core';
6
+
7
+ /**
8
+ * OpenAI Chat Completions wire-format types used by `LLMClient.callWithTools`.
9
+ *
10
+ * These mirror the public OpenAI Chat Completions API spec, which is also
11
+ * the protocol every OpenAI-compatible provider (DeepSeek, OpenRouter,
12
+ * Kimi, OrbGen, etc.) implements. The types are intentionally faithful
13
+ * to the wire format — when the LLM emits a `reasoning_content` field
14
+ * (DeepSeek V4 thinking mode), it's preserved verbatim and echoed back
15
+ * on the next round-trip.
16
+ */
17
+
18
+ interface ChatCompletionToolDef {
19
+ type: 'function';
20
+ function: {
21
+ name: string;
22
+ description: string;
23
+ /** JSON Schema describing the tool's parameters. Sent verbatim to the
24
+ * provider as the tool-call `function.parameters` field. */
25
+ parameters: JsonSchema;
26
+ };
27
+ }
28
+ type ChatCompletionRole = 'system' | 'user' | 'assistant' | 'tool';
29
+ interface ChatCompletionToolCall {
30
+ id: string;
31
+ type: 'function';
32
+ function: {
33
+ name: string;
34
+ arguments: string;
35
+ };
36
+ }
37
+ interface ChatCompletionMessage {
38
+ role: ChatCompletionRole;
39
+ /** Null is valid (assistant-only) when the message exists purely to carry `tool_calls`. */
40
+ content: string | null;
41
+ /** Present on assistant turns that called one or more tools. */
42
+ tool_calls?: ChatCompletionToolCall[];
43
+ /** Present on tool-role messages — matches `tool_calls[*].id` of the preceding assistant turn. */
44
+ tool_call_id?: string;
45
+ /**
46
+ * DeepSeek V4 thinking-mode chain-of-thought string. Must be echoed
47
+ * back on the next round-trip when the assistant turn triggered
48
+ * tool_calls — that's the protocol contract that LangChain's
49
+ * ChatOpenAI converter breaks.
50
+ */
51
+ reasoning_content?: string;
52
+ }
53
+ interface ChatCompletionChoice {
54
+ index: number;
55
+ message: ChatCompletionMessage;
56
+ finish_reason: string;
57
+ }
58
+ interface ChatCompletionUsage {
59
+ prompt_tokens: number;
60
+ completion_tokens: number;
61
+ total_tokens: number;
62
+ }
63
+ interface ChatCompletionResponse {
64
+ choices: ChatCompletionChoice[];
65
+ usage?: ChatCompletionUsage;
66
+ }
67
+ declare function parseChatCompletionResponse(raw: string): ChatCompletionResponse;
68
+
69
+ type ChatModel = ChatOpenAI | ChatAnthropic;
70
+ type LLMProvider = 'openai' | 'deepseek' | 'anthropic' | 'kimi' | 'openrouter' | 'orbgen';
71
+ interface ProviderConfig {
72
+ apiKey: string;
73
+ baseUrl?: string;
74
+ defaultModel: string;
75
+ }
76
+ interface LLMClientOptions {
77
+ provider?: LLMProvider;
78
+ model?: string;
79
+ temperature?: number;
80
+ streaming?: boolean;
81
+ rateLimiter?: RateLimiterOptions;
82
+ useGlobalRateLimiter?: boolean;
83
+ trackTokens?: boolean;
84
+ }
85
+ interface LLMCallOptions<T = unknown> {
86
+ systemPrompt: string;
87
+ userPrompt: string;
88
+ schema?: z.ZodSchema<T>;
89
+ maxRetries?: number;
90
+ retryWithContext?: boolean;
91
+ maxTokens?: number;
92
+ skipSchemaValidation?: boolean;
93
+ temperature?: number;
94
+ }
95
+ interface CacheableBlock {
96
+ type: 'text';
97
+ text: string;
98
+ cache_control?: {
99
+ type: 'ephemeral';
100
+ };
101
+ }
102
+ interface CacheAwareLLMCallOptions<T = unknown> extends LLMCallOptions<T> {
103
+ systemBlocks?: CacheableBlock[];
104
+ userBlocks?: CacheableBlock[];
105
+ rawText?: boolean;
106
+ }
107
+ interface LLMUsage {
108
+ promptTokens: number;
109
+ completionTokens: number;
110
+ totalTokens: number;
111
+ }
112
+ type LLMFinishReason = 'stop' | 'length' | 'content_filter' | 'tool_calls' | null;
113
+ interface LLMResponse<T> {
114
+ data: T;
115
+ raw: string;
116
+ finishReason: LLMFinishReason;
117
+ usage: LLMUsage | null;
118
+ }
119
+ interface LLMStreamOptions {
120
+ systemPrompt: string;
121
+ messages: Array<{
122
+ role: 'system' | 'user' | 'assistant';
123
+ content: string;
124
+ }>;
125
+ maxTokens?: number;
126
+ temperature?: number;
127
+ }
128
+ interface LLMStreamChunk {
129
+ content: string;
130
+ done: boolean;
131
+ }
132
+ declare const DEEPSEEK_MODELS: {
133
+ readonly CHAT: "deepseek-chat";
134
+ readonly CODER: "deepseek-coder";
135
+ readonly REASONER: "deepseek-reasoner";
136
+ readonly V4_PRO: "deepseek-v4-pro";
137
+ readonly V4_FLASH: "deepseek-v4-flash";
138
+ };
139
+ declare const OPENAI_MODELS: {
140
+ readonly GPT4O: "gpt-4o";
141
+ readonly GPT4O_MINI: "gpt-4o-mini";
142
+ readonly GPT4_TURBO: "gpt-4-turbo";
143
+ readonly GPT35_TURBO: "gpt-3.5-turbo";
144
+ readonly GPT_5_1: "gpt-5.1";
145
+ };
146
+ declare const ANTHROPIC_MODELS: {
147
+ readonly CLAUDE_SONNET_4_5: "claude-sonnet-4-5-20250929";
148
+ readonly CLAUDE_SONNET_4: "claude-sonnet-4-20250514";
149
+ readonly CLAUDE_OPUS_4_5: "claude-opus-4-5-20250929";
150
+ readonly CLAUDE_3_5_HAIKU: "claude-3-5-haiku-20241022";
151
+ };
152
+ declare const KIMI_MODELS: {
153
+ readonly K2_5: "kimi-k2.5";
154
+ };
155
+ declare const OPENROUTER_MODELS: {
156
+ readonly QWEN_2_5_72B: "qwen/qwen-2.5-72b-instruct";
157
+ readonly QWEN_2_5_CODER_32B: "qwen/qwen-2.5-coder-32b-instruct";
158
+ readonly QWEN_3_235B: "qwen/qwen3-235b-a22b";
159
+ readonly GEMMA_3_4B: "google/gemma-3-4b-it";
160
+ readonly GEMMA_3_12B: "google/gemma-3-12b-it";
161
+ readonly GEMMA_3_27B: "google/gemma-3-27b-it";
162
+ readonly MINISTRAL_8B: "mistralai/ministral-8b-2512";
163
+ readonly MISTRAL_SMALL_3_1: "mistralai/mistral-small-3.1-24b-instruct";
164
+ readonly MISTRAL_MEDIUM_3_1: "mistralai/mistral-medium-3.1";
165
+ readonly LLAMA_3_3_70B: "meta-llama/llama-3.3-70b-instruct";
166
+ readonly LLAMA_3_1_405B: "meta-llama/llama-3.1-405b-instruct";
167
+ readonly LLAMA_4_MAVERICK: "meta-llama/llama-4-maverick";
168
+ readonly LLAMA_4_SCOUT: "meta-llama/llama-4-scout";
169
+ readonly KIMI_K2: "moonshotai/kimi-k2";
170
+ readonly GLM_4_7: "z-ai/glm-4.7";
171
+ };
172
+ declare class LLMClient {
173
+ private model;
174
+ private rateLimiter;
175
+ private tokenTracker;
176
+ private modelName;
177
+ private provider;
178
+ private providerConfig;
179
+ private temperature;
180
+ private streaming;
181
+ constructor(options?: LLMClientOptions);
182
+ private usesMaxCompletionTokens;
183
+ private createModel;
184
+ private getModelWithOptions;
185
+ /**
186
+ * Check if this model is a Qwen3.5 thinking model.
187
+ * These models burn all output tokens on internal reasoning
188
+ * unless thinking is explicitly disabled via /no_think prefix.
189
+ */
190
+ private isQwenThinkingModel;
191
+ /**
192
+ * Prepare user prompt with provider-specific adjustments.
193
+ * Qwen3.5 models require /no_think to disable reasoning mode.
194
+ */
195
+ private prepareUserPrompt;
196
+ getProvider(): LLMProvider;
197
+ getModelName(): string;
198
+ getModel(): ChatModel;
199
+ getRateLimiterStatus(): {
200
+ queueLength: number;
201
+ activeRequests: number;
202
+ minuteTokens: number;
203
+ secondTokens: number;
204
+ backoffMs: number;
205
+ };
206
+ getTokenUsage(): TokenUsage | null;
207
+ call<T>(options: LLMCallOptions<T>): Promise<T>;
208
+ callWithMetadata<T>(options: LLMCallOptions<T>): Promise<LLMResponse<T>>;
209
+ private extractFinishReason;
210
+ /**
211
+ * Default hard timeout for raw LLM invocations. The langchain ChatOpenAI
212
+ * `timeout` option doesn't reliably fire when an HTTP connection
213
+ * half-opens (the response stream can sit open indefinitely), so every
214
+ * raw call gets wrapped in an AbortController-driven deadline. Callers
215
+ * can override by passing their own `signal`.
216
+ */
217
+ private static readonly DEFAULT_RAW_TIMEOUT_MS;
218
+ /**
219
+ * Wrap a `model.invoke(...)` with start/finish/error logging and a hard
220
+ * abort-driven timeout. Returns whatever the underlying `invoke` returns.
221
+ *
222
+ * The raw paths (`callRaw`, `callRawWithMetadata`, `callWithMessages`)
223
+ * used to be silent — when a connection half-opened, callers sat
224
+ * indefinitely with no feedback. This wrapper makes hangs visible (start
225
+ * + duration logs) and bounded (timeout fires with a clear error).
226
+ */
227
+ private invokeWithObservability;
228
+ callRaw(options: {
229
+ systemPrompt: string;
230
+ userPrompt: string;
231
+ maxTokens?: number;
232
+ signal?: AbortSignal;
233
+ }): Promise<string>;
234
+ callRawWithMetadata(options: {
235
+ systemPrompt: string;
236
+ userPrompt: string;
237
+ maxTokens?: number;
238
+ signal?: AbortSignal;
239
+ }): Promise<Omit<LLMResponse<string>, 'data'> & {
240
+ raw: string;
241
+ }>;
242
+ /**
243
+ * Call the LLM with a structured messages array.
244
+ *
245
+ * Unlike callRawWithMetadata (which takes systemPrompt + userPrompt strings),
246
+ * this accepts a full conversation history with proper role separation.
247
+ * This enables:
248
+ * - Anthropic prompt caching on message boundaries (not just system prompt)
249
+ * - Proper tool_use/tool_result role handling across providers
250
+ * - Reduced token waste from string concatenation
251
+ *
252
+ * All providers support the messages format:
253
+ * - Anthropic: native messages API with cache_control
254
+ * - DeepSeek: OpenAI-compatible messages via ChatOpenAI
255
+ * - OpenRouter: OpenAI-compatible messages via ChatOpenAI
256
+ */
257
+ callWithMessages(options: {
258
+ messages: Array<{
259
+ role: string;
260
+ content: string;
261
+ }>;
262
+ maxTokens?: number;
263
+ signal?: AbortSignal;
264
+ }): Promise<Omit<LLMResponse<string>, 'data'> & {
265
+ raw: string;
266
+ }>;
267
+ /**
268
+ * Tool-calling chat-completion call that speaks the OpenAI wire format
269
+ * directly via `fetch`, bypassing LangChain's `ChatOpenAI` converter.
270
+ *
271
+ * MOTIVATION: LangChain's `convertMessagesToCompletionsMessageParams`
272
+ * silently drops every `additional_kwargs` field except `function_call`
273
+ * and `tool_calls`. DeepSeek V4 thinking-mode requires
274
+ * `reasoning_content` to be echoed back on assistant turns that
275
+ * triggered tool_calls; LangChain's converter strips it, the next
276
+ * round-trip fails with "400 The reasoning_content in the thinking
277
+ * mode must be passed back to the API." This method preserves every
278
+ * assistant field verbatim across round-trips.
279
+ *
280
+ * Supported providers: any OpenAI-compatible endpoint (openai,
281
+ * deepseek, openrouter, kimi, orbgen). Anthropic uses a different
282
+ * wire format and is intentionally not supported here — use
283
+ * `callWithMessages` for Anthropic.
284
+ *
285
+ * Defaults `parallel_tool_calls: false` — sequential tool dispatch is
286
+ * the protocol-safe baseline. Multi-tool-call assistant messages
287
+ * trigger DeepSeek's "insufficient tool messages" 400 error.
288
+ */
289
+ callWithTools(options: {
290
+ messages: ReadonlyArray<ChatCompletionMessage>;
291
+ tools: ReadonlyArray<ChatCompletionToolDef>;
292
+ maxTokens?: number;
293
+ parallelToolCalls?: boolean;
294
+ signal?: AbortSignal;
295
+ }): Promise<{
296
+ message: ChatCompletionMessage;
297
+ finishReason: string;
298
+ usage: LLMUsage | null;
299
+ }>;
300
+ /**
301
+ * Stream a raw text response as an async iterator of content chunks.
302
+ * Uses the underlying LangChain model's .stream() method.
303
+ *
304
+ * @param options - System prompt plus full message history
305
+ * @yields LLMStreamChunk with content deltas and a done flag
306
+ */
307
+ streamRaw(options: LLMStreamOptions): AsyncGenerator<LLMStreamChunk>;
308
+ private isRateLimitError;
309
+ callWithCache<T>(options: CacheAwareLLMCallOptions<T>): Promise<LLMResponse<T>>;
310
+ static cacheableBlock(text: string, cache?: boolean): CacheableBlock;
311
+ }
312
+ declare function getSharedLLMClient(options?: LLMClientOptions): LLMClient;
313
+ declare function resetSharedLLMClient(provider?: LLMProvider): void;
314
+ declare function getAvailableProvider(): LLMProvider;
315
+ declare function isProviderAvailable(provider: LLMProvider): boolean;
316
+ /**
317
+ * Create an LLM client optimized for requirements analysis.
318
+ *
319
+ * Uses lower temperature (0.3) for more deterministic output.
320
+ * Defaults to GPT-5.1 for OpenAI or DeepSeek Chat.
321
+ *
322
+ * @param {Partial<LLMClientOptions>} [options] - Optional client configuration
323
+ * @returns {LLMClient} Configured LLM client
324
+ */
325
+ declare function createRequirementsClient(options?: Partial<LLMClientOptions>): LLMClient;
326
+ /**
327
+ * Create an LLM client optimized for creative tasks.
328
+ *
329
+ * Uses higher temperature (0.7) for more varied output.
330
+ * Defaults to GPT-4o or DeepSeek Reasoner.
331
+ *
332
+ * @param {Partial<LLMClientOptions>} [options] - Optional client configuration
333
+ * @returns {LLMClient} Configured LLM client
334
+ */
335
+ declare function createCreativeClient(options?: Partial<LLMClientOptions>): LLMClient;
336
+ /**
337
+ * Create an LLM client optimized for code fixing.
338
+ *
339
+ * Uses low temperature (0.2) for precise, deterministic fixes.
340
+ * Defaults to GPT-4o Mini or DeepSeek Chat for cost efficiency.
341
+ *
342
+ * @param {Partial<LLMClientOptions>} [options] - Optional client configuration
343
+ * @returns {LLMClient} Configured LLM client
344
+ */
345
+ declare function createFixClient(options?: Partial<LLMClientOptions>): LLMClient;
346
+ /**
347
+ * Create a DeepSeek LLM client.
348
+ *
349
+ * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
350
+ * @returns {LLMClient} Configured DeepSeek client
351
+ */
352
+ declare function createDeepSeekClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
353
+ /**
354
+ * Create an OpenAI LLM client.
355
+ *
356
+ * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
357
+ * @returns {LLMClient} Configured OpenAI client
358
+ */
359
+ declare function createOpenAIClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
360
+ /**
361
+ * Create an Anthropic LLM client.
362
+ *
363
+ * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
364
+ * @returns {LLMClient} Configured Anthropic client
365
+ */
366
+ declare function createAnthropicClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
367
+ /**
368
+ * Create a Kimi LLM client.
369
+ *
370
+ * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
371
+ * @returns {LLMClient} Configured Kimi client
372
+ */
373
+ declare function createKimiClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
374
+ /**
375
+ * Create an OpenRouter LLM client.
376
+ *
377
+ * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
378
+ * @returns {LLMClient} Configured OpenRouter client
379
+ */
380
+ declare function createOpenRouterClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
381
+ /**
382
+ * Create a Zhipu (GLM) LLM client via OpenRouter.
383
+ *
384
+ * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
385
+ * @returns {LLMClient} Configured Zhipu client
386
+ */
387
+ declare function createZhipuClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
388
+
389
+ export { ANTHROPIC_MODELS as A, getAvailableProvider as B, type CacheAwareLLMCallOptions as C, DEEPSEEK_MODELS as D, getSharedLLMClient as E, isProviderAvailable as F, parseChatCompletionResponse as G, resetSharedLLMClient as H, KIMI_MODELS as K, type LLMCallOptions as L, OPENAI_MODELS as O, type ProviderConfig as P, type CacheableBlock as a, type ChatCompletionChoice as b, type ChatCompletionMessage as c, type ChatCompletionResponse as d, type ChatCompletionRole as e, type ChatCompletionToolCall as f, type ChatCompletionToolDef as g, type ChatCompletionUsage as h, LLMClient as i, type LLMClientOptions as j, type LLMFinishReason as k, type LLMProvider as l, type LLMResponse as m, type LLMStreamChunk as n, type LLMStreamOptions as o, type LLMUsage as p, OPENROUTER_MODELS as q, createAnthropicClient as r, createCreativeClient as s, createDeepSeekClient as t, createFixClient as u, createKimiClient as v, createOpenAIClient as w, createOpenRouterClient as x, createRequirementsClient as y, createZhipuClient as z };
package/dist/client.d.ts CHANGED
@@ -1,293 +1,6 @@
1
- import { a as RateLimiterOptions, b as TokenUsage } from './rate-limiter-BqWOhaXY.js';
2
- import { ChatOpenAI } from '@langchain/openai';
3
- import { ChatAnthropic } from '@langchain/anthropic';
4
- import { z } from 'zod';
5
-
6
- type ChatModel = ChatOpenAI | ChatAnthropic;
7
- type LLMProvider = 'openai' | 'deepseek' | 'anthropic' | 'kimi' | 'openrouter' | 'orbgen';
8
- interface ProviderConfig {
9
- apiKey: string;
10
- baseUrl?: string;
11
- defaultModel: string;
12
- }
13
- interface LLMClientOptions {
14
- provider?: LLMProvider;
15
- model?: string;
16
- temperature?: number;
17
- streaming?: boolean;
18
- rateLimiter?: RateLimiterOptions;
19
- useGlobalRateLimiter?: boolean;
20
- trackTokens?: boolean;
21
- }
22
- interface LLMCallOptions<T = unknown> {
23
- systemPrompt: string;
24
- userPrompt: string;
25
- schema?: z.ZodSchema<T>;
26
- maxRetries?: number;
27
- retryWithContext?: boolean;
28
- maxTokens?: number;
29
- skipSchemaValidation?: boolean;
30
- temperature?: number;
31
- }
32
- interface CacheableBlock {
33
- type: 'text';
34
- text: string;
35
- cache_control?: {
36
- type: 'ephemeral';
37
- };
38
- }
39
- interface CacheAwareLLMCallOptions<T = unknown> extends LLMCallOptions<T> {
40
- systemBlocks?: CacheableBlock[];
41
- userBlocks?: CacheableBlock[];
42
- rawText?: boolean;
43
- }
44
- interface LLMUsage {
45
- promptTokens: number;
46
- completionTokens: number;
47
- totalTokens: number;
48
- }
49
- type LLMFinishReason = 'stop' | 'length' | 'content_filter' | 'tool_calls' | null;
50
- interface LLMResponse<T> {
51
- data: T;
52
- raw: string;
53
- finishReason: LLMFinishReason;
54
- usage: LLMUsage | null;
55
- }
56
- interface LLMStreamOptions {
57
- systemPrompt: string;
58
- messages: Array<{
59
- role: 'system' | 'user' | 'assistant';
60
- content: string;
61
- }>;
62
- maxTokens?: number;
63
- temperature?: number;
64
- }
65
- interface LLMStreamChunk {
66
- content: string;
67
- done: boolean;
68
- }
69
- declare const DEEPSEEK_MODELS: {
70
- readonly CHAT: "deepseek-chat";
71
- readonly CODER: "deepseek-coder";
72
- readonly REASONER: "deepseek-reasoner";
73
- readonly V4_PRO: "deepseek-v4-pro";
74
- readonly V4_FLASH: "deepseek-v4-flash";
75
- };
76
- declare const OPENAI_MODELS: {
77
- readonly GPT4O: "gpt-4o";
78
- readonly GPT4O_MINI: "gpt-4o-mini";
79
- readonly GPT4_TURBO: "gpt-4-turbo";
80
- readonly GPT35_TURBO: "gpt-3.5-turbo";
81
- readonly GPT_5_1: "gpt-5.1";
82
- };
83
- declare const ANTHROPIC_MODELS: {
84
- readonly CLAUDE_SONNET_4_5: "claude-sonnet-4-5-20250929";
85
- readonly CLAUDE_SONNET_4: "claude-sonnet-4-20250514";
86
- readonly CLAUDE_OPUS_4_5: "claude-opus-4-5-20250929";
87
- readonly CLAUDE_3_5_HAIKU: "claude-3-5-haiku-20241022";
88
- };
89
- declare const KIMI_MODELS: {
90
- readonly K2_5: "kimi-k2.5";
91
- };
92
- declare const OPENROUTER_MODELS: {
93
- readonly QWEN_2_5_72B: "qwen/qwen-2.5-72b-instruct";
94
- readonly QWEN_2_5_CODER_32B: "qwen/qwen-2.5-coder-32b-instruct";
95
- readonly QWEN_3_235B: "qwen/qwen3-235b-a22b";
96
- readonly GEMMA_3_4B: "google/gemma-3-4b-it";
97
- readonly GEMMA_3_12B: "google/gemma-3-12b-it";
98
- readonly GEMMA_3_27B: "google/gemma-3-27b-it";
99
- readonly MINISTRAL_8B: "mistralai/ministral-8b-2512";
100
- readonly MISTRAL_SMALL_3_1: "mistralai/mistral-small-3.1-24b-instruct";
101
- readonly MISTRAL_MEDIUM_3_1: "mistralai/mistral-medium-3.1";
102
- readonly LLAMA_3_3_70B: "meta-llama/llama-3.3-70b-instruct";
103
- readonly LLAMA_3_1_405B: "meta-llama/llama-3.1-405b-instruct";
104
- readonly LLAMA_4_MAVERICK: "meta-llama/llama-4-maverick";
105
- readonly LLAMA_4_SCOUT: "meta-llama/llama-4-scout";
106
- readonly KIMI_K2: "moonshotai/kimi-k2";
107
- readonly GLM_4_7: "z-ai/glm-4.7";
108
- };
109
- declare class LLMClient {
110
- private model;
111
- private rateLimiter;
112
- private tokenTracker;
113
- private modelName;
114
- private provider;
115
- private providerConfig;
116
- private temperature;
117
- private streaming;
118
- constructor(options?: LLMClientOptions);
119
- private usesMaxCompletionTokens;
120
- private createModel;
121
- private getModelWithOptions;
122
- /**
123
- * Check if this model is a Qwen3.5 thinking model.
124
- * These models burn all output tokens on internal reasoning
125
- * unless thinking is explicitly disabled via /no_think prefix.
126
- */
127
- private isQwenThinkingModel;
128
- /**
129
- * Prepare user prompt with provider-specific adjustments.
130
- * Qwen3.5 models require /no_think to disable reasoning mode.
131
- */
132
- private prepareUserPrompt;
133
- getProvider(): LLMProvider;
134
- getModelName(): string;
135
- getModel(): ChatModel;
136
- getRateLimiterStatus(): {
137
- queueLength: number;
138
- activeRequests: number;
139
- minuteTokens: number;
140
- secondTokens: number;
141
- backoffMs: number;
142
- };
143
- getTokenUsage(): TokenUsage | null;
144
- call<T>(options: LLMCallOptions<T>): Promise<T>;
145
- callWithMetadata<T>(options: LLMCallOptions<T>): Promise<LLMResponse<T>>;
146
- private extractFinishReason;
147
- /**
148
- * Default hard timeout for raw LLM invocations. The langchain ChatOpenAI
149
- * `timeout` option doesn't reliably fire when an HTTP connection
150
- * half-opens (the response stream can sit open indefinitely), so every
151
- * raw call gets wrapped in an AbortController-driven deadline. Callers
152
- * can override by passing their own `signal`.
153
- */
154
- private static readonly DEFAULT_RAW_TIMEOUT_MS;
155
- /**
156
- * Wrap a `model.invoke(...)` with start/finish/error logging and a hard
157
- * abort-driven timeout. Returns whatever the underlying `invoke` returns.
158
- *
159
- * The raw paths (`callRaw`, `callRawWithMetadata`, `callWithMessages`)
160
- * used to be silent — when a connection half-opened, callers sat
161
- * indefinitely with no feedback. This wrapper makes hangs visible (start
162
- * + duration logs) and bounded (timeout fires with a clear error).
163
- */
164
- private invokeWithObservability;
165
- callRaw(options: {
166
- systemPrompt: string;
167
- userPrompt: string;
168
- maxTokens?: number;
169
- signal?: AbortSignal;
170
- }): Promise<string>;
171
- callRawWithMetadata(options: {
172
- systemPrompt: string;
173
- userPrompt: string;
174
- maxTokens?: number;
175
- signal?: AbortSignal;
176
- }): Promise<Omit<LLMResponse<string>, 'data'> & {
177
- raw: string;
178
- }>;
179
- /**
180
- * Call the LLM with a structured messages array.
181
- *
182
- * Unlike callRawWithMetadata (which takes systemPrompt + userPrompt strings),
183
- * this accepts a full conversation history with proper role separation.
184
- * This enables:
185
- * - Anthropic prompt caching on message boundaries (not just system prompt)
186
- * - Proper tool_use/tool_result role handling across providers
187
- * - Reduced token waste from string concatenation
188
- *
189
- * All providers support the messages format:
190
- * - Anthropic: native messages API with cache_control
191
- * - DeepSeek: OpenAI-compatible messages via ChatOpenAI
192
- * - OpenRouter: OpenAI-compatible messages via ChatOpenAI
193
- */
194
- callWithMessages(options: {
195
- messages: Array<{
196
- role: string;
197
- content: string;
198
- }>;
199
- maxTokens?: number;
200
- signal?: AbortSignal;
201
- }): Promise<Omit<LLMResponse<string>, 'data'> & {
202
- raw: string;
203
- }>;
204
- /**
205
- * Stream a raw text response as an async iterator of content chunks.
206
- * Uses the underlying LangChain model's .stream() method.
207
- *
208
- * @param options - System prompt plus full message history
209
- * @yields LLMStreamChunk with content deltas and a done flag
210
- */
211
- streamRaw(options: LLMStreamOptions): AsyncGenerator<LLMStreamChunk>;
212
- private isRateLimitError;
213
- callWithCache<T>(options: CacheAwareLLMCallOptions<T>): Promise<LLMResponse<T>>;
214
- static cacheableBlock(text: string, cache?: boolean): CacheableBlock;
215
- }
216
- declare function getSharedLLMClient(options?: LLMClientOptions): LLMClient;
217
- declare function resetSharedLLMClient(provider?: LLMProvider): void;
218
- declare function getAvailableProvider(): LLMProvider;
219
- declare function isProviderAvailable(provider: LLMProvider): boolean;
220
- /**
221
- * Create an LLM client optimized for requirements analysis.
222
- *
223
- * Uses lower temperature (0.3) for more deterministic output.
224
- * Defaults to GPT-5.1 for OpenAI or DeepSeek Chat.
225
- *
226
- * @param {Partial<LLMClientOptions>} [options] - Optional client configuration
227
- * @returns {LLMClient} Configured LLM client
228
- */
229
- declare function createRequirementsClient(options?: Partial<LLMClientOptions>): LLMClient;
230
- /**
231
- * Create an LLM client optimized for creative tasks.
232
- *
233
- * Uses higher temperature (0.7) for more varied output.
234
- * Defaults to GPT-4o or DeepSeek Reasoner.
235
- *
236
- * @param {Partial<LLMClientOptions>} [options] - Optional client configuration
237
- * @returns {LLMClient} Configured LLM client
238
- */
239
- declare function createCreativeClient(options?: Partial<LLMClientOptions>): LLMClient;
240
- /**
241
- * Create an LLM client optimized for code fixing.
242
- *
243
- * Uses low temperature (0.2) for precise, deterministic fixes.
244
- * Defaults to GPT-4o Mini or DeepSeek Chat for cost efficiency.
245
- *
246
- * @param {Partial<LLMClientOptions>} [options] - Optional client configuration
247
- * @returns {LLMClient} Configured LLM client
248
- */
249
- declare function createFixClient(options?: Partial<LLMClientOptions>): LLMClient;
250
- /**
251
- * Create a DeepSeek LLM client.
252
- *
253
- * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
254
- * @returns {LLMClient} Configured DeepSeek client
255
- */
256
- declare function createDeepSeekClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
257
- /**
258
- * Create an OpenAI LLM client.
259
- *
260
- * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
261
- * @returns {LLMClient} Configured OpenAI client
262
- */
263
- declare function createOpenAIClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
264
- /**
265
- * Create an Anthropic LLM client.
266
- *
267
- * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
268
- * @returns {LLMClient} Configured Anthropic client
269
- */
270
- declare function createAnthropicClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
271
- /**
272
- * Create a Kimi LLM client.
273
- *
274
- * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
275
- * @returns {LLMClient} Configured Kimi client
276
- */
277
- declare function createKimiClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
278
- /**
279
- * Create an OpenRouter LLM client.
280
- *
281
- * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
282
- * @returns {LLMClient} Configured OpenRouter client
283
- */
284
- declare function createOpenRouterClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
285
- /**
286
- * Create a Zhipu (GLM) LLM client via OpenRouter.
287
- *
288
- * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
289
- * @returns {LLMClient} Configured Zhipu client
290
- */
291
- declare function createZhipuClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
292
-
293
- export { ANTHROPIC_MODELS, type CacheAwareLLMCallOptions, type CacheableBlock, DEEPSEEK_MODELS, KIMI_MODELS, type LLMCallOptions, LLMClient, type LLMClientOptions, type LLMFinishReason, type LLMProvider, type LLMResponse, type LLMStreamChunk, type LLMStreamOptions, type LLMUsage, OPENAI_MODELS, OPENROUTER_MODELS, type ProviderConfig, createAnthropicClient, createCreativeClient, createDeepSeekClient, createFixClient, createKimiClient, createOpenAIClient, createOpenRouterClient, createRequirementsClient, createZhipuClient, getAvailableProvider, getSharedLLMClient, isProviderAvailable, resetSharedLLMClient };
1
+ import './rate-limiter-BqWOhaXY.js';
2
+ import '@langchain/openai';
3
+ import '@langchain/anthropic';
4
+ import 'zod';
5
+ export { A as ANTHROPIC_MODELS, C as CacheAwareLLMCallOptions, a as CacheableBlock, D as DEEPSEEK_MODELS, K as KIMI_MODELS, L as LLMCallOptions, i as LLMClient, j as LLMClientOptions, k as LLMFinishReason, l as LLMProvider, m as LLMResponse, n as LLMStreamChunk, o as LLMStreamOptions, p as LLMUsage, O as OPENAI_MODELS, q as OPENROUTER_MODELS, P as ProviderConfig, r as createAnthropicClient, s as createCreativeClient, t as createDeepSeekClient, u as createFixClient, v as createKimiClient, w as createOpenAIClient, x as createOpenRouterClient, y as createRequirementsClient, z as createZhipuClient, B as getAvailableProvider, E as getSharedLLMClient, F as isProviderAvailable, H as resetSharedLLMClient } from './client-DMCU9rVo.js';
6
+ import '@almadar/core';