@almadar/llm 2.14.0 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,388 @@
1
+ import { a as RateLimiterOptions, b as TokenUsage } from './rate-limiter-BqWOhaXY.js';
2
+ import { ChatOpenAI } from '@langchain/openai';
3
+ import { ChatAnthropic } from '@langchain/anthropic';
4
+ import { z } from 'zod';
5
+
6
+ /**
7
+ * OpenAI Chat Completions wire-format types used by `LLMClient.callWithTools`.
8
+ *
9
+ * These mirror the public OpenAI Chat Completions API spec, which is also
10
+ * the protocol every OpenAI-compatible provider (DeepSeek, OpenRouter,
11
+ * Kimi, OrbGen, etc.) implements. The types are intentionally faithful
12
+ * to the wire format — when the LLM emits a `reasoning_content` field
13
+ * (DeepSeek V4 thinking mode), it's preserved verbatim and echoed back
14
+ * on the next round-trip.
15
+ */
16
+ interface ChatCompletionToolDef {
17
+ type: 'function';
18
+ function: {
19
+ name: string;
20
+ description: string;
21
+ /** JSON Schema describing the tool's parameters. */
22
+ parameters: {
23
+ [key: string]: unknown;
24
+ };
25
+ };
26
+ }
27
+ type ChatCompletionRole = 'system' | 'user' | 'assistant' | 'tool';
28
+ interface ChatCompletionToolCall {
29
+ id: string;
30
+ type: 'function';
31
+ function: {
32
+ name: string;
33
+ arguments: string;
34
+ };
35
+ }
36
+ interface ChatCompletionMessage {
37
+ role: ChatCompletionRole;
38
+ /** Null is valid (assistant-only) when the message exists purely to carry `tool_calls`. */
39
+ content: string | null;
40
+ /** Present on assistant turns that called one or more tools. */
41
+ tool_calls?: ChatCompletionToolCall[];
42
+ /** Present on tool-role messages — matches `tool_calls[*].id` of the preceding assistant turn. */
43
+ tool_call_id?: string;
44
+ /**
45
+ * DeepSeek V4 thinking-mode chain-of-thought string. Must be echoed
46
+ * back on the next round-trip when the assistant turn triggered
47
+ * tool_calls — that's the protocol contract that LangChain's
48
+ * ChatOpenAI converter breaks.
49
+ */
50
+ reasoning_content?: string;
51
+ }
52
+ interface ChatCompletionChoice {
53
+ index: number;
54
+ message: ChatCompletionMessage;
55
+ finish_reason: string;
56
+ }
57
+ interface ChatCompletionUsage {
58
+ prompt_tokens: number;
59
+ completion_tokens: number;
60
+ total_tokens: number;
61
+ }
62
+ interface ChatCompletionResponse {
63
+ choices: ChatCompletionChoice[];
64
+ usage?: ChatCompletionUsage;
65
+ }
66
+ declare function parseChatCompletionResponse(raw: string): ChatCompletionResponse;
67
+
68
+ type ChatModel = ChatOpenAI | ChatAnthropic;
69
+ type LLMProvider = 'openai' | 'deepseek' | 'anthropic' | 'kimi' | 'openrouter' | 'orbgen';
70
+ interface ProviderConfig {
71
+ apiKey: string;
72
+ baseUrl?: string;
73
+ defaultModel: string;
74
+ }
75
+ interface LLMClientOptions {
76
+ provider?: LLMProvider;
77
+ model?: string;
78
+ temperature?: number;
79
+ streaming?: boolean;
80
+ rateLimiter?: RateLimiterOptions;
81
+ useGlobalRateLimiter?: boolean;
82
+ trackTokens?: boolean;
83
+ }
84
+ interface LLMCallOptions<T = unknown> {
85
+ systemPrompt: string;
86
+ userPrompt: string;
87
+ schema?: z.ZodSchema<T>;
88
+ maxRetries?: number;
89
+ retryWithContext?: boolean;
90
+ maxTokens?: number;
91
+ skipSchemaValidation?: boolean;
92
+ temperature?: number;
93
+ }
94
+ interface CacheableBlock {
95
+ type: 'text';
96
+ text: string;
97
+ cache_control?: {
98
+ type: 'ephemeral';
99
+ };
100
+ }
101
+ interface CacheAwareLLMCallOptions<T = unknown> extends LLMCallOptions<T> {
102
+ systemBlocks?: CacheableBlock[];
103
+ userBlocks?: CacheableBlock[];
104
+ rawText?: boolean;
105
+ }
106
+ interface LLMUsage {
107
+ promptTokens: number;
108
+ completionTokens: number;
109
+ totalTokens: number;
110
+ }
111
+ type LLMFinishReason = 'stop' | 'length' | 'content_filter' | 'tool_calls' | null;
112
+ interface LLMResponse<T> {
113
+ data: T;
114
+ raw: string;
115
+ finishReason: LLMFinishReason;
116
+ usage: LLMUsage | null;
117
+ }
118
+ interface LLMStreamOptions {
119
+ systemPrompt: string;
120
+ messages: Array<{
121
+ role: 'system' | 'user' | 'assistant';
122
+ content: string;
123
+ }>;
124
+ maxTokens?: number;
125
+ temperature?: number;
126
+ }
127
+ interface LLMStreamChunk {
128
+ content: string;
129
+ done: boolean;
130
+ }
131
+ declare const DEEPSEEK_MODELS: {
132
+ readonly CHAT: "deepseek-chat";
133
+ readonly CODER: "deepseek-coder";
134
+ readonly REASONER: "deepseek-reasoner";
135
+ readonly V4_PRO: "deepseek-v4-pro";
136
+ readonly V4_FLASH: "deepseek-v4-flash";
137
+ };
138
+ declare const OPENAI_MODELS: {
139
+ readonly GPT4O: "gpt-4o";
140
+ readonly GPT4O_MINI: "gpt-4o-mini";
141
+ readonly GPT4_TURBO: "gpt-4-turbo";
142
+ readonly GPT35_TURBO: "gpt-3.5-turbo";
143
+ readonly GPT_5_1: "gpt-5.1";
144
+ };
145
+ declare const ANTHROPIC_MODELS: {
146
+ readonly CLAUDE_SONNET_4_5: "claude-sonnet-4-5-20250929";
147
+ readonly CLAUDE_SONNET_4: "claude-sonnet-4-20250514";
148
+ readonly CLAUDE_OPUS_4_5: "claude-opus-4-5-20250929";
149
+ readonly CLAUDE_3_5_HAIKU: "claude-3-5-haiku-20241022";
150
+ };
151
+ declare const KIMI_MODELS: {
152
+ readonly K2_5: "kimi-k2.5";
153
+ };
154
+ declare const OPENROUTER_MODELS: {
155
+ readonly QWEN_2_5_72B: "qwen/qwen-2.5-72b-instruct";
156
+ readonly QWEN_2_5_CODER_32B: "qwen/qwen-2.5-coder-32b-instruct";
157
+ readonly QWEN_3_235B: "qwen/qwen3-235b-a22b";
158
+ readonly GEMMA_3_4B: "google/gemma-3-4b-it";
159
+ readonly GEMMA_3_12B: "google/gemma-3-12b-it";
160
+ readonly GEMMA_3_27B: "google/gemma-3-27b-it";
161
+ readonly MINISTRAL_8B: "mistralai/ministral-8b-2512";
162
+ readonly MISTRAL_SMALL_3_1: "mistralai/mistral-small-3.1-24b-instruct";
163
+ readonly MISTRAL_MEDIUM_3_1: "mistralai/mistral-medium-3.1";
164
+ readonly LLAMA_3_3_70B: "meta-llama/llama-3.3-70b-instruct";
165
+ readonly LLAMA_3_1_405B: "meta-llama/llama-3.1-405b-instruct";
166
+ readonly LLAMA_4_MAVERICK: "meta-llama/llama-4-maverick";
167
+ readonly LLAMA_4_SCOUT: "meta-llama/llama-4-scout";
168
+ readonly KIMI_K2: "moonshotai/kimi-k2";
169
+ readonly GLM_4_7: "z-ai/glm-4.7";
170
+ };
171
+ declare class LLMClient {
172
+ private model;
173
+ private rateLimiter;
174
+ private tokenTracker;
175
+ private modelName;
176
+ private provider;
177
+ private providerConfig;
178
+ private temperature;
179
+ private streaming;
180
+ constructor(options?: LLMClientOptions);
181
+ private usesMaxCompletionTokens;
182
+ private createModel;
183
+ private getModelWithOptions;
184
+ /**
185
+ * Check if this model is a Qwen3.5 thinking model.
186
+ * These models burn all output tokens on internal reasoning
187
+ * unless thinking is explicitly disabled via /no_think prefix.
188
+ */
189
+ private isQwenThinkingModel;
190
+ /**
191
+ * Prepare user prompt with provider-specific adjustments.
192
+ * Qwen3.5 models require /no_think to disable reasoning mode.
193
+ */
194
+ private prepareUserPrompt;
195
+ getProvider(): LLMProvider;
196
+ getModelName(): string;
197
+ getModel(): ChatModel;
198
+ getRateLimiterStatus(): {
199
+ queueLength: number;
200
+ activeRequests: number;
201
+ minuteTokens: number;
202
+ secondTokens: number;
203
+ backoffMs: number;
204
+ };
205
+ getTokenUsage(): TokenUsage | null;
206
+ call<T>(options: LLMCallOptions<T>): Promise<T>;
207
+ callWithMetadata<T>(options: LLMCallOptions<T>): Promise<LLMResponse<T>>;
208
+ private extractFinishReason;
209
+ /**
210
+ * Default hard timeout for raw LLM invocations. The langchain ChatOpenAI
211
+ * `timeout` option doesn't reliably fire when an HTTP connection
212
+ * half-opens (the response stream can sit open indefinitely), so every
213
+ * raw call gets wrapped in an AbortController-driven deadline. Callers
214
+ * can override by passing their own `signal`.
215
+ */
216
+ private static readonly DEFAULT_RAW_TIMEOUT_MS;
217
+ /**
218
+ * Wrap a `model.invoke(...)` with start/finish/error logging and a hard
219
+ * abort-driven timeout. Returns whatever the underlying `invoke` returns.
220
+ *
221
+ * The raw paths (`callRaw`, `callRawWithMetadata`, `callWithMessages`)
222
+ * used to be silent — when a connection half-opened, callers sat
223
+ * indefinitely with no feedback. This wrapper makes hangs visible (start
224
+ * + duration logs) and bounded (timeout fires with a clear error).
225
+ */
226
+ private invokeWithObservability;
227
+ callRaw(options: {
228
+ systemPrompt: string;
229
+ userPrompt: string;
230
+ maxTokens?: number;
231
+ signal?: AbortSignal;
232
+ }): Promise<string>;
233
+ callRawWithMetadata(options: {
234
+ systemPrompt: string;
235
+ userPrompt: string;
236
+ maxTokens?: number;
237
+ signal?: AbortSignal;
238
+ }): Promise<Omit<LLMResponse<string>, 'data'> & {
239
+ raw: string;
240
+ }>;
241
+ /**
242
+ * Call the LLM with a structured messages array.
243
+ *
244
+ * Unlike callRawWithMetadata (which takes systemPrompt + userPrompt strings),
245
+ * this accepts a full conversation history with proper role separation.
246
+ * This enables:
247
+ * - Anthropic prompt caching on message boundaries (not just system prompt)
248
+ * - Proper tool_use/tool_result role handling across providers
249
+ * - Reduced token waste from string concatenation
250
+ *
251
+ * All providers support the messages format:
252
+ * - Anthropic: native messages API with cache_control
253
+ * - DeepSeek: OpenAI-compatible messages via ChatOpenAI
254
+ * - OpenRouter: OpenAI-compatible messages via ChatOpenAI
255
+ */
256
+ callWithMessages(options: {
257
+ messages: Array<{
258
+ role: string;
259
+ content: string;
260
+ }>;
261
+ maxTokens?: number;
262
+ signal?: AbortSignal;
263
+ }): Promise<Omit<LLMResponse<string>, 'data'> & {
264
+ raw: string;
265
+ }>;
266
+ /**
267
+ * Tool-calling chat-completion call that speaks the OpenAI wire format
268
+ * directly via `fetch`, bypassing LangChain's `ChatOpenAI` converter.
269
+ *
270
+ * MOTIVATION: LangChain's `convertMessagesToCompletionsMessageParams`
271
+ * silently drops every `additional_kwargs` field except `function_call`
272
+ * and `tool_calls`. DeepSeek V4 thinking-mode requires
273
+ * `reasoning_content` to be echoed back on assistant turns that
274
+ * triggered tool_calls; LangChain's converter strips it, the next
275
+ * round-trip fails with "400 The reasoning_content in the thinking
276
+ * mode must be passed back to the API." This method preserves every
277
+ * assistant field verbatim across round-trips.
278
+ *
279
+ * Supported providers: any OpenAI-compatible endpoint (openai,
280
+ * deepseek, openrouter, kimi, orbgen). Anthropic uses a different
281
+ * wire format and is intentionally not supported here — use
282
+ * `callWithMessages` for Anthropic.
283
+ *
284
+ * Defaults `parallel_tool_calls: false` — sequential tool dispatch is
285
+ * the protocol-safe baseline. Multi-tool-call assistant messages
286
+ * trigger DeepSeek's "insufficient tool messages" 400 error.
287
+ */
288
+ callWithTools(options: {
289
+ messages: ReadonlyArray<ChatCompletionMessage>;
290
+ tools: ReadonlyArray<ChatCompletionToolDef>;
291
+ maxTokens?: number;
292
+ parallelToolCalls?: boolean;
293
+ signal?: AbortSignal;
294
+ }): Promise<{
295
+ message: ChatCompletionMessage;
296
+ finishReason: string;
297
+ usage: LLMUsage | null;
298
+ }>;
299
+ /**
300
+ * Stream a raw text response as an async iterator of content chunks.
301
+ * Uses the underlying LangChain model's .stream() method.
302
+ *
303
+ * @param options - System prompt plus full message history
304
+ * @yields LLMStreamChunk with content deltas and a done flag
305
+ */
306
+ streamRaw(options: LLMStreamOptions): AsyncGenerator<LLMStreamChunk>;
307
+ private isRateLimitError;
308
+ callWithCache<T>(options: CacheAwareLLMCallOptions<T>): Promise<LLMResponse<T>>;
309
+ static cacheableBlock(text: string, cache?: boolean): CacheableBlock;
310
+ }
311
+ declare function getSharedLLMClient(options?: LLMClientOptions): LLMClient;
312
+ declare function resetSharedLLMClient(provider?: LLMProvider): void;
313
+ declare function getAvailableProvider(): LLMProvider;
314
+ declare function isProviderAvailable(provider: LLMProvider): boolean;
315
+ /**
316
+ * Create an LLM client optimized for requirements analysis.
317
+ *
318
+ * Uses lower temperature (0.3) for more deterministic output.
319
+ * Defaults to GPT-5.1 for OpenAI or DeepSeek Chat.
320
+ *
321
+ * @param {Partial<LLMClientOptions>} [options] - Optional client configuration
322
+ * @returns {LLMClient} Configured LLM client
323
+ */
324
+ declare function createRequirementsClient(options?: Partial<LLMClientOptions>): LLMClient;
325
+ /**
326
+ * Create an LLM client optimized for creative tasks.
327
+ *
328
+ * Uses higher temperature (0.7) for more varied output.
329
+ * Defaults to GPT-4o or DeepSeek Reasoner.
330
+ *
331
+ * @param {Partial<LLMClientOptions>} [options] - Optional client configuration
332
+ * @returns {LLMClient} Configured LLM client
333
+ */
334
+ declare function createCreativeClient(options?: Partial<LLMClientOptions>): LLMClient;
335
+ /**
336
+ * Create an LLM client optimized for code fixing.
337
+ *
338
+ * Uses low temperature (0.2) for precise, deterministic fixes.
339
+ * Defaults to GPT-4o Mini or DeepSeek Chat for cost efficiency.
340
+ *
341
+ * @param {Partial<LLMClientOptions>} [options] - Optional client configuration
342
+ * @returns {LLMClient} Configured LLM client
343
+ */
344
+ declare function createFixClient(options?: Partial<LLMClientOptions>): LLMClient;
345
+ /**
346
+ * Create a DeepSeek LLM client.
347
+ *
348
+ * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
349
+ * @returns {LLMClient} Configured DeepSeek client
350
+ */
351
+ declare function createDeepSeekClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
352
+ /**
353
+ * Create an OpenAI LLM client.
354
+ *
355
+ * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
356
+ * @returns {LLMClient} Configured OpenAI client
357
+ */
358
+ declare function createOpenAIClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
359
+ /**
360
+ * Create an Anthropic LLM client.
361
+ *
362
+ * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
363
+ * @returns {LLMClient} Configured Anthropic client
364
+ */
365
+ declare function createAnthropicClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
366
+ /**
367
+ * Create a Kimi LLM client.
368
+ *
369
+ * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
370
+ * @returns {LLMClient} Configured Kimi client
371
+ */
372
+ declare function createKimiClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
373
+ /**
374
+ * Create an OpenRouter LLM client.
375
+ *
376
+ * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
377
+ * @returns {LLMClient} Configured OpenRouter client
378
+ */
379
+ declare function createOpenRouterClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
380
+ /**
381
+ * Create a Zhipu (GLM) LLM client via OpenRouter.
382
+ *
383
+ * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
384
+ * @returns {LLMClient} Configured Zhipu client
385
+ */
386
+ declare function createZhipuClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
387
+
388
+ export { ANTHROPIC_MODELS as A, getAvailableProvider as B, type CacheAwareLLMCallOptions as C, DEEPSEEK_MODELS as D, getSharedLLMClient as E, isProviderAvailable as F, parseChatCompletionResponse as G, resetSharedLLMClient as H, KIMI_MODELS as K, type LLMCallOptions as L, OPENAI_MODELS as O, type ProviderConfig as P, type CacheableBlock as a, type ChatCompletionChoice as b, type ChatCompletionMessage as c, type ChatCompletionResponse as d, type ChatCompletionRole as e, type ChatCompletionToolCall as f, type ChatCompletionToolDef as g, type ChatCompletionUsage as h, LLMClient as i, type LLMClientOptions as j, type LLMFinishReason as k, type LLMProvider as l, type LLMResponse as m, type LLMStreamChunk as n, type LLMStreamOptions as o, type LLMUsage as p, OPENROUTER_MODELS as q, createAnthropicClient as r, createCreativeClient as s, createDeepSeekClient as t, createFixClient as u, createKimiClient as v, createOpenAIClient as w, createOpenRouterClient as x, createRequirementsClient as y, createZhipuClient as z };
package/dist/client.d.ts CHANGED
@@ -1,293 +1,5 @@
1
- import { a as RateLimiterOptions, b as TokenUsage } from './rate-limiter-BqWOhaXY.js';
2
- import { ChatOpenAI } from '@langchain/openai';
3
- import { ChatAnthropic } from '@langchain/anthropic';
4
- import { z } from 'zod';
5
-
6
- type ChatModel = ChatOpenAI | ChatAnthropic;
7
- type LLMProvider = 'openai' | 'deepseek' | 'anthropic' | 'kimi' | 'openrouter' | 'orbgen';
8
- interface ProviderConfig {
9
- apiKey: string;
10
- baseUrl?: string;
11
- defaultModel: string;
12
- }
13
- interface LLMClientOptions {
14
- provider?: LLMProvider;
15
- model?: string;
16
- temperature?: number;
17
- streaming?: boolean;
18
- rateLimiter?: RateLimiterOptions;
19
- useGlobalRateLimiter?: boolean;
20
- trackTokens?: boolean;
21
- }
22
- interface LLMCallOptions<T = unknown> {
23
- systemPrompt: string;
24
- userPrompt: string;
25
- schema?: z.ZodSchema<T>;
26
- maxRetries?: number;
27
- retryWithContext?: boolean;
28
- maxTokens?: number;
29
- skipSchemaValidation?: boolean;
30
- temperature?: number;
31
- }
32
- interface CacheableBlock {
33
- type: 'text';
34
- text: string;
35
- cache_control?: {
36
- type: 'ephemeral';
37
- };
38
- }
39
- interface CacheAwareLLMCallOptions<T = unknown> extends LLMCallOptions<T> {
40
- systemBlocks?: CacheableBlock[];
41
- userBlocks?: CacheableBlock[];
42
- rawText?: boolean;
43
- }
44
- interface LLMUsage {
45
- promptTokens: number;
46
- completionTokens: number;
47
- totalTokens: number;
48
- }
49
- type LLMFinishReason = 'stop' | 'length' | 'content_filter' | 'tool_calls' | null;
50
- interface LLMResponse<T> {
51
- data: T;
52
- raw: string;
53
- finishReason: LLMFinishReason;
54
- usage: LLMUsage | null;
55
- }
56
- interface LLMStreamOptions {
57
- systemPrompt: string;
58
- messages: Array<{
59
- role: 'system' | 'user' | 'assistant';
60
- content: string;
61
- }>;
62
- maxTokens?: number;
63
- temperature?: number;
64
- }
65
- interface LLMStreamChunk {
66
- content: string;
67
- done: boolean;
68
- }
69
- declare const DEEPSEEK_MODELS: {
70
- readonly CHAT: "deepseek-chat";
71
- readonly CODER: "deepseek-coder";
72
- readonly REASONER: "deepseek-reasoner";
73
- readonly V4_PRO: "deepseek-v4-pro";
74
- readonly V4_FLASH: "deepseek-v4-flash";
75
- };
76
- declare const OPENAI_MODELS: {
77
- readonly GPT4O: "gpt-4o";
78
- readonly GPT4O_MINI: "gpt-4o-mini";
79
- readonly GPT4_TURBO: "gpt-4-turbo";
80
- readonly GPT35_TURBO: "gpt-3.5-turbo";
81
- readonly GPT_5_1: "gpt-5.1";
82
- };
83
- declare const ANTHROPIC_MODELS: {
84
- readonly CLAUDE_SONNET_4_5: "claude-sonnet-4-5-20250929";
85
- readonly CLAUDE_SONNET_4: "claude-sonnet-4-20250514";
86
- readonly CLAUDE_OPUS_4_5: "claude-opus-4-5-20250929";
87
- readonly CLAUDE_3_5_HAIKU: "claude-3-5-haiku-20241022";
88
- };
89
- declare const KIMI_MODELS: {
90
- readonly K2_5: "kimi-k2.5";
91
- };
92
- declare const OPENROUTER_MODELS: {
93
- readonly QWEN_2_5_72B: "qwen/qwen-2.5-72b-instruct";
94
- readonly QWEN_2_5_CODER_32B: "qwen/qwen-2.5-coder-32b-instruct";
95
- readonly QWEN_3_235B: "qwen/qwen3-235b-a22b";
96
- readonly GEMMA_3_4B: "google/gemma-3-4b-it";
97
- readonly GEMMA_3_12B: "google/gemma-3-12b-it";
98
- readonly GEMMA_3_27B: "google/gemma-3-27b-it";
99
- readonly MINISTRAL_8B: "mistralai/ministral-8b-2512";
100
- readonly MISTRAL_SMALL_3_1: "mistralai/mistral-small-3.1-24b-instruct";
101
- readonly MISTRAL_MEDIUM_3_1: "mistralai/mistral-medium-3.1";
102
- readonly LLAMA_3_3_70B: "meta-llama/llama-3.3-70b-instruct";
103
- readonly LLAMA_3_1_405B: "meta-llama/llama-3.1-405b-instruct";
104
- readonly LLAMA_4_MAVERICK: "meta-llama/llama-4-maverick";
105
- readonly LLAMA_4_SCOUT: "meta-llama/llama-4-scout";
106
- readonly KIMI_K2: "moonshotai/kimi-k2";
107
- readonly GLM_4_7: "z-ai/glm-4.7";
108
- };
109
- declare class LLMClient {
110
- private model;
111
- private rateLimiter;
112
- private tokenTracker;
113
- private modelName;
114
- private provider;
115
- private providerConfig;
116
- private temperature;
117
- private streaming;
118
- constructor(options?: LLMClientOptions);
119
- private usesMaxCompletionTokens;
120
- private createModel;
121
- private getModelWithOptions;
122
- /**
123
- * Check if this model is a Qwen3.5 thinking model.
124
- * These models burn all output tokens on internal reasoning
125
- * unless thinking is explicitly disabled via /no_think prefix.
126
- */
127
- private isQwenThinkingModel;
128
- /**
129
- * Prepare user prompt with provider-specific adjustments.
130
- * Qwen3.5 models require /no_think to disable reasoning mode.
131
- */
132
- private prepareUserPrompt;
133
- getProvider(): LLMProvider;
134
- getModelName(): string;
135
- getModel(): ChatModel;
136
- getRateLimiterStatus(): {
137
- queueLength: number;
138
- activeRequests: number;
139
- minuteTokens: number;
140
- secondTokens: number;
141
- backoffMs: number;
142
- };
143
- getTokenUsage(): TokenUsage | null;
144
- call<T>(options: LLMCallOptions<T>): Promise<T>;
145
- callWithMetadata<T>(options: LLMCallOptions<T>): Promise<LLMResponse<T>>;
146
- private extractFinishReason;
147
- /**
148
- * Default hard timeout for raw LLM invocations. The langchain ChatOpenAI
149
- * `timeout` option doesn't reliably fire when an HTTP connection
150
- * half-opens (the response stream can sit open indefinitely), so every
151
- * raw call gets wrapped in an AbortController-driven deadline. Callers
152
- * can override by passing their own `signal`.
153
- */
154
- private static readonly DEFAULT_RAW_TIMEOUT_MS;
155
- /**
156
- * Wrap a `model.invoke(...)` with start/finish/error logging and a hard
157
- * abort-driven timeout. Returns whatever the underlying `invoke` returns.
158
- *
159
- * The raw paths (`callRaw`, `callRawWithMetadata`, `callWithMessages`)
160
- * used to be silent — when a connection half-opened, callers sat
161
- * indefinitely with no feedback. This wrapper makes hangs visible (start
162
- * + duration logs) and bounded (timeout fires with a clear error).
163
- */
164
- private invokeWithObservability;
165
- callRaw(options: {
166
- systemPrompt: string;
167
- userPrompt: string;
168
- maxTokens?: number;
169
- signal?: AbortSignal;
170
- }): Promise<string>;
171
- callRawWithMetadata(options: {
172
- systemPrompt: string;
173
- userPrompt: string;
174
- maxTokens?: number;
175
- signal?: AbortSignal;
176
- }): Promise<Omit<LLMResponse<string>, 'data'> & {
177
- raw: string;
178
- }>;
179
- /**
180
- * Call the LLM with a structured messages array.
181
- *
182
- * Unlike callRawWithMetadata (which takes systemPrompt + userPrompt strings),
183
- * this accepts a full conversation history with proper role separation.
184
- * This enables:
185
- * - Anthropic prompt caching on message boundaries (not just system prompt)
186
- * - Proper tool_use/tool_result role handling across providers
187
- * - Reduced token waste from string concatenation
188
- *
189
- * All providers support the messages format:
190
- * - Anthropic: native messages API with cache_control
191
- * - DeepSeek: OpenAI-compatible messages via ChatOpenAI
192
- * - OpenRouter: OpenAI-compatible messages via ChatOpenAI
193
- */
194
- callWithMessages(options: {
195
- messages: Array<{
196
- role: string;
197
- content: string;
198
- }>;
199
- maxTokens?: number;
200
- signal?: AbortSignal;
201
- }): Promise<Omit<LLMResponse<string>, 'data'> & {
202
- raw: string;
203
- }>;
204
- /**
205
- * Stream a raw text response as an async iterator of content chunks.
206
- * Uses the underlying LangChain model's .stream() method.
207
- *
208
- * @param options - System prompt plus full message history
209
- * @yields LLMStreamChunk with content deltas and a done flag
210
- */
211
- streamRaw(options: LLMStreamOptions): AsyncGenerator<LLMStreamChunk>;
212
- private isRateLimitError;
213
- callWithCache<T>(options: CacheAwareLLMCallOptions<T>): Promise<LLMResponse<T>>;
214
- static cacheableBlock(text: string, cache?: boolean): CacheableBlock;
215
- }
216
- declare function getSharedLLMClient(options?: LLMClientOptions): LLMClient;
217
- declare function resetSharedLLMClient(provider?: LLMProvider): void;
218
- declare function getAvailableProvider(): LLMProvider;
219
- declare function isProviderAvailable(provider: LLMProvider): boolean;
220
- /**
221
- * Create an LLM client optimized for requirements analysis.
222
- *
223
- * Uses lower temperature (0.3) for more deterministic output.
224
- * Defaults to GPT-5.1 for OpenAI or DeepSeek Chat.
225
- *
226
- * @param {Partial<LLMClientOptions>} [options] - Optional client configuration
227
- * @returns {LLMClient} Configured LLM client
228
- */
229
- declare function createRequirementsClient(options?: Partial<LLMClientOptions>): LLMClient;
230
- /**
231
- * Create an LLM client optimized for creative tasks.
232
- *
233
- * Uses higher temperature (0.7) for more varied output.
234
- * Defaults to GPT-4o or DeepSeek Reasoner.
235
- *
236
- * @param {Partial<LLMClientOptions>} [options] - Optional client configuration
237
- * @returns {LLMClient} Configured LLM client
238
- */
239
- declare function createCreativeClient(options?: Partial<LLMClientOptions>): LLMClient;
240
- /**
241
- * Create an LLM client optimized for code fixing.
242
- *
243
- * Uses low temperature (0.2) for precise, deterministic fixes.
244
- * Defaults to GPT-4o Mini or DeepSeek Chat for cost efficiency.
245
- *
246
- * @param {Partial<LLMClientOptions>} [options] - Optional client configuration
247
- * @returns {LLMClient} Configured LLM client
248
- */
249
- declare function createFixClient(options?: Partial<LLMClientOptions>): LLMClient;
250
- /**
251
- * Create a DeepSeek LLM client.
252
- *
253
- * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
254
- * @returns {LLMClient} Configured DeepSeek client
255
- */
256
- declare function createDeepSeekClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
257
- /**
258
- * Create an OpenAI LLM client.
259
- *
260
- * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
261
- * @returns {LLMClient} Configured OpenAI client
262
- */
263
- declare function createOpenAIClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
264
- /**
265
- * Create an Anthropic LLM client.
266
- *
267
- * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
268
- * @returns {LLMClient} Configured Anthropic client
269
- */
270
- declare function createAnthropicClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
271
- /**
272
- * Create a Kimi LLM client.
273
- *
274
- * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
275
- * @returns {LLMClient} Configured Kimi client
276
- */
277
- declare function createKimiClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
278
- /**
279
- * Create an OpenRouter LLM client.
280
- *
281
- * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
282
- * @returns {LLMClient} Configured OpenRouter client
283
- */
284
- declare function createOpenRouterClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
285
- /**
286
- * Create a Zhipu (GLM) LLM client via OpenRouter.
287
- *
288
- * @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
289
- * @returns {LLMClient} Configured Zhipu client
290
- */
291
- declare function createZhipuClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
292
-
293
- export { ANTHROPIC_MODELS, type CacheAwareLLMCallOptions, type CacheableBlock, DEEPSEEK_MODELS, KIMI_MODELS, type LLMCallOptions, LLMClient, type LLMClientOptions, type LLMFinishReason, type LLMProvider, type LLMResponse, type LLMStreamChunk, type LLMStreamOptions, type LLMUsage, OPENAI_MODELS, OPENROUTER_MODELS, type ProviderConfig, createAnthropicClient, createCreativeClient, createDeepSeekClient, createFixClient, createKimiClient, createOpenAIClient, createOpenRouterClient, createRequirementsClient, createZhipuClient, getAvailableProvider, getSharedLLMClient, isProviderAvailable, resetSharedLLMClient };
1
+ import './rate-limiter-BqWOhaXY.js';
2
+ import '@langchain/openai';
3
+ import '@langchain/anthropic';
4
+ import 'zod';
5
+ export { A as ANTHROPIC_MODELS, C as CacheAwareLLMCallOptions, a as CacheableBlock, D as DEEPSEEK_MODELS, K as KIMI_MODELS, L as LLMCallOptions, i as LLMClient, j as LLMClientOptions, k as LLMFinishReason, l as LLMProvider, m as LLMResponse, n as LLMStreamChunk, o as LLMStreamOptions, p as LLMUsage, O as OPENAI_MODELS, q as OPENROUTER_MODELS, P as ProviderConfig, r as createAnthropicClient, s as createCreativeClient, t as createDeepSeekClient, u as createFixClient, v as createKimiClient, w as createOpenAIClient, x as createOpenRouterClient, y as createRequirementsClient, z as createZhipuClient, B as getAvailableProvider, E as getSharedLLMClient, F as isProviderAvailable, H as resetSharedLLMClient } from './client-C_CnNdQx.js';
package/dist/client.js CHANGED
@@ -18,7 +18,7 @@ import {
18
18
  getSharedLLMClient,
19
19
  isProviderAvailable,
20
20
  resetSharedLLMClient
21
- } from "./chunk-AEFJ4WH3.js";
21
+ } from "./chunk-XL452RVW.js";
22
22
  import "./chunk-LZGCEPHN.js";
23
23
  import "./chunk-TGHGQB5I.js";
24
24
  export {