@almadar/llm 2.14.0 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-AEFJ4WH3.js → chunk-XL452RVW.js} +137 -1
- package/dist/chunk-XL452RVW.js.map +1 -0
- package/dist/client-C_CnNdQx.d.ts +388 -0
- package/dist/client.d.ts +5 -293
- package/dist/client.js +1 -1
- package/dist/index.d.ts +3 -3
- package/dist/index.js +4 -2
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/src/client.ts +114 -0
- package/src/embedding-client.ts +2 -2
- package/src/index.ts +11 -0
- package/src/tool-call-types.ts +123 -0
- package/dist/chunk-AEFJ4WH3.js.map +0 -1
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
import { a as RateLimiterOptions, b as TokenUsage } from './rate-limiter-BqWOhaXY.js';
|
|
2
|
+
import { ChatOpenAI } from '@langchain/openai';
|
|
3
|
+
import { ChatAnthropic } from '@langchain/anthropic';
|
|
4
|
+
import { z } from 'zod';
|
|
5
|
+
|
|
6
|
+
/**
|
|
7
|
+
* OpenAI Chat Completions wire-format types used by `LLMClient.callWithTools`.
|
|
8
|
+
*
|
|
9
|
+
* These mirror the public OpenAI Chat Completions API spec, which is also
|
|
10
|
+
* the protocol every OpenAI-compatible provider (DeepSeek, OpenRouter,
|
|
11
|
+
* Kimi, OrbGen, etc.) implements. The types are intentionally faithful
|
|
12
|
+
* to the wire format — when the LLM emits a `reasoning_content` field
|
|
13
|
+
* (DeepSeek V4 thinking mode), it's preserved verbatim and echoed back
|
|
14
|
+
* on the next round-trip.
|
|
15
|
+
*/
|
|
16
|
+
interface ChatCompletionToolDef {
|
|
17
|
+
type: 'function';
|
|
18
|
+
function: {
|
|
19
|
+
name: string;
|
|
20
|
+
description: string;
|
|
21
|
+
/** JSON Schema describing the tool's parameters. */
|
|
22
|
+
parameters: {
|
|
23
|
+
[key: string]: unknown;
|
|
24
|
+
};
|
|
25
|
+
};
|
|
26
|
+
}
|
|
27
|
+
type ChatCompletionRole = 'system' | 'user' | 'assistant' | 'tool';
|
|
28
|
+
interface ChatCompletionToolCall {
|
|
29
|
+
id: string;
|
|
30
|
+
type: 'function';
|
|
31
|
+
function: {
|
|
32
|
+
name: string;
|
|
33
|
+
arguments: string;
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
interface ChatCompletionMessage {
|
|
37
|
+
role: ChatCompletionRole;
|
|
38
|
+
/** Null is valid (assistant-only) when the message exists purely to carry `tool_calls`. */
|
|
39
|
+
content: string | null;
|
|
40
|
+
/** Present on assistant turns that called one or more tools. */
|
|
41
|
+
tool_calls?: ChatCompletionToolCall[];
|
|
42
|
+
/** Present on tool-role messages — matches `tool_calls[*].id` of the preceding assistant turn. */
|
|
43
|
+
tool_call_id?: string;
|
|
44
|
+
/**
|
|
45
|
+
* DeepSeek V4 thinking-mode chain-of-thought string. Must be echoed
|
|
46
|
+
* back on the next round-trip when the assistant turn triggered
|
|
47
|
+
* tool_calls — that's the protocol contract that LangChain's
|
|
48
|
+
* ChatOpenAI converter breaks.
|
|
49
|
+
*/
|
|
50
|
+
reasoning_content?: string;
|
|
51
|
+
}
|
|
52
|
+
interface ChatCompletionChoice {
|
|
53
|
+
index: number;
|
|
54
|
+
message: ChatCompletionMessage;
|
|
55
|
+
finish_reason: string;
|
|
56
|
+
}
|
|
57
|
+
interface ChatCompletionUsage {
|
|
58
|
+
prompt_tokens: number;
|
|
59
|
+
completion_tokens: number;
|
|
60
|
+
total_tokens: number;
|
|
61
|
+
}
|
|
62
|
+
interface ChatCompletionResponse {
|
|
63
|
+
choices: ChatCompletionChoice[];
|
|
64
|
+
usage?: ChatCompletionUsage;
|
|
65
|
+
}
|
|
66
|
+
declare function parseChatCompletionResponse(raw: string): ChatCompletionResponse;
|
|
67
|
+
|
|
68
|
+
type ChatModel = ChatOpenAI | ChatAnthropic;
|
|
69
|
+
type LLMProvider = 'openai' | 'deepseek' | 'anthropic' | 'kimi' | 'openrouter' | 'orbgen';
|
|
70
|
+
interface ProviderConfig {
|
|
71
|
+
apiKey: string;
|
|
72
|
+
baseUrl?: string;
|
|
73
|
+
defaultModel: string;
|
|
74
|
+
}
|
|
75
|
+
interface LLMClientOptions {
|
|
76
|
+
provider?: LLMProvider;
|
|
77
|
+
model?: string;
|
|
78
|
+
temperature?: number;
|
|
79
|
+
streaming?: boolean;
|
|
80
|
+
rateLimiter?: RateLimiterOptions;
|
|
81
|
+
useGlobalRateLimiter?: boolean;
|
|
82
|
+
trackTokens?: boolean;
|
|
83
|
+
}
|
|
84
|
+
interface LLMCallOptions<T = unknown> {
|
|
85
|
+
systemPrompt: string;
|
|
86
|
+
userPrompt: string;
|
|
87
|
+
schema?: z.ZodSchema<T>;
|
|
88
|
+
maxRetries?: number;
|
|
89
|
+
retryWithContext?: boolean;
|
|
90
|
+
maxTokens?: number;
|
|
91
|
+
skipSchemaValidation?: boolean;
|
|
92
|
+
temperature?: number;
|
|
93
|
+
}
|
|
94
|
+
interface CacheableBlock {
|
|
95
|
+
type: 'text';
|
|
96
|
+
text: string;
|
|
97
|
+
cache_control?: {
|
|
98
|
+
type: 'ephemeral';
|
|
99
|
+
};
|
|
100
|
+
}
|
|
101
|
+
interface CacheAwareLLMCallOptions<T = unknown> extends LLMCallOptions<T> {
|
|
102
|
+
systemBlocks?: CacheableBlock[];
|
|
103
|
+
userBlocks?: CacheableBlock[];
|
|
104
|
+
rawText?: boolean;
|
|
105
|
+
}
|
|
106
|
+
interface LLMUsage {
|
|
107
|
+
promptTokens: number;
|
|
108
|
+
completionTokens: number;
|
|
109
|
+
totalTokens: number;
|
|
110
|
+
}
|
|
111
|
+
type LLMFinishReason = 'stop' | 'length' | 'content_filter' | 'tool_calls' | null;
|
|
112
|
+
interface LLMResponse<T> {
|
|
113
|
+
data: T;
|
|
114
|
+
raw: string;
|
|
115
|
+
finishReason: LLMFinishReason;
|
|
116
|
+
usage: LLMUsage | null;
|
|
117
|
+
}
|
|
118
|
+
interface LLMStreamOptions {
|
|
119
|
+
systemPrompt: string;
|
|
120
|
+
messages: Array<{
|
|
121
|
+
role: 'system' | 'user' | 'assistant';
|
|
122
|
+
content: string;
|
|
123
|
+
}>;
|
|
124
|
+
maxTokens?: number;
|
|
125
|
+
temperature?: number;
|
|
126
|
+
}
|
|
127
|
+
interface LLMStreamChunk {
|
|
128
|
+
content: string;
|
|
129
|
+
done: boolean;
|
|
130
|
+
}
|
|
131
|
+
declare const DEEPSEEK_MODELS: {
|
|
132
|
+
readonly CHAT: "deepseek-chat";
|
|
133
|
+
readonly CODER: "deepseek-coder";
|
|
134
|
+
readonly REASONER: "deepseek-reasoner";
|
|
135
|
+
readonly V4_PRO: "deepseek-v4-pro";
|
|
136
|
+
readonly V4_FLASH: "deepseek-v4-flash";
|
|
137
|
+
};
|
|
138
|
+
declare const OPENAI_MODELS: {
|
|
139
|
+
readonly GPT4O: "gpt-4o";
|
|
140
|
+
readonly GPT4O_MINI: "gpt-4o-mini";
|
|
141
|
+
readonly GPT4_TURBO: "gpt-4-turbo";
|
|
142
|
+
readonly GPT35_TURBO: "gpt-3.5-turbo";
|
|
143
|
+
readonly GPT_5_1: "gpt-5.1";
|
|
144
|
+
};
|
|
145
|
+
declare const ANTHROPIC_MODELS: {
|
|
146
|
+
readonly CLAUDE_SONNET_4_5: "claude-sonnet-4-5-20250929";
|
|
147
|
+
readonly CLAUDE_SONNET_4: "claude-sonnet-4-20250514";
|
|
148
|
+
readonly CLAUDE_OPUS_4_5: "claude-opus-4-5-20250929";
|
|
149
|
+
readonly CLAUDE_3_5_HAIKU: "claude-3-5-haiku-20241022";
|
|
150
|
+
};
|
|
151
|
+
declare const KIMI_MODELS: {
|
|
152
|
+
readonly K2_5: "kimi-k2.5";
|
|
153
|
+
};
|
|
154
|
+
declare const OPENROUTER_MODELS: {
|
|
155
|
+
readonly QWEN_2_5_72B: "qwen/qwen-2.5-72b-instruct";
|
|
156
|
+
readonly QWEN_2_5_CODER_32B: "qwen/qwen-2.5-coder-32b-instruct";
|
|
157
|
+
readonly QWEN_3_235B: "qwen/qwen3-235b-a22b";
|
|
158
|
+
readonly GEMMA_3_4B: "google/gemma-3-4b-it";
|
|
159
|
+
readonly GEMMA_3_12B: "google/gemma-3-12b-it";
|
|
160
|
+
readonly GEMMA_3_27B: "google/gemma-3-27b-it";
|
|
161
|
+
readonly MINISTRAL_8B: "mistralai/ministral-8b-2512";
|
|
162
|
+
readonly MISTRAL_SMALL_3_1: "mistralai/mistral-small-3.1-24b-instruct";
|
|
163
|
+
readonly MISTRAL_MEDIUM_3_1: "mistralai/mistral-medium-3.1";
|
|
164
|
+
readonly LLAMA_3_3_70B: "meta-llama/llama-3.3-70b-instruct";
|
|
165
|
+
readonly LLAMA_3_1_405B: "meta-llama/llama-3.1-405b-instruct";
|
|
166
|
+
readonly LLAMA_4_MAVERICK: "meta-llama/llama-4-maverick";
|
|
167
|
+
readonly LLAMA_4_SCOUT: "meta-llama/llama-4-scout";
|
|
168
|
+
readonly KIMI_K2: "moonshotai/kimi-k2";
|
|
169
|
+
readonly GLM_4_7: "z-ai/glm-4.7";
|
|
170
|
+
};
|
|
171
|
+
declare class LLMClient {
|
|
172
|
+
private model;
|
|
173
|
+
private rateLimiter;
|
|
174
|
+
private tokenTracker;
|
|
175
|
+
private modelName;
|
|
176
|
+
private provider;
|
|
177
|
+
private providerConfig;
|
|
178
|
+
private temperature;
|
|
179
|
+
private streaming;
|
|
180
|
+
constructor(options?: LLMClientOptions);
|
|
181
|
+
private usesMaxCompletionTokens;
|
|
182
|
+
private createModel;
|
|
183
|
+
private getModelWithOptions;
|
|
184
|
+
/**
|
|
185
|
+
* Check if this model is a Qwen3.5 thinking model.
|
|
186
|
+
* These models burn all output tokens on internal reasoning
|
|
187
|
+
* unless thinking is explicitly disabled via /no_think prefix.
|
|
188
|
+
*/
|
|
189
|
+
private isQwenThinkingModel;
|
|
190
|
+
/**
|
|
191
|
+
* Prepare user prompt with provider-specific adjustments.
|
|
192
|
+
* Qwen3.5 models require /no_think to disable reasoning mode.
|
|
193
|
+
*/
|
|
194
|
+
private prepareUserPrompt;
|
|
195
|
+
getProvider(): LLMProvider;
|
|
196
|
+
getModelName(): string;
|
|
197
|
+
getModel(): ChatModel;
|
|
198
|
+
getRateLimiterStatus(): {
|
|
199
|
+
queueLength: number;
|
|
200
|
+
activeRequests: number;
|
|
201
|
+
minuteTokens: number;
|
|
202
|
+
secondTokens: number;
|
|
203
|
+
backoffMs: number;
|
|
204
|
+
};
|
|
205
|
+
getTokenUsage(): TokenUsage | null;
|
|
206
|
+
call<T>(options: LLMCallOptions<T>): Promise<T>;
|
|
207
|
+
callWithMetadata<T>(options: LLMCallOptions<T>): Promise<LLMResponse<T>>;
|
|
208
|
+
private extractFinishReason;
|
|
209
|
+
/**
|
|
210
|
+
* Default hard timeout for raw LLM invocations. The langchain ChatOpenAI
|
|
211
|
+
* `timeout` option doesn't reliably fire when an HTTP connection
|
|
212
|
+
* half-opens (the response stream can sit open indefinitely), so every
|
|
213
|
+
* raw call gets wrapped in an AbortController-driven deadline. Callers
|
|
214
|
+
* can override by passing their own `signal`.
|
|
215
|
+
*/
|
|
216
|
+
private static readonly DEFAULT_RAW_TIMEOUT_MS;
|
|
217
|
+
/**
|
|
218
|
+
* Wrap a `model.invoke(...)` with start/finish/error logging and a hard
|
|
219
|
+
* abort-driven timeout. Returns whatever the underlying `invoke` returns.
|
|
220
|
+
*
|
|
221
|
+
* The raw paths (`callRaw`, `callRawWithMetadata`, `callWithMessages`)
|
|
222
|
+
* used to be silent — when a connection half-opened, callers sat
|
|
223
|
+
* indefinitely with no feedback. This wrapper makes hangs visible (start
|
|
224
|
+
* + duration logs) and bounded (timeout fires with a clear error).
|
|
225
|
+
*/
|
|
226
|
+
private invokeWithObservability;
|
|
227
|
+
callRaw(options: {
|
|
228
|
+
systemPrompt: string;
|
|
229
|
+
userPrompt: string;
|
|
230
|
+
maxTokens?: number;
|
|
231
|
+
signal?: AbortSignal;
|
|
232
|
+
}): Promise<string>;
|
|
233
|
+
callRawWithMetadata(options: {
|
|
234
|
+
systemPrompt: string;
|
|
235
|
+
userPrompt: string;
|
|
236
|
+
maxTokens?: number;
|
|
237
|
+
signal?: AbortSignal;
|
|
238
|
+
}): Promise<Omit<LLMResponse<string>, 'data'> & {
|
|
239
|
+
raw: string;
|
|
240
|
+
}>;
|
|
241
|
+
/**
|
|
242
|
+
* Call the LLM with a structured messages array.
|
|
243
|
+
*
|
|
244
|
+
* Unlike callRawWithMetadata (which takes systemPrompt + userPrompt strings),
|
|
245
|
+
* this accepts a full conversation history with proper role separation.
|
|
246
|
+
* This enables:
|
|
247
|
+
* - Anthropic prompt caching on message boundaries (not just system prompt)
|
|
248
|
+
* - Proper tool_use/tool_result role handling across providers
|
|
249
|
+
* - Reduced token waste from string concatenation
|
|
250
|
+
*
|
|
251
|
+
* All providers support the messages format:
|
|
252
|
+
* - Anthropic: native messages API with cache_control
|
|
253
|
+
* - DeepSeek: OpenAI-compatible messages via ChatOpenAI
|
|
254
|
+
* - OpenRouter: OpenAI-compatible messages via ChatOpenAI
|
|
255
|
+
*/
|
|
256
|
+
callWithMessages(options: {
|
|
257
|
+
messages: Array<{
|
|
258
|
+
role: string;
|
|
259
|
+
content: string;
|
|
260
|
+
}>;
|
|
261
|
+
maxTokens?: number;
|
|
262
|
+
signal?: AbortSignal;
|
|
263
|
+
}): Promise<Omit<LLMResponse<string>, 'data'> & {
|
|
264
|
+
raw: string;
|
|
265
|
+
}>;
|
|
266
|
+
/**
|
|
267
|
+
* Tool-calling chat-completion call that speaks the OpenAI wire format
|
|
268
|
+
* directly via `fetch`, bypassing LangChain's `ChatOpenAI` converter.
|
|
269
|
+
*
|
|
270
|
+
* MOTIVATION: LangChain's `convertMessagesToCompletionsMessageParams`
|
|
271
|
+
* silently drops every `additional_kwargs` field except `function_call`
|
|
272
|
+
* and `tool_calls`. DeepSeek V4 thinking-mode requires
|
|
273
|
+
* `reasoning_content` to be echoed back on assistant turns that
|
|
274
|
+
* triggered tool_calls; LangChain's converter strips it, the next
|
|
275
|
+
* round-trip fails with "400 The reasoning_content in the thinking
|
|
276
|
+
* mode must be passed back to the API." This method preserves every
|
|
277
|
+
* assistant field verbatim across round-trips.
|
|
278
|
+
*
|
|
279
|
+
* Supported providers: any OpenAI-compatible endpoint (openai,
|
|
280
|
+
* deepseek, openrouter, kimi, orbgen). Anthropic uses a different
|
|
281
|
+
* wire format and is intentionally not supported here — use
|
|
282
|
+
* `callWithMessages` for Anthropic.
|
|
283
|
+
*
|
|
284
|
+
* Defaults `parallel_tool_calls: false` — sequential tool dispatch is
|
|
285
|
+
* the protocol-safe baseline. Multi-tool-call assistant messages
|
|
286
|
+
* trigger DeepSeek's "insufficient tool messages" 400 error.
|
|
287
|
+
*/
|
|
288
|
+
callWithTools(options: {
|
|
289
|
+
messages: ReadonlyArray<ChatCompletionMessage>;
|
|
290
|
+
tools: ReadonlyArray<ChatCompletionToolDef>;
|
|
291
|
+
maxTokens?: number;
|
|
292
|
+
parallelToolCalls?: boolean;
|
|
293
|
+
signal?: AbortSignal;
|
|
294
|
+
}): Promise<{
|
|
295
|
+
message: ChatCompletionMessage;
|
|
296
|
+
finishReason: string;
|
|
297
|
+
usage: LLMUsage | null;
|
|
298
|
+
}>;
|
|
299
|
+
/**
|
|
300
|
+
* Stream a raw text response as an async iterator of content chunks.
|
|
301
|
+
* Uses the underlying LangChain model's .stream() method.
|
|
302
|
+
*
|
|
303
|
+
* @param options - System prompt plus full message history
|
|
304
|
+
* @yields LLMStreamChunk with content deltas and a done flag
|
|
305
|
+
*/
|
|
306
|
+
streamRaw(options: LLMStreamOptions): AsyncGenerator<LLMStreamChunk>;
|
|
307
|
+
private isRateLimitError;
|
|
308
|
+
callWithCache<T>(options: CacheAwareLLMCallOptions<T>): Promise<LLMResponse<T>>;
|
|
309
|
+
static cacheableBlock(text: string, cache?: boolean): CacheableBlock;
|
|
310
|
+
}
|
|
311
|
+
declare function getSharedLLMClient(options?: LLMClientOptions): LLMClient;
|
|
312
|
+
declare function resetSharedLLMClient(provider?: LLMProvider): void;
|
|
313
|
+
declare function getAvailableProvider(): LLMProvider;
|
|
314
|
+
declare function isProviderAvailable(provider: LLMProvider): boolean;
|
|
315
|
+
/**
|
|
316
|
+
* Create an LLM client optimized for requirements analysis.
|
|
317
|
+
*
|
|
318
|
+
* Uses lower temperature (0.3) for more deterministic output.
|
|
319
|
+
* Defaults to GPT-5.1 for OpenAI or DeepSeek Chat.
|
|
320
|
+
*
|
|
321
|
+
* @param {Partial<LLMClientOptions>} [options] - Optional client configuration
|
|
322
|
+
* @returns {LLMClient} Configured LLM client
|
|
323
|
+
*/
|
|
324
|
+
declare function createRequirementsClient(options?: Partial<LLMClientOptions>): LLMClient;
|
|
325
|
+
/**
|
|
326
|
+
* Create an LLM client optimized for creative tasks.
|
|
327
|
+
*
|
|
328
|
+
* Uses higher temperature (0.7) for more varied output.
|
|
329
|
+
* Defaults to GPT-4o or DeepSeek Reasoner.
|
|
330
|
+
*
|
|
331
|
+
* @param {Partial<LLMClientOptions>} [options] - Optional client configuration
|
|
332
|
+
* @returns {LLMClient} Configured LLM client
|
|
333
|
+
*/
|
|
334
|
+
declare function createCreativeClient(options?: Partial<LLMClientOptions>): LLMClient;
|
|
335
|
+
/**
|
|
336
|
+
* Create an LLM client optimized for code fixing.
|
|
337
|
+
*
|
|
338
|
+
* Uses low temperature (0.2) for precise, deterministic fixes.
|
|
339
|
+
* Defaults to GPT-4o Mini or DeepSeek Chat for cost efficiency.
|
|
340
|
+
*
|
|
341
|
+
* @param {Partial<LLMClientOptions>} [options] - Optional client configuration
|
|
342
|
+
* @returns {LLMClient} Configured LLM client
|
|
343
|
+
*/
|
|
344
|
+
declare function createFixClient(options?: Partial<LLMClientOptions>): LLMClient;
|
|
345
|
+
/**
|
|
346
|
+
* Create a DeepSeek LLM client.
|
|
347
|
+
*
|
|
348
|
+
* @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
|
|
349
|
+
* @returns {LLMClient} Configured DeepSeek client
|
|
350
|
+
*/
|
|
351
|
+
declare function createDeepSeekClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
|
|
352
|
+
/**
|
|
353
|
+
* Create an OpenAI LLM client.
|
|
354
|
+
*
|
|
355
|
+
* @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
|
|
356
|
+
* @returns {LLMClient} Configured OpenAI client
|
|
357
|
+
*/
|
|
358
|
+
declare function createOpenAIClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
|
|
359
|
+
/**
|
|
360
|
+
* Create an Anthropic LLM client.
|
|
361
|
+
*
|
|
362
|
+
* @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
|
|
363
|
+
* @returns {LLMClient} Configured Anthropic client
|
|
364
|
+
*/
|
|
365
|
+
declare function createAnthropicClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
|
|
366
|
+
/**
|
|
367
|
+
* Create a Kimi LLM client.
|
|
368
|
+
*
|
|
369
|
+
* @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
|
|
370
|
+
* @returns {LLMClient} Configured Kimi client
|
|
371
|
+
*/
|
|
372
|
+
declare function createKimiClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
|
|
373
|
+
/**
|
|
374
|
+
* Create an OpenRouter LLM client.
|
|
375
|
+
*
|
|
376
|
+
* @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
|
|
377
|
+
* @returns {LLMClient} Configured OpenRouter client
|
|
378
|
+
*/
|
|
379
|
+
declare function createOpenRouterClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
|
|
380
|
+
/**
|
|
381
|
+
* Create a Zhipu (GLM) LLM client via OpenRouter.
|
|
382
|
+
*
|
|
383
|
+
* @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
|
|
384
|
+
* @returns {LLMClient} Configured Zhipu client
|
|
385
|
+
*/
|
|
386
|
+
declare function createZhipuClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
|
|
387
|
+
|
|
388
|
+
export { ANTHROPIC_MODELS as A, getAvailableProvider as B, type CacheAwareLLMCallOptions as C, DEEPSEEK_MODELS as D, getSharedLLMClient as E, isProviderAvailable as F, parseChatCompletionResponse as G, resetSharedLLMClient as H, KIMI_MODELS as K, type LLMCallOptions as L, OPENAI_MODELS as O, type ProviderConfig as P, type CacheableBlock as a, type ChatCompletionChoice as b, type ChatCompletionMessage as c, type ChatCompletionResponse as d, type ChatCompletionRole as e, type ChatCompletionToolCall as f, type ChatCompletionToolDef as g, type ChatCompletionUsage as h, LLMClient as i, type LLMClientOptions as j, type LLMFinishReason as k, type LLMProvider as l, type LLMResponse as m, type LLMStreamChunk as n, type LLMStreamOptions as o, type LLMUsage as p, OPENROUTER_MODELS as q, createAnthropicClient as r, createCreativeClient as s, createDeepSeekClient as t, createFixClient as u, createKimiClient as v, createOpenAIClient as w, createOpenRouterClient as x, createRequirementsClient as y, createZhipuClient as z };
|
package/dist/client.d.ts
CHANGED
|
@@ -1,293 +1,5 @@
|
|
|
1
|
-
import { a as RateLimiterOptions, b as TokenUsage } from './rate-limiter-BqWOhaXY.js';
|
|
2
|
-
import { ChatOpenAI } from '@langchain/openai';
|
|
3
|
-
import { ChatAnthropic } from '@langchain/anthropic';
|
|
4
|
-
import { z } from 'zod';
|
|
5
|
-
|
|
6
|
-
type ChatModel = ChatOpenAI | ChatAnthropic;
|
|
7
|
-
type LLMProvider = 'openai' | 'deepseek' | 'anthropic' | 'kimi' | 'openrouter' | 'orbgen';
|
|
8
|
-
interface ProviderConfig {
|
|
9
|
-
apiKey: string;
|
|
10
|
-
baseUrl?: string;
|
|
11
|
-
defaultModel: string;
|
|
12
|
-
}
|
|
13
|
-
interface LLMClientOptions {
|
|
14
|
-
provider?: LLMProvider;
|
|
15
|
-
model?: string;
|
|
16
|
-
temperature?: number;
|
|
17
|
-
streaming?: boolean;
|
|
18
|
-
rateLimiter?: RateLimiterOptions;
|
|
19
|
-
useGlobalRateLimiter?: boolean;
|
|
20
|
-
trackTokens?: boolean;
|
|
21
|
-
}
|
|
22
|
-
interface LLMCallOptions<T = unknown> {
|
|
23
|
-
systemPrompt: string;
|
|
24
|
-
userPrompt: string;
|
|
25
|
-
schema?: z.ZodSchema<T>;
|
|
26
|
-
maxRetries?: number;
|
|
27
|
-
retryWithContext?: boolean;
|
|
28
|
-
maxTokens?: number;
|
|
29
|
-
skipSchemaValidation?: boolean;
|
|
30
|
-
temperature?: number;
|
|
31
|
-
}
|
|
32
|
-
interface CacheableBlock {
|
|
33
|
-
type: 'text';
|
|
34
|
-
text: string;
|
|
35
|
-
cache_control?: {
|
|
36
|
-
type: 'ephemeral';
|
|
37
|
-
};
|
|
38
|
-
}
|
|
39
|
-
interface CacheAwareLLMCallOptions<T = unknown> extends LLMCallOptions<T> {
|
|
40
|
-
systemBlocks?: CacheableBlock[];
|
|
41
|
-
userBlocks?: CacheableBlock[];
|
|
42
|
-
rawText?: boolean;
|
|
43
|
-
}
|
|
44
|
-
interface LLMUsage {
|
|
45
|
-
promptTokens: number;
|
|
46
|
-
completionTokens: number;
|
|
47
|
-
totalTokens: number;
|
|
48
|
-
}
|
|
49
|
-
type LLMFinishReason = 'stop' | 'length' | 'content_filter' | 'tool_calls' | null;
|
|
50
|
-
interface LLMResponse<T> {
|
|
51
|
-
data: T;
|
|
52
|
-
raw: string;
|
|
53
|
-
finishReason: LLMFinishReason;
|
|
54
|
-
usage: LLMUsage | null;
|
|
55
|
-
}
|
|
56
|
-
interface LLMStreamOptions {
|
|
57
|
-
systemPrompt: string;
|
|
58
|
-
messages: Array<{
|
|
59
|
-
role: 'system' | 'user' | 'assistant';
|
|
60
|
-
content: string;
|
|
61
|
-
}>;
|
|
62
|
-
maxTokens?: number;
|
|
63
|
-
temperature?: number;
|
|
64
|
-
}
|
|
65
|
-
interface LLMStreamChunk {
|
|
66
|
-
content: string;
|
|
67
|
-
done: boolean;
|
|
68
|
-
}
|
|
69
|
-
declare const DEEPSEEK_MODELS: {
|
|
70
|
-
readonly CHAT: "deepseek-chat";
|
|
71
|
-
readonly CODER: "deepseek-coder";
|
|
72
|
-
readonly REASONER: "deepseek-reasoner";
|
|
73
|
-
readonly V4_PRO: "deepseek-v4-pro";
|
|
74
|
-
readonly V4_FLASH: "deepseek-v4-flash";
|
|
75
|
-
};
|
|
76
|
-
declare const OPENAI_MODELS: {
|
|
77
|
-
readonly GPT4O: "gpt-4o";
|
|
78
|
-
readonly GPT4O_MINI: "gpt-4o-mini";
|
|
79
|
-
readonly GPT4_TURBO: "gpt-4-turbo";
|
|
80
|
-
readonly GPT35_TURBO: "gpt-3.5-turbo";
|
|
81
|
-
readonly GPT_5_1: "gpt-5.1";
|
|
82
|
-
};
|
|
83
|
-
declare const ANTHROPIC_MODELS: {
|
|
84
|
-
readonly CLAUDE_SONNET_4_5: "claude-sonnet-4-5-20250929";
|
|
85
|
-
readonly CLAUDE_SONNET_4: "claude-sonnet-4-20250514";
|
|
86
|
-
readonly CLAUDE_OPUS_4_5: "claude-opus-4-5-20250929";
|
|
87
|
-
readonly CLAUDE_3_5_HAIKU: "claude-3-5-haiku-20241022";
|
|
88
|
-
};
|
|
89
|
-
declare const KIMI_MODELS: {
|
|
90
|
-
readonly K2_5: "kimi-k2.5";
|
|
91
|
-
};
|
|
92
|
-
declare const OPENROUTER_MODELS: {
|
|
93
|
-
readonly QWEN_2_5_72B: "qwen/qwen-2.5-72b-instruct";
|
|
94
|
-
readonly QWEN_2_5_CODER_32B: "qwen/qwen-2.5-coder-32b-instruct";
|
|
95
|
-
readonly QWEN_3_235B: "qwen/qwen3-235b-a22b";
|
|
96
|
-
readonly GEMMA_3_4B: "google/gemma-3-4b-it";
|
|
97
|
-
readonly GEMMA_3_12B: "google/gemma-3-12b-it";
|
|
98
|
-
readonly GEMMA_3_27B: "google/gemma-3-27b-it";
|
|
99
|
-
readonly MINISTRAL_8B: "mistralai/ministral-8b-2512";
|
|
100
|
-
readonly MISTRAL_SMALL_3_1: "mistralai/mistral-small-3.1-24b-instruct";
|
|
101
|
-
readonly MISTRAL_MEDIUM_3_1: "mistralai/mistral-medium-3.1";
|
|
102
|
-
readonly LLAMA_3_3_70B: "meta-llama/llama-3.3-70b-instruct";
|
|
103
|
-
readonly LLAMA_3_1_405B: "meta-llama/llama-3.1-405b-instruct";
|
|
104
|
-
readonly LLAMA_4_MAVERICK: "meta-llama/llama-4-maverick";
|
|
105
|
-
readonly LLAMA_4_SCOUT: "meta-llama/llama-4-scout";
|
|
106
|
-
readonly KIMI_K2: "moonshotai/kimi-k2";
|
|
107
|
-
readonly GLM_4_7: "z-ai/glm-4.7";
|
|
108
|
-
};
|
|
109
|
-
declare class LLMClient {
|
|
110
|
-
private model;
|
|
111
|
-
private rateLimiter;
|
|
112
|
-
private tokenTracker;
|
|
113
|
-
private modelName;
|
|
114
|
-
private provider;
|
|
115
|
-
private providerConfig;
|
|
116
|
-
private temperature;
|
|
117
|
-
private streaming;
|
|
118
|
-
constructor(options?: LLMClientOptions);
|
|
119
|
-
private usesMaxCompletionTokens;
|
|
120
|
-
private createModel;
|
|
121
|
-
private getModelWithOptions;
|
|
122
|
-
/**
|
|
123
|
-
* Check if this model is a Qwen3.5 thinking model.
|
|
124
|
-
* These models burn all output tokens on internal reasoning
|
|
125
|
-
* unless thinking is explicitly disabled via /no_think prefix.
|
|
126
|
-
*/
|
|
127
|
-
private isQwenThinkingModel;
|
|
128
|
-
/**
|
|
129
|
-
* Prepare user prompt with provider-specific adjustments.
|
|
130
|
-
* Qwen3.5 models require /no_think to disable reasoning mode.
|
|
131
|
-
*/
|
|
132
|
-
private prepareUserPrompt;
|
|
133
|
-
getProvider(): LLMProvider;
|
|
134
|
-
getModelName(): string;
|
|
135
|
-
getModel(): ChatModel;
|
|
136
|
-
getRateLimiterStatus(): {
|
|
137
|
-
queueLength: number;
|
|
138
|
-
activeRequests: number;
|
|
139
|
-
minuteTokens: number;
|
|
140
|
-
secondTokens: number;
|
|
141
|
-
backoffMs: number;
|
|
142
|
-
};
|
|
143
|
-
getTokenUsage(): TokenUsage | null;
|
|
144
|
-
call<T>(options: LLMCallOptions<T>): Promise<T>;
|
|
145
|
-
callWithMetadata<T>(options: LLMCallOptions<T>): Promise<LLMResponse<T>>;
|
|
146
|
-
private extractFinishReason;
|
|
147
|
-
/**
|
|
148
|
-
* Default hard timeout for raw LLM invocations. The langchain ChatOpenAI
|
|
149
|
-
* `timeout` option doesn't reliably fire when an HTTP connection
|
|
150
|
-
* half-opens (the response stream can sit open indefinitely), so every
|
|
151
|
-
* raw call gets wrapped in an AbortController-driven deadline. Callers
|
|
152
|
-
* can override by passing their own `signal`.
|
|
153
|
-
*/
|
|
154
|
-
private static readonly DEFAULT_RAW_TIMEOUT_MS;
|
|
155
|
-
/**
|
|
156
|
-
* Wrap a `model.invoke(...)` with start/finish/error logging and a hard
|
|
157
|
-
* abort-driven timeout. Returns whatever the underlying `invoke` returns.
|
|
158
|
-
*
|
|
159
|
-
* The raw paths (`callRaw`, `callRawWithMetadata`, `callWithMessages`)
|
|
160
|
-
* used to be silent — when a connection half-opened, callers sat
|
|
161
|
-
* indefinitely with no feedback. This wrapper makes hangs visible (start
|
|
162
|
-
* + duration logs) and bounded (timeout fires with a clear error).
|
|
163
|
-
*/
|
|
164
|
-
private invokeWithObservability;
|
|
165
|
-
callRaw(options: {
|
|
166
|
-
systemPrompt: string;
|
|
167
|
-
userPrompt: string;
|
|
168
|
-
maxTokens?: number;
|
|
169
|
-
signal?: AbortSignal;
|
|
170
|
-
}): Promise<string>;
|
|
171
|
-
callRawWithMetadata(options: {
|
|
172
|
-
systemPrompt: string;
|
|
173
|
-
userPrompt: string;
|
|
174
|
-
maxTokens?: number;
|
|
175
|
-
signal?: AbortSignal;
|
|
176
|
-
}): Promise<Omit<LLMResponse<string>, 'data'> & {
|
|
177
|
-
raw: string;
|
|
178
|
-
}>;
|
|
179
|
-
/**
|
|
180
|
-
* Call the LLM with a structured messages array.
|
|
181
|
-
*
|
|
182
|
-
* Unlike callRawWithMetadata (which takes systemPrompt + userPrompt strings),
|
|
183
|
-
* this accepts a full conversation history with proper role separation.
|
|
184
|
-
* This enables:
|
|
185
|
-
* - Anthropic prompt caching on message boundaries (not just system prompt)
|
|
186
|
-
* - Proper tool_use/tool_result role handling across providers
|
|
187
|
-
* - Reduced token waste from string concatenation
|
|
188
|
-
*
|
|
189
|
-
* All providers support the messages format:
|
|
190
|
-
* - Anthropic: native messages API with cache_control
|
|
191
|
-
* - DeepSeek: OpenAI-compatible messages via ChatOpenAI
|
|
192
|
-
* - OpenRouter: OpenAI-compatible messages via ChatOpenAI
|
|
193
|
-
*/
|
|
194
|
-
callWithMessages(options: {
|
|
195
|
-
messages: Array<{
|
|
196
|
-
role: string;
|
|
197
|
-
content: string;
|
|
198
|
-
}>;
|
|
199
|
-
maxTokens?: number;
|
|
200
|
-
signal?: AbortSignal;
|
|
201
|
-
}): Promise<Omit<LLMResponse<string>, 'data'> & {
|
|
202
|
-
raw: string;
|
|
203
|
-
}>;
|
|
204
|
-
/**
|
|
205
|
-
* Stream a raw text response as an async iterator of content chunks.
|
|
206
|
-
* Uses the underlying LangChain model's .stream() method.
|
|
207
|
-
*
|
|
208
|
-
* @param options - System prompt plus full message history
|
|
209
|
-
* @yields LLMStreamChunk with content deltas and a done flag
|
|
210
|
-
*/
|
|
211
|
-
streamRaw(options: LLMStreamOptions): AsyncGenerator<LLMStreamChunk>;
|
|
212
|
-
private isRateLimitError;
|
|
213
|
-
callWithCache<T>(options: CacheAwareLLMCallOptions<T>): Promise<LLMResponse<T>>;
|
|
214
|
-
static cacheableBlock(text: string, cache?: boolean): CacheableBlock;
|
|
215
|
-
}
|
|
216
|
-
declare function getSharedLLMClient(options?: LLMClientOptions): LLMClient;
|
|
217
|
-
declare function resetSharedLLMClient(provider?: LLMProvider): void;
|
|
218
|
-
declare function getAvailableProvider(): LLMProvider;
|
|
219
|
-
declare function isProviderAvailable(provider: LLMProvider): boolean;
|
|
220
|
-
/**
|
|
221
|
-
* Create an LLM client optimized for requirements analysis.
|
|
222
|
-
*
|
|
223
|
-
* Uses lower temperature (0.3) for more deterministic output.
|
|
224
|
-
* Defaults to GPT-5.1 for OpenAI or DeepSeek Chat.
|
|
225
|
-
*
|
|
226
|
-
* @param {Partial<LLMClientOptions>} [options] - Optional client configuration
|
|
227
|
-
* @returns {LLMClient} Configured LLM client
|
|
228
|
-
*/
|
|
229
|
-
declare function createRequirementsClient(options?: Partial<LLMClientOptions>): LLMClient;
|
|
230
|
-
/**
|
|
231
|
-
* Create an LLM client optimized for creative tasks.
|
|
232
|
-
*
|
|
233
|
-
* Uses higher temperature (0.7) for more varied output.
|
|
234
|
-
* Defaults to GPT-4o or DeepSeek Reasoner.
|
|
235
|
-
*
|
|
236
|
-
* @param {Partial<LLMClientOptions>} [options] - Optional client configuration
|
|
237
|
-
* @returns {LLMClient} Configured LLM client
|
|
238
|
-
*/
|
|
239
|
-
declare function createCreativeClient(options?: Partial<LLMClientOptions>): LLMClient;
|
|
240
|
-
/**
|
|
241
|
-
* Create an LLM client optimized for code fixing.
|
|
242
|
-
*
|
|
243
|
-
* Uses low temperature (0.2) for precise, deterministic fixes.
|
|
244
|
-
* Defaults to GPT-4o Mini or DeepSeek Chat for cost efficiency.
|
|
245
|
-
*
|
|
246
|
-
* @param {Partial<LLMClientOptions>} [options] - Optional client configuration
|
|
247
|
-
* @returns {LLMClient} Configured LLM client
|
|
248
|
-
*/
|
|
249
|
-
declare function createFixClient(options?: Partial<LLMClientOptions>): LLMClient;
|
|
250
|
-
/**
|
|
251
|
-
* Create a DeepSeek LLM client.
|
|
252
|
-
*
|
|
253
|
-
* @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
|
|
254
|
-
* @returns {LLMClient} Configured DeepSeek client
|
|
255
|
-
*/
|
|
256
|
-
declare function createDeepSeekClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
|
|
257
|
-
/**
|
|
258
|
-
* Create an OpenAI LLM client.
|
|
259
|
-
*
|
|
260
|
-
* @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
|
|
261
|
-
* @returns {LLMClient} Configured OpenAI client
|
|
262
|
-
*/
|
|
263
|
-
declare function createOpenAIClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
|
|
264
|
-
/**
|
|
265
|
-
* Create an Anthropic LLM client.
|
|
266
|
-
*
|
|
267
|
-
* @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
|
|
268
|
-
* @returns {LLMClient} Configured Anthropic client
|
|
269
|
-
*/
|
|
270
|
-
declare function createAnthropicClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
|
|
271
|
-
/**
|
|
272
|
-
* Create a Kimi LLM client.
|
|
273
|
-
*
|
|
274
|
-
* @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
|
|
275
|
-
* @returns {LLMClient} Configured Kimi client
|
|
276
|
-
*/
|
|
277
|
-
declare function createKimiClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
|
|
278
|
-
/**
|
|
279
|
-
* Create an OpenRouter LLM client.
|
|
280
|
-
*
|
|
281
|
-
* @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
|
|
282
|
-
* @returns {LLMClient} Configured OpenRouter client
|
|
283
|
-
*/
|
|
284
|
-
declare function createOpenRouterClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
|
|
285
|
-
/**
|
|
286
|
-
* Create a Zhipu (GLM) LLM client via OpenRouter.
|
|
287
|
-
*
|
|
288
|
-
* @param {Partial<Omit<LLMClientOptions, 'provider'>>} [options] - Optional client configuration
|
|
289
|
-
* @returns {LLMClient} Configured Zhipu client
|
|
290
|
-
*/
|
|
291
|
-
declare function createZhipuClient(options?: Partial<Omit<LLMClientOptions, 'provider'>>): LLMClient;
|
|
292
|
-
|
|
293
|
-
export { ANTHROPIC_MODELS, type CacheAwareLLMCallOptions, type CacheableBlock, DEEPSEEK_MODELS, KIMI_MODELS, type LLMCallOptions, LLMClient, type LLMClientOptions, type LLMFinishReason, type LLMProvider, type LLMResponse, type LLMStreamChunk, type LLMStreamOptions, type LLMUsage, OPENAI_MODELS, OPENROUTER_MODELS, type ProviderConfig, createAnthropicClient, createCreativeClient, createDeepSeekClient, createFixClient, createKimiClient, createOpenAIClient, createOpenRouterClient, createRequirementsClient, createZhipuClient, getAvailableProvider, getSharedLLMClient, isProviderAvailable, resetSharedLLMClient };
|
|
1
|
+
import './rate-limiter-BqWOhaXY.js';
|
|
2
|
+
import '@langchain/openai';
|
|
3
|
+
import '@langchain/anthropic';
|
|
4
|
+
import 'zod';
|
|
5
|
+
export { A as ANTHROPIC_MODELS, C as CacheAwareLLMCallOptions, a as CacheableBlock, D as DEEPSEEK_MODELS, K as KIMI_MODELS, L as LLMCallOptions, i as LLMClient, j as LLMClientOptions, k as LLMFinishReason, l as LLMProvider, m as LLMResponse, n as LLMStreamChunk, o as LLMStreamOptions, p as LLMUsage, O as OPENAI_MODELS, q as OPENROUTER_MODELS, P as ProviderConfig, r as createAnthropicClient, s as createCreativeClient, t as createDeepSeekClient, u as createFixClient, v as createKimiClient, w as createOpenAIClient, x as createOpenRouterClient, y as createRequirementsClient, z as createZhipuClient, B as getAvailableProvider, E as getSharedLLMClient, F as isProviderAvailable, H as resetSharedLLMClient } from './client-C_CnNdQx.js';
|