universal-llm-client 4.3.0 → 4.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +34 -19
- package/README.md +62 -11
- package/dist/ai-model.d.ts +12 -2
- package/dist/ai-model.js +36 -2
- package/dist/auditor.d.ts +0 -1
- package/dist/auditor.js +0 -1
- package/dist/client.d.ts +0 -1
- package/dist/client.js +0 -1
- package/dist/gemma-channel.d.ts +13 -0
- package/dist/gemma-channel.js +37 -0
- package/dist/gemma-diffusion.d.ts +48 -0
- package/dist/gemma-diffusion.js +146 -0
- package/dist/http.d.ts +4 -1
- package/dist/http.js +14 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +4 -1
- package/dist/interfaces.d.ts +163 -8
- package/dist/interfaces.js +0 -1
- package/dist/mcp.d.ts +0 -1
- package/dist/mcp.js +0 -1
- package/dist/providers/anthropic.d.ts +0 -1
- package/dist/providers/anthropic.js +28 -4
- package/dist/providers/google.d.ts +22 -2
- package/dist/providers/google.js +223 -14
- package/dist/providers/index.d.ts +0 -1
- package/dist/providers/index.js +0 -1
- package/dist/providers/ollama.d.ts +2 -1
- package/dist/providers/ollama.js +59 -31
- package/dist/providers/openai.d.ts +16 -1
- package/dist/providers/openai.js +488 -81
- package/dist/router.d.ts +2 -1
- package/dist/router.js +4 -1
- package/dist/stream-decoder.d.ts +12 -1
- package/dist/stream-decoder.js +182 -6
- package/dist/structured-output.d.ts +0 -1
- package/dist/structured-output.js +0 -1
- package/dist/thinking.d.ts +35 -0
- package/dist/thinking.js +51 -0
- package/dist/tools.d.ts +0 -1
- package/dist/tools.js +0 -1
- package/dist/zod-adapter.d.ts +0 -1
- package/dist/zod-adapter.js +0 -1
- package/package.json +3 -1
- package/dist/ai-model.d.ts.map +0 -1
- package/dist/ai-model.js.map +0 -1
- package/dist/auditor.d.ts.map +0 -1
- package/dist/auditor.js.map +0 -1
- package/dist/client.d.ts.map +0 -1
- package/dist/client.js.map +0 -1
- package/dist/http.d.ts.map +0 -1
- package/dist/http.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/interfaces.d.ts.map +0 -1
- package/dist/interfaces.js.map +0 -1
- package/dist/mcp.d.ts.map +0 -1
- package/dist/mcp.js.map +0 -1
- package/dist/providers/anthropic.d.ts.map +0 -1
- package/dist/providers/anthropic.js.map +0 -1
- package/dist/providers/google.d.ts.map +0 -1
- package/dist/providers/google.js.map +0 -1
- package/dist/providers/index.d.ts.map +0 -1
- package/dist/providers/index.js.map +0 -1
- package/dist/providers/ollama.d.ts.map +0 -1
- package/dist/providers/ollama.js.map +0 -1
- package/dist/providers/openai.d.ts.map +0 -1
- package/dist/providers/openai.js.map +0 -1
- package/dist/router.d.ts.map +0 -1
- package/dist/router.js.map +0 -1
- package/dist/stream-decoder.d.ts.map +0 -1
- package/dist/stream-decoder.js.map +0 -1
- package/dist/structured-output.d.ts.map +0 -1
- package/dist/structured-output.js.map +0 -1
- package/dist/tools.d.ts.map +0 -1
- package/dist/tools.js.map +0 -1
- package/dist/zod-adapter.d.ts.map +0 -1
- package/dist/zod-adapter.js.map +0 -1
package/dist/index.d.ts
CHANGED
|
@@ -7,11 +7,11 @@
|
|
|
7
7
|
* @module universal-llm-client
|
|
8
8
|
*/
|
|
9
9
|
export { AIModel } from './ai-model.js';
|
|
10
|
-
export { AIModelApiType, AIModelType, type AIModelConfig, type ProviderConfig, type LLMClientOptions, type LLMChatMessage, type LLMMessageContent, type LLMContentPart, type LLMTextContent, type LLMImageContent, type LLMAudioContent, type LLMChatResponse, type TokenUsageInfo, type LLMToolCall, type LLMToolDefinition, type LLMFunction, type ToolHandler, type ToolExecutionResult, type ToolRegistry, type ToolRegistryEntry, type ChatOptions, type ResponseFormat, type OutputOptions, type ModelMetadata, textContent, imageContent, multimodalMessage, extractTextContent, hasImages, audioContent, hasAudio, } from './interfaces.js';
|
|
10
|
+
export { AIModelApiType, AIModelType, type AIModelConfig, type ProviderConfig, type LLMClientOptions, type LLMChatMessage, type LLMMessageContent, type LLMContentPart, type LLMTextContent, type LLMImageContent, type LLMAudioContent, type LLMChatResponse, type TokenUsageInfo, type LLMToolCall, type LLMToolDefinition, type LLMFunction, type ToolHandler, type ToolExecutionResult, type ToolRegistry, type ToolRegistryEntry, type ChatOptions, type ResponseFormat, type OutputOptions, type ThinkingLevel, type DeepResearchOptions, type DeepResearchResult, type DeepResearchStep, type DeepResearchEvent, type ModelMetadata, textContent, imageContent, multimodalMessage, extractTextContent, hasImages, audioContent, hasAudio, } from './interfaces.js';
|
|
11
11
|
export { type Auditor, type AuditEvent, type AuditEventType, NoopAuditor, ConsoleAuditor, BufferedAuditor, } from './auditor.js';
|
|
12
12
|
export { type StreamDecoder, type DecodedEvent, type DecoderCallback, type DecoderType, type DecoderOptions, type DecoderFactory, createDecoder, registerDecoder, getRegisteredDecoders, PassthroughDecoder, StandardChatDecoder, InterleavedReasoningDecoder, } from './stream-decoder.js';
|
|
13
13
|
export { ToolBuilder, ToolExecutor, createTimeTool, createRandomNumberTool, } from './tools.js';
|
|
14
14
|
export { httpRequest, httpStream, parseNDJSON, parseSSE, buildHeaders, type HttpRequestOptions, type HttpResponse, } from './http.js';
|
|
15
|
+
export { isGemmaDiffusionModel, parseGemmaDiffusionOutput, gemmaArgsToJson, type GemmaDiffusionParsed, type GemmaParsedToolCall, } from './gemma-diffusion.js';
|
|
15
16
|
export { MCPToolBridge, type MCPBridgeConfig, type MCPServerConfig, type MCPTool, } from './mcp.js';
|
|
16
17
|
export { StructuredOutputError, type StructuredOutputErrorOptions, type StructuredOutputOptions, type StructuredOutputResult, type StructuredOutputSuccess, type StructuredOutputFailure, type JSONSchema, type SchemaProvider, type ProviderSchema, type SchemaConfig, isStructuredOutputSuccess, isStructuredOutputFailure, normalizeJsonSchema, convertToProviderSchema, stripUnsupportedFeatures, getJsonSchema, getJsonSchemaFromConfig, parseStructured, tryParseStructured, validateStructuredOutput, stripJsonFences, StreamingJsonParser, type StreamingStructuredResult, } from './structured-output.js';
|
|
17
|
-
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.js
CHANGED
|
@@ -35,6 +35,10 @@ export { ToolBuilder, ToolExecutor, createTimeTool, createRandomNumberTool, } fr
|
|
|
35
35
|
// ============================================================================
|
|
36
36
|
export { httpRequest, httpStream, parseNDJSON, parseSSE, buildHeaders, } from './http.js';
|
|
37
37
|
// ============================================================================
|
|
38
|
+
// DiffusionGemma Native Protocol (vLLM without server-side parsers)
|
|
39
|
+
// ============================================================================
|
|
40
|
+
export { isGemmaDiffusionModel, parseGemmaDiffusionOutput, gemmaArgsToJson, } from './gemma-diffusion.js';
|
|
41
|
+
// ============================================================================
|
|
38
42
|
// MCP Integration
|
|
39
43
|
// ============================================================================
|
|
40
44
|
export { MCPToolBridge, } from './mcp.js';
|
|
@@ -48,4 +52,3 @@ normalizeJsonSchema, convertToProviderSchema, stripUnsupportedFeatures, getJsonS
|
|
|
48
52
|
parseStructured, tryParseStructured, validateStructuredOutput, stripJsonFences,
|
|
49
53
|
// Streaming parser
|
|
50
54
|
StreamingJsonParser, } from './structured-output.js';
|
|
51
|
-
//# sourceMappingURL=index.js.map
|
package/dist/interfaces.d.ts
CHANGED
|
@@ -43,7 +43,45 @@ export interface ProviderConfig {
|
|
|
43
43
|
region?: string;
|
|
44
44
|
/** Google API version (default: "v1beta") */
|
|
45
45
|
apiVersion?: 'v1' | 'v1beta';
|
|
46
|
+
/**
|
|
47
|
+
* Extra headers merged into requests, applied by providers that use
|
|
48
|
+
* `buildHeaders` — **OpenAI-compatible and Ollama**. Google/Vertex and
|
|
49
|
+
* Anthropic build their own auth headers and ignore this. Useful for Azure
|
|
50
|
+
* (api-key), custom gateways, or non-standard auth. Merged after the default
|
|
51
|
+
* auth header (later entries win).
|
|
52
|
+
*/
|
|
53
|
+
headers?: Record<string, string>;
|
|
54
|
+
/**
|
|
55
|
+
* Extra query parameters appended to request URLs — **OpenAI-compatible
|
|
56
|
+
* provider only**. Useful for Azure OpenAI (e.g. { 'api-version': '2024-10-21' }).
|
|
57
|
+
*/
|
|
58
|
+
queryParams?: Record<string, string>;
|
|
59
|
+
/**
|
|
60
|
+
* Override the name of the header that carries the API key (default:
|
|
61
|
+
* "Authorization") — **OpenAI-compatible and Ollama only** (via `buildHeaders`).
|
|
62
|
+
* Common alternative for Azure and some gateways: "api-key".
|
|
63
|
+
*/
|
|
64
|
+
authHeader?: string;
|
|
65
|
+
/**
|
|
66
|
+
* Prefix placed before the apiKey value in the auth header (OpenAI-compatible
|
|
67
|
+
* and Ollama only). Default: "Bearer " when authHeader is Authorization (or
|
|
68
|
+
* unset), otherwise "". Set to "" explicitly for "api-key: <yourkey>" style auth.
|
|
69
|
+
*/
|
|
70
|
+
authPrefix?: string;
|
|
71
|
+
/**
|
|
72
|
+
* For OpenAI-compatible providers only: the URL path segment to append after the base URL.
|
|
73
|
+
* Default: "/v1".
|
|
74
|
+
* Set to "" (or "/") to disable the automatic append. This is required when supplying
|
|
75
|
+
* a full Azure deployment URL such as ".../deployments/my-deploy".
|
|
76
|
+
*/
|
|
77
|
+
apiBasePath?: string;
|
|
46
78
|
}
|
|
79
|
+
/**
|
|
80
|
+
* Unified reasoning-effort level. Mapped to each provider's native control:
|
|
81
|
+
* Gemini 3.x `thinkingConfig.thinkingLevel`, OpenAI `reasoning_effort`,
|
|
82
|
+
* Gemini 2.5 `thinkingBudget`, Anthropic `budget_tokens`, vLLM/Ollama on/off.
|
|
83
|
+
*/
|
|
84
|
+
export type ThinkingLevel = 'minimal' | 'low' | 'medium' | 'high';
|
|
47
85
|
export interface AIModelConfig {
|
|
48
86
|
/** Model name (used across all providers unless overridden) */
|
|
49
87
|
model: string;
|
|
@@ -51,8 +89,8 @@ export interface AIModelConfig {
|
|
|
51
89
|
providers: ProviderConfig[];
|
|
52
90
|
/** Default parameters for all requests (temperature, top_p, etc.) */
|
|
53
91
|
defaultParameters?: Record<string, unknown>;
|
|
54
|
-
/** Enable thinking/reasoning */
|
|
55
|
-
thinking?: boolean;
|
|
92
|
+
/** Enable thinking/reasoning — `true`/`false` or a level ('minimal' | 'low' | 'medium' | 'high'). */
|
|
93
|
+
thinking?: boolean | ThinkingLevel;
|
|
56
94
|
/** Request timeout in ms (default: 30000) */
|
|
57
95
|
timeout?: number;
|
|
58
96
|
/** Retries per provider before failover (default: 2) */
|
|
@@ -73,8 +111,8 @@ export interface LLMClientOptions {
|
|
|
73
111
|
modelType?: AIModelType;
|
|
74
112
|
/** Default parameters for requests */
|
|
75
113
|
defaultParameters?: Record<string, unknown>;
|
|
76
|
-
/** Enable thinking/reasoning */
|
|
77
|
-
thinking?: boolean;
|
|
114
|
+
/** Enable thinking/reasoning — `true`/`false` or a level ('minimal' | 'low' | 'medium' | 'high'). */
|
|
115
|
+
thinking?: boolean | ThinkingLevel;
|
|
78
116
|
/** Request timeout in ms */
|
|
79
117
|
timeout?: number;
|
|
80
118
|
/** Number of retries for failed requests */
|
|
@@ -87,6 +125,31 @@ export interface LLMClientOptions {
|
|
|
87
125
|
region?: string;
|
|
88
126
|
/** Google API version */
|
|
89
127
|
apiVersion?: 'v1' | 'v1beta';
|
|
128
|
+
/**
|
|
129
|
+
* Force the DiffusionGemma native channel protocol on/off for
|
|
130
|
+
* OpenAI-compatible backends (skip_special_tokens:false + client-side
|
|
131
|
+
* reasoning/tool-call parsing). Auto-detected from the model name when
|
|
132
|
+
* omitted. See gemma-diffusion.ts.
|
|
133
|
+
*/
|
|
134
|
+
gemmaNativeProtocol?: boolean;
|
|
135
|
+
/**
|
|
136
|
+
* Extra headers merged for every request from this provider instance.
|
|
137
|
+
* Populated from ProviderConfig.headers for advanced auth / gateway scenarios
|
|
138
|
+
* (Azure api-key style, custom x- headers, etc.).
|
|
139
|
+
*/
|
|
140
|
+
extraHeaders?: Record<string, string>;
|
|
141
|
+
/** Extra query parameters appended to request URLs (from ProviderConfig.queryParams). */
|
|
142
|
+
queryParams?: Record<string, string>;
|
|
143
|
+
/** Auth header name override (from ProviderConfig.authHeader). */
|
|
144
|
+
authHeader?: string;
|
|
145
|
+
/** Auth value prefix (from ProviderConfig.authPrefix). */
|
|
146
|
+
authPrefix?: string;
|
|
147
|
+
/**
|
|
148
|
+
* For openai-compatible clients: the sub-path to append (from ProviderConfig.apiBasePath).
|
|
149
|
+
* Defaults to "/v1"; `undefined` keeps that default. Set to "" or "/" to disable
|
|
150
|
+
* the append (when the base URL already contains the full path).
|
|
151
|
+
*/
|
|
152
|
+
apiBasePath?: string;
|
|
90
153
|
}
|
|
91
154
|
export interface LLMTextContent {
|
|
92
155
|
type: 'text';
|
|
@@ -235,6 +298,14 @@ export interface ChatOptions {
|
|
|
235
298
|
temperature?: number;
|
|
236
299
|
/** Max tokens to generate */
|
|
237
300
|
maxTokens?: number;
|
|
301
|
+
/**
|
|
302
|
+
* Enable/disable/level model thinking for this request, overriding the
|
|
303
|
+
* model-level `thinking` config. `true`/`false` or a level
|
|
304
|
+
* ('minimal' | 'low' | 'medium' | 'high'). Mapped per provider: Gemini
|
|
305
|
+
* `thinkingLevel`/`thinkingBudget`, OpenAI `reasoning_effort`, vLLM
|
|
306
|
+
* `enable_thinking`, Anthropic `budget_tokens`, Ollama `think`.
|
|
307
|
+
*/
|
|
308
|
+
thinking?: boolean | ThinkingLevel;
|
|
238
309
|
/** Tool definitions (auto-populated from registry if not set) */
|
|
239
310
|
tools?: LLMToolDefinition[];
|
|
240
311
|
/** Tool choice mode */
|
|
@@ -245,7 +316,11 @@ export interface ChatOptions {
|
|
|
245
316
|
signal?: AbortSignal;
|
|
246
317
|
/** Enable/disable tool execution for chatWithTools */
|
|
247
318
|
executeTools?: boolean;
|
|
248
|
-
/**
|
|
319
|
+
/**
|
|
320
|
+
* Enable provider-side prompt caching when supported.
|
|
321
|
+
* - Anthropic: Adds cache_control: { type: 'ephemeral' } to the system prompt block (most common high-impact pattern).
|
|
322
|
+
* - Other providers: May be passed through via parameters/headers or ignored; consult provider docs.
|
|
323
|
+
*/
|
|
249
324
|
enablePromptCaching?: boolean;
|
|
250
325
|
/** Maximum tool execution rounds (default: 10) */
|
|
251
326
|
maxIterations?: number;
|
|
@@ -332,9 +407,23 @@ export interface TokenUsageInfo {
|
|
|
332
407
|
* via `DecodedEvent { type: 'thinking' }`); consult the provider.
|
|
333
408
|
*/
|
|
334
409
|
reasoningTokens?: number;
|
|
410
|
+
/**
|
|
411
|
+
* Total request duration in milliseconds. Server-measured where the
|
|
412
|
+
* provider reports it (Ollama `total_duration`); otherwise client-measured
|
|
413
|
+
* wall-clock (OpenAI-compatible / vLLM return no timing in `usage`).
|
|
414
|
+
*/
|
|
415
|
+
durationMs?: number;
|
|
416
|
+
/**
|
|
417
|
+
* Decode throughput in output tokens/second. Server-precise for Ollama
|
|
418
|
+
* (`eval_count / eval_duration`); derived from `outputTokens / durationMs`
|
|
419
|
+
* for providers without server-side timing (OpenAI-compatible / vLLM).
|
|
420
|
+
*/
|
|
421
|
+
tokensPerSecond?: number;
|
|
335
422
|
}
|
|
336
423
|
export interface LLMChatResponse<T = unknown> {
|
|
337
424
|
message: LLMChatMessage;
|
|
425
|
+
/** Provider finish reason when available (e.g. Ollama done_reason, Google finishReason) */
|
|
426
|
+
finishReason?: string;
|
|
338
427
|
/** Reasoning/thinking content from the model (if supported) */
|
|
339
428
|
reasoning?: string;
|
|
340
429
|
/** Token usage info */
|
|
@@ -375,9 +464,16 @@ export interface OllamaResponse {
|
|
|
375
464
|
tool_calls?: LLMToolCall[];
|
|
376
465
|
};
|
|
377
466
|
done: boolean;
|
|
467
|
+
done_reason?: string;
|
|
468
|
+
/** Total request time in nanoseconds. */
|
|
469
|
+
total_duration?: number;
|
|
470
|
+
/** Model load time in nanoseconds. */
|
|
471
|
+
load_duration?: number;
|
|
378
472
|
prompt_eval_count?: number;
|
|
379
473
|
eval_count?: number;
|
|
474
|
+
/** Prompt evaluation time in nanoseconds. */
|
|
380
475
|
prompt_eval_duration?: number;
|
|
476
|
+
/** Generation time in nanoseconds. */
|
|
381
477
|
eval_duration?: number;
|
|
382
478
|
}
|
|
383
479
|
export interface OpenAIResponse {
|
|
@@ -390,6 +486,13 @@ export interface OpenAIResponse {
|
|
|
390
486
|
message: {
|
|
391
487
|
role: string;
|
|
392
488
|
content: string | null;
|
|
489
|
+
/**
|
|
490
|
+
* Chain-of-thought from reasoning models exposed via a dedicated
|
|
491
|
+
* field (vLLM `--reasoning-parser`, DeepSeek-R1, etc.). vLLM uses
|
|
492
|
+
* `reasoning_content`; some gateways use `reasoning`.
|
|
493
|
+
*/
|
|
494
|
+
reasoning?: string;
|
|
495
|
+
reasoning_content?: string;
|
|
393
496
|
tool_calls?: LLMToolCall[];
|
|
394
497
|
};
|
|
395
498
|
finish_reason: string;
|
|
@@ -426,8 +529,8 @@ export interface OpenAIModelInfo {
|
|
|
426
529
|
export interface GooglePart {
|
|
427
530
|
text?: string;
|
|
428
531
|
functionCall?: {
|
|
429
|
-
name: string;
|
|
430
|
-
args: Record<string, unknown>;
|
|
532
|
+
name?: string;
|
|
533
|
+
args?: Record<string, unknown>;
|
|
431
534
|
};
|
|
432
535
|
functionResponse?: {
|
|
433
536
|
name: string;
|
|
@@ -437,6 +540,8 @@ export interface GooglePart {
|
|
|
437
540
|
mimeType: string;
|
|
438
541
|
data: string;
|
|
439
542
|
};
|
|
543
|
+
/** True when this part is a reasoning summary (requires `includeThoughts`). */
|
|
544
|
+
thought?: boolean;
|
|
440
545
|
/** Gemini 3.x thought signature — must be echoed back on functionCall parts */
|
|
441
546
|
thoughtSignature?: string;
|
|
442
547
|
}
|
|
@@ -507,6 +612,57 @@ export interface GoogleResponse {
|
|
|
507
612
|
thoughtsTokenCount?: number;
|
|
508
613
|
};
|
|
509
614
|
}
|
|
615
|
+
/** Options for an agentic Deep Research interaction (Gemini-only). */
|
|
616
|
+
export interface DeepResearchOptions {
|
|
617
|
+
/** Research agent id (default 'deep-research-preview-04-2026'). */
|
|
618
|
+
agent?: string;
|
|
619
|
+
/** Tools the agent may use, e.g. 'google_search', 'url_context', 'code_execution'. */
|
|
620
|
+
tools?: string[];
|
|
621
|
+
/** Emit intermediate reasoning ('auto') or not ('none'). Default 'auto'. */
|
|
622
|
+
thinkingSummaries?: 'auto' | 'none';
|
|
623
|
+
/** Continue a prior interaction (follow-up question). */
|
|
624
|
+
previousInteractionId?: string;
|
|
625
|
+
/** Poll interval in ms while awaiting completion (default 5000). */
|
|
626
|
+
pollIntervalMs?: number;
|
|
627
|
+
/** Overall timeout in ms before giving up the poll loop (default 600000). */
|
|
628
|
+
timeoutMs?: number;
|
|
629
|
+
/** Abort signal forwarded to every request. */
|
|
630
|
+
signal?: AbortSignal;
|
|
631
|
+
}
|
|
632
|
+
/** One intermediate step in a Deep Research interaction. */
|
|
633
|
+
export interface DeepResearchStep {
|
|
634
|
+
type?: string;
|
|
635
|
+
content?: Array<{
|
|
636
|
+
text?: string;
|
|
637
|
+
[k: string]: unknown;
|
|
638
|
+
}>;
|
|
639
|
+
[k: string]: unknown;
|
|
640
|
+
}
|
|
641
|
+
/** Terminal (or last-polled) state of a Deep Research interaction. */
|
|
642
|
+
export interface DeepResearchResult {
|
|
643
|
+
id: string;
|
|
644
|
+
status: 'in_progress' | 'completed' | 'failed' | string;
|
|
645
|
+
/** Final research report (`output_text`) when completed. */
|
|
646
|
+
report?: string;
|
|
647
|
+
steps?: DeepResearchStep[];
|
|
648
|
+
error?: unknown;
|
|
649
|
+
/** The raw last interaction object from the API. */
|
|
650
|
+
raw?: unknown;
|
|
651
|
+
}
|
|
652
|
+
/** Streaming Deep Research event (from `step.delta` updates). */
|
|
653
|
+
export type DeepResearchEvent = {
|
|
654
|
+
type: 'thought';
|
|
655
|
+
content: string;
|
|
656
|
+
} | {
|
|
657
|
+
type: 'text';
|
|
658
|
+
content: string;
|
|
659
|
+
} | {
|
|
660
|
+
type: 'image';
|
|
661
|
+
content: unknown;
|
|
662
|
+
} | {
|
|
663
|
+
type: 'status';
|
|
664
|
+
status: string;
|
|
665
|
+
};
|
|
510
666
|
/** Create a text content part */
|
|
511
667
|
export declare function textContent(text: string): LLMTextContent;
|
|
512
668
|
/** Create an image content part from base64 data or URL */
|
|
@@ -521,4 +677,3 @@ export declare function hasImages(content: LLMMessageContent): boolean;
|
|
|
521
677
|
export declare function audioContent(base64Data: string, mimeType: string): LLMAudioContent;
|
|
522
678
|
/** Check if message content contains audio */
|
|
523
679
|
export declare function hasAudio(content: LLMMessageContent): boolean;
|
|
524
|
-
//# sourceMappingURL=interfaces.d.ts.map
|
package/dist/interfaces.js
CHANGED
package/dist/mcp.d.ts
CHANGED
package/dist/mcp.js
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
* - Streaming: content_block_start/delta/stop events with typed deltas
|
|
15
15
|
*/
|
|
16
16
|
import { BaseLLMClient } from '../client.js';
|
|
17
|
+
import { resolveThinking, anthropicThinkingBudget } from '../thinking.js';
|
|
17
18
|
import { httpRequest, httpStream, parseSSE } from '../http.js';
|
|
18
19
|
import { StandardChatDecoder } from '../stream-decoder.js';
|
|
19
20
|
// ============================================================================
|
|
@@ -271,6 +272,17 @@ export class AnthropicClient extends BaseLLMClient {
|
|
|
271
272
|
.map(m => typeof m.content === 'string' ? m.content : this.extractText(m.content))
|
|
272
273
|
.join('\n\n')
|
|
273
274
|
: undefined;
|
|
275
|
+
// Prompt caching support (Anthropic-specific, high impact for long system prompts / RAG)
|
|
276
|
+
let system = systemPrompt;
|
|
277
|
+
if (options?.enablePromptCaching && systemPrompt) {
|
|
278
|
+
system = [
|
|
279
|
+
{
|
|
280
|
+
type: 'text',
|
|
281
|
+
text: systemPrompt,
|
|
282
|
+
cache_control: { type: 'ephemeral' },
|
|
283
|
+
},
|
|
284
|
+
];
|
|
285
|
+
}
|
|
274
286
|
// Convert tools from OpenAI format to Anthropic format
|
|
275
287
|
const tools = options?.tools ?? (Object.keys(this.toolRegistry).length > 0 ? this.getToolDefinitions() : undefined);
|
|
276
288
|
const anthropicTools = tools?.map(t => this.convertToolDef(t));
|
|
@@ -285,15 +297,28 @@ export class AnthropicClient extends BaseLLMClient {
|
|
|
285
297
|
else if (options?.toolChoice === 'auto') {
|
|
286
298
|
toolChoice = { type: 'auto' };
|
|
287
299
|
}
|
|
300
|
+
// Unified thinking flag → Anthropic extended thinking. Per-call overrides
|
|
301
|
+
// model config; the level sets `budget_tokens` (kept < max_tokens). The
|
|
302
|
+
// API forbids a custom temperature while thinking is enabled, so it is
|
|
303
|
+
// omitted in that case (the required default of 1 applies).
|
|
304
|
+
const thinking = resolveThinking(options?.thinking, this.options.thinking);
|
|
305
|
+
const thinkingOn = thinking?.enabled === true;
|
|
306
|
+
const requestedMax = options?.maxTokens ?? 4096;
|
|
307
|
+
// Extended thinking requires budget_tokens >= 1024 AND < max_tokens, so when
|
|
308
|
+
// thinking is on we bump max_tokens to guarantee headroom for the answer.
|
|
309
|
+
const budget = thinkingOn ? anthropicThinkingBudget(thinking?.level, requestedMax) : 0;
|
|
310
|
+
const maxTokens = thinkingOn ? Math.max(requestedMax, budget + 1024) : requestedMax;
|
|
288
311
|
const body = {
|
|
289
312
|
model: this.options.model,
|
|
290
313
|
messages: this.convertMessages(nonSystemMessages),
|
|
291
|
-
max_tokens: options?.maxTokens ?? 4096,
|
|
292
|
-
...(systemPrompt && { system: systemPrompt }),
|
|
314
|
+
max_tokens: maxTokens,
|
|
315
|
+
...(system && { system }),
|
|
293
316
|
...(anthropicTools?.length && { tools: anthropicTools }),
|
|
294
317
|
...(toolChoice && { tool_choice: toolChoice }),
|
|
295
318
|
...(stream && { stream: true }),
|
|
296
|
-
...(options?.temperature !== undefined && { temperature: options.temperature }),
|
|
319
|
+
...(thinkingOn
|
|
320
|
+
? { thinking: { type: 'enabled', budget_tokens: budget } }
|
|
321
|
+
: (options?.temperature !== undefined && { temperature: options.temperature })),
|
|
297
322
|
};
|
|
298
323
|
return body;
|
|
299
324
|
}
|
|
@@ -521,4 +546,3 @@ export class AnthropicClient extends BaseLLMClient {
|
|
|
521
546
|
.join('');
|
|
522
547
|
}
|
|
523
548
|
}
|
|
524
|
-
//# sourceMappingURL=anthropic.js.map
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* streaming, embeddings, and system prompt handling.
|
|
7
7
|
*/
|
|
8
8
|
import { BaseLLMClient } from '../client.js';
|
|
9
|
-
import type { LLMClientOptions, LLMChatMessage, LLMChatResponse, ChatOptions } from '../interfaces.js';
|
|
9
|
+
import type { LLMClientOptions, LLMChatMessage, LLMChatResponse, ChatOptions, DeepResearchOptions, DeepResearchResult, DeepResearchEvent } from '../interfaces.js';
|
|
10
10
|
import type { DecodedEvent } from '../stream-decoder.js';
|
|
11
11
|
import type { Auditor } from '../auditor.js';
|
|
12
12
|
export declare class GoogleClient extends BaseLLMClient {
|
|
@@ -20,6 +20,26 @@ export declare class GoogleClient extends BaseLLMClient {
|
|
|
20
20
|
private getHeaders;
|
|
21
21
|
chat(messages: LLMChatMessage[], options?: ChatOptions): Promise<LLMChatResponse>;
|
|
22
22
|
chatStream(messages: LLMChatMessage[], options?: ChatOptions): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown>;
|
|
23
|
+
/** Deep Research is available via Google AI Studio only (not Vertex AI). */
|
|
24
|
+
supportsDeepResearch(): boolean;
|
|
25
|
+
private interactionsBase;
|
|
26
|
+
private deepResearchHeaders;
|
|
27
|
+
private buildInteractionBody;
|
|
28
|
+
private toDeepResearchResult;
|
|
29
|
+
/** httpRequest with small backoff retries — the preview interactions API is flaky (503s). */
|
|
30
|
+
private drRequest;
|
|
31
|
+
/**
|
|
32
|
+
* Run an agentic Deep Research interaction: create it, then poll until it
|
|
33
|
+
* completes/fails or the timeout elapses. Returns the final report + steps.
|
|
34
|
+
*/
|
|
35
|
+
deepResearch(input: string, opts?: DeepResearchOptions): Promise<DeepResearchResult>;
|
|
36
|
+
/**
|
|
37
|
+
* Stream a Deep Research interaction's intermediate updates (`step.delta`
|
|
38
|
+
* thought/text/image events) and return the final result. Best-effort:
|
|
39
|
+
* falls back to the created interaction object if the stream ends early.
|
|
40
|
+
*/
|
|
41
|
+
deepResearchStream(input: string, opts?: DeepResearchOptions): AsyncGenerator<DeepResearchEvent, DeepResearchResult, unknown>;
|
|
42
|
+
private delay;
|
|
23
43
|
embed(text: string): Promise<number[]>;
|
|
24
44
|
getModels(): Promise<string[]>;
|
|
25
45
|
private buildRequestBody;
|
|
@@ -28,6 +48,7 @@ export declare class GoogleClient extends BaseLLMClient {
|
|
|
28
48
|
private convertContentToGoogleParts;
|
|
29
49
|
private convertToGoogleTool;
|
|
30
50
|
private convertFunctionCallToToolCall;
|
|
51
|
+
private parseToolArguments;
|
|
31
52
|
private parseGoogleResponse;
|
|
32
53
|
/**
|
|
33
54
|
* Retry HTTP requests for Flex tier when receiving 503/429 errors.
|
|
@@ -35,4 +56,3 @@ export declare class GoogleClient extends BaseLLMClient {
|
|
|
35
56
|
*/
|
|
36
57
|
private fetchWithFlexRetry;
|
|
37
58
|
}
|
|
38
|
-
//# sourceMappingURL=google.d.ts.map
|