universal-llm-client 4.3.0 → 4.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/CHANGELOG.md +34 -19
  2. package/README.md +62 -11
  3. package/dist/ai-model.d.ts +12 -2
  4. package/dist/ai-model.js +36 -2
  5. package/dist/auditor.d.ts +0 -1
  6. package/dist/auditor.js +0 -1
  7. package/dist/client.d.ts +0 -1
  8. package/dist/client.js +0 -1
  9. package/dist/gemma-channel.d.ts +13 -0
  10. package/dist/gemma-channel.js +37 -0
  11. package/dist/gemma-diffusion.d.ts +48 -0
  12. package/dist/gemma-diffusion.js +146 -0
  13. package/dist/http.d.ts +4 -1
  14. package/dist/http.js +14 -2
  15. package/dist/index.d.ts +2 -2
  16. package/dist/index.js +4 -1
  17. package/dist/interfaces.d.ts +163 -8
  18. package/dist/interfaces.js +0 -1
  19. package/dist/mcp.d.ts +0 -1
  20. package/dist/mcp.js +0 -1
  21. package/dist/providers/anthropic.d.ts +0 -1
  22. package/dist/providers/anthropic.js +28 -4
  23. package/dist/providers/google.d.ts +22 -2
  24. package/dist/providers/google.js +223 -14
  25. package/dist/providers/index.d.ts +0 -1
  26. package/dist/providers/index.js +0 -1
  27. package/dist/providers/ollama.d.ts +2 -1
  28. package/dist/providers/ollama.js +59 -31
  29. package/dist/providers/openai.d.ts +16 -1
  30. package/dist/providers/openai.js +488 -81
  31. package/dist/router.d.ts +2 -1
  32. package/dist/router.js +4 -1
  33. package/dist/stream-decoder.d.ts +12 -1
  34. package/dist/stream-decoder.js +182 -6
  35. package/dist/structured-output.d.ts +0 -1
  36. package/dist/structured-output.js +0 -1
  37. package/dist/thinking.d.ts +35 -0
  38. package/dist/thinking.js +51 -0
  39. package/dist/tools.d.ts +0 -1
  40. package/dist/tools.js +0 -1
  41. package/dist/zod-adapter.d.ts +0 -1
  42. package/dist/zod-adapter.js +0 -1
  43. package/package.json +3 -1
  44. package/dist/ai-model.d.ts.map +0 -1
  45. package/dist/ai-model.js.map +0 -1
  46. package/dist/auditor.d.ts.map +0 -1
  47. package/dist/auditor.js.map +0 -1
  48. package/dist/client.d.ts.map +0 -1
  49. package/dist/client.js.map +0 -1
  50. package/dist/http.d.ts.map +0 -1
  51. package/dist/http.js.map +0 -1
  52. package/dist/index.d.ts.map +0 -1
  53. package/dist/index.js.map +0 -1
  54. package/dist/interfaces.d.ts.map +0 -1
  55. package/dist/interfaces.js.map +0 -1
  56. package/dist/mcp.d.ts.map +0 -1
  57. package/dist/mcp.js.map +0 -1
  58. package/dist/providers/anthropic.d.ts.map +0 -1
  59. package/dist/providers/anthropic.js.map +0 -1
  60. package/dist/providers/google.d.ts.map +0 -1
  61. package/dist/providers/google.js.map +0 -1
  62. package/dist/providers/index.d.ts.map +0 -1
  63. package/dist/providers/index.js.map +0 -1
  64. package/dist/providers/ollama.d.ts.map +0 -1
  65. package/dist/providers/ollama.js.map +0 -1
  66. package/dist/providers/openai.d.ts.map +0 -1
  67. package/dist/providers/openai.js.map +0 -1
  68. package/dist/router.d.ts.map +0 -1
  69. package/dist/router.js.map +0 -1
  70. package/dist/stream-decoder.d.ts.map +0 -1
  71. package/dist/stream-decoder.js.map +0 -1
  72. package/dist/structured-output.d.ts.map +0 -1
  73. package/dist/structured-output.js.map +0 -1
  74. package/dist/tools.d.ts.map +0 -1
  75. package/dist/tools.js.map +0 -1
  76. package/dist/zod-adapter.d.ts.map +0 -1
  77. package/dist/zod-adapter.js.map +0 -1
package/dist/index.d.ts CHANGED
@@ -7,11 +7,11 @@
7
7
  * @module universal-llm-client
8
8
  */
9
9
  export { AIModel } from './ai-model.js';
10
- export { AIModelApiType, AIModelType, type AIModelConfig, type ProviderConfig, type LLMClientOptions, type LLMChatMessage, type LLMMessageContent, type LLMContentPart, type LLMTextContent, type LLMImageContent, type LLMAudioContent, type LLMChatResponse, type TokenUsageInfo, type LLMToolCall, type LLMToolDefinition, type LLMFunction, type ToolHandler, type ToolExecutionResult, type ToolRegistry, type ToolRegistryEntry, type ChatOptions, type ResponseFormat, type OutputOptions, type ModelMetadata, textContent, imageContent, multimodalMessage, extractTextContent, hasImages, audioContent, hasAudio, } from './interfaces.js';
10
+ export { AIModelApiType, AIModelType, type AIModelConfig, type ProviderConfig, type LLMClientOptions, type LLMChatMessage, type LLMMessageContent, type LLMContentPart, type LLMTextContent, type LLMImageContent, type LLMAudioContent, type LLMChatResponse, type TokenUsageInfo, type LLMToolCall, type LLMToolDefinition, type LLMFunction, type ToolHandler, type ToolExecutionResult, type ToolRegistry, type ToolRegistryEntry, type ChatOptions, type ResponseFormat, type OutputOptions, type ThinkingLevel, type DeepResearchOptions, type DeepResearchResult, type DeepResearchStep, type DeepResearchEvent, type ModelMetadata, textContent, imageContent, multimodalMessage, extractTextContent, hasImages, audioContent, hasAudio, } from './interfaces.js';
11
11
  export { type Auditor, type AuditEvent, type AuditEventType, NoopAuditor, ConsoleAuditor, BufferedAuditor, } from './auditor.js';
12
12
  export { type StreamDecoder, type DecodedEvent, type DecoderCallback, type DecoderType, type DecoderOptions, type DecoderFactory, createDecoder, registerDecoder, getRegisteredDecoders, PassthroughDecoder, StandardChatDecoder, InterleavedReasoningDecoder, } from './stream-decoder.js';
13
13
  export { ToolBuilder, ToolExecutor, createTimeTool, createRandomNumberTool, } from './tools.js';
14
14
  export { httpRequest, httpStream, parseNDJSON, parseSSE, buildHeaders, type HttpRequestOptions, type HttpResponse, } from './http.js';
15
+ export { isGemmaDiffusionModel, parseGemmaDiffusionOutput, gemmaArgsToJson, type GemmaDiffusionParsed, type GemmaParsedToolCall, } from './gemma-diffusion.js';
15
16
  export { MCPToolBridge, type MCPBridgeConfig, type MCPServerConfig, type MCPTool, } from './mcp.js';
16
17
  export { StructuredOutputError, type StructuredOutputErrorOptions, type StructuredOutputOptions, type StructuredOutputResult, type StructuredOutputSuccess, type StructuredOutputFailure, type JSONSchema, type SchemaProvider, type ProviderSchema, type SchemaConfig, isStructuredOutputSuccess, isStructuredOutputFailure, normalizeJsonSchema, convertToProviderSchema, stripUnsupportedFeatures, getJsonSchema, getJsonSchemaFromConfig, parseStructured, tryParseStructured, validateStructuredOutput, stripJsonFences, StreamingJsonParser, type StreamingStructuredResult, } from './structured-output.js';
17
- //# sourceMappingURL=index.d.ts.map
package/dist/index.js CHANGED
@@ -35,6 +35,10 @@ export { ToolBuilder, ToolExecutor, createTimeTool, createRandomNumberTool, } fr
35
35
  // ============================================================================
36
36
  export { httpRequest, httpStream, parseNDJSON, parseSSE, buildHeaders, } from './http.js';
37
37
  // ============================================================================
38
+ // DiffusionGemma Native Protocol (vLLM without server-side parsers)
39
+ // ============================================================================
40
+ export { isGemmaDiffusionModel, parseGemmaDiffusionOutput, gemmaArgsToJson, } from './gemma-diffusion.js';
41
+ // ============================================================================
38
42
  // MCP Integration
39
43
  // ============================================================================
40
44
  export { MCPToolBridge, } from './mcp.js';
@@ -48,4 +52,3 @@ normalizeJsonSchema, convertToProviderSchema, stripUnsupportedFeatures, getJsonS
48
52
  parseStructured, tryParseStructured, validateStructuredOutput, stripJsonFences,
49
53
  // Streaming parser
50
54
  StreamingJsonParser, } from './structured-output.js';
51
- //# sourceMappingURL=index.js.map
@@ -43,7 +43,45 @@ export interface ProviderConfig {
43
43
  region?: string;
44
44
  /** Google API version (default: "v1beta") */
45
45
  apiVersion?: 'v1' | 'v1beta';
46
+ /**
47
+ * Extra headers merged into requests, applied by providers that use
48
+ * `buildHeaders` — **OpenAI-compatible and Ollama**. Google/Vertex and
49
+ * Anthropic build their own auth headers and ignore this. Useful for Azure
50
+ * (api-key), custom gateways, or non-standard auth. Merged after the default
51
+ * auth header (later entries win).
52
+ */
53
+ headers?: Record<string, string>;
54
+ /**
55
+ * Extra query parameters appended to request URLs — **OpenAI-compatible
56
+ * provider only**. Useful for Azure OpenAI (e.g. { 'api-version': '2024-10-21' }).
57
+ */
58
+ queryParams?: Record<string, string>;
59
+ /**
60
+ * Override the name of the header that carries the API key (default:
61
+ * "Authorization") — **OpenAI-compatible and Ollama only** (via `buildHeaders`).
62
+ * Common alternative for Azure and some gateways: "api-key".
63
+ */
64
+ authHeader?: string;
65
+ /**
66
+ * Prefix placed before the apiKey value in the auth header (OpenAI-compatible
67
+ * and Ollama only). Default: "Bearer " when authHeader is Authorization (or
68
+ * unset), otherwise "". Set to "" explicitly for "api-key: <yourkey>" style auth.
69
+ */
70
+ authPrefix?: string;
71
+ /**
72
+ * For OpenAI-compatible providers only: the URL path segment to append after the base URL.
73
+ * Default: "/v1".
74
+ * Set to "" (or "/") to disable the automatic append. This is required when supplying
75
+ * a full Azure deployment URL such as ".../deployments/my-deploy".
76
+ */
77
+ apiBasePath?: string;
46
78
  }
79
+ /**
80
+ * Unified reasoning-effort level. Mapped to each provider's native control:
81
+ * Gemini 3.x `thinkingConfig.thinkingLevel`, OpenAI `reasoning_effort`,
82
+ * Gemini 2.5 `thinkingBudget`, Anthropic `budget_tokens`, vLLM/Ollama on/off.
83
+ */
84
+ export type ThinkingLevel = 'minimal' | 'low' | 'medium' | 'high';
47
85
  export interface AIModelConfig {
48
86
  /** Model name (used across all providers unless overridden) */
49
87
  model: string;
@@ -51,8 +89,8 @@ export interface AIModelConfig {
51
89
  providers: ProviderConfig[];
52
90
  /** Default parameters for all requests (temperature, top_p, etc.) */
53
91
  defaultParameters?: Record<string, unknown>;
54
- /** Enable thinking/reasoning mode */
55
- thinking?: boolean;
92
+ /** Enable thinking/reasoning `true`/`false` or a level ('minimal' | 'low' | 'medium' | 'high'). */
93
+ thinking?: boolean | ThinkingLevel;
56
94
  /** Request timeout in ms (default: 30000) */
57
95
  timeout?: number;
58
96
  /** Retries per provider before failover (default: 2) */
@@ -73,8 +111,8 @@ export interface LLMClientOptions {
73
111
  modelType?: AIModelType;
74
112
  /** Default parameters for requests */
75
113
  defaultParameters?: Record<string, unknown>;
76
- /** Enable thinking/reasoning mode */
77
- thinking?: boolean;
114
+ /** Enable thinking/reasoning `true`/`false` or a level ('minimal' | 'low' | 'medium' | 'high'). */
115
+ thinking?: boolean | ThinkingLevel;
78
116
  /** Request timeout in ms */
79
117
  timeout?: number;
80
118
  /** Number of retries for failed requests */
@@ -87,6 +125,31 @@ export interface LLMClientOptions {
87
125
  region?: string;
88
126
  /** Google API version */
89
127
  apiVersion?: 'v1' | 'v1beta';
128
+ /**
129
+ * Force the DiffusionGemma native channel protocol on/off for
130
+ * OpenAI-compatible backends (skip_special_tokens:false + client-side
131
+ * reasoning/tool-call parsing). Auto-detected from the model name when
132
+ * omitted. See gemma-diffusion.ts.
133
+ */
134
+ gemmaNativeProtocol?: boolean;
135
+ /**
136
+ * Extra headers merged for every request from this provider instance.
137
+ * Populated from ProviderConfig.headers for advanced auth / gateway scenarios
138
+ * (Azure api-key style, custom x- headers, etc.).
139
+ */
140
+ extraHeaders?: Record<string, string>;
141
+ /** Extra query parameters appended to request URLs (from ProviderConfig.queryParams). */
142
+ queryParams?: Record<string, string>;
143
+ /** Auth header name override (from ProviderConfig.authHeader). */
144
+ authHeader?: string;
145
+ /** Auth value prefix (from ProviderConfig.authPrefix). */
146
+ authPrefix?: string;
147
+ /**
148
+ * For openai-compatible clients: the sub-path to append (from ProviderConfig.apiBasePath).
149
+ * Defaults to "/v1"; `undefined` keeps that default. Set to "" or "/" to disable
150
+ * the append (when the base URL already contains the full path).
151
+ */
152
+ apiBasePath?: string;
90
153
  }
91
154
  export interface LLMTextContent {
92
155
  type: 'text';
@@ -235,6 +298,14 @@ export interface ChatOptions {
235
298
  temperature?: number;
236
299
  /** Max tokens to generate */
237
300
  maxTokens?: number;
301
+ /**
302
+ * Enable/disable/level model thinking for this request, overriding the
303
+ * model-level `thinking` config. `true`/`false` or a level
304
+ * ('minimal' | 'low' | 'medium' | 'high'). Mapped per provider: Gemini
305
+ * `thinkingLevel`/`thinkingBudget`, OpenAI `reasoning_effort`, vLLM
306
+ * `enable_thinking`, Anthropic `budget_tokens`, Ollama `think`.
307
+ */
308
+ thinking?: boolean | ThinkingLevel;
238
309
  /** Tool definitions (auto-populated from registry if not set) */
239
310
  tools?: LLMToolDefinition[];
240
311
  /** Tool choice mode */
@@ -245,7 +316,11 @@ export interface ChatOptions {
245
316
  signal?: AbortSignal;
246
317
  /** Enable/disable tool execution for chatWithTools */
247
318
  executeTools?: boolean;
248
- /** Enable prompt caching (Provider specific feature, opt-in for Anthropic) */
319
+ /**
320
+ * Enable provider-side prompt caching when supported.
321
+ * - Anthropic: Adds cache_control: { type: 'ephemeral' } to the system prompt block (most common high-impact pattern).
322
+ * - Other providers: May be passed through via parameters/headers or ignored; consult provider docs.
323
+ */
249
324
  enablePromptCaching?: boolean;
250
325
  /** Maximum tool execution rounds (default: 10) */
251
326
  maxIterations?: number;
@@ -332,9 +407,23 @@ export interface TokenUsageInfo {
332
407
  * via `DecodedEvent { type: 'thinking' }`); consult the provider.
333
408
  */
334
409
  reasoningTokens?: number;
410
+ /**
411
+ * Total request duration in milliseconds. Server-measured where the
412
+ * provider reports it (Ollama `total_duration`); otherwise client-measured
413
+ * wall-clock (OpenAI-compatible / vLLM return no timing in `usage`).
414
+ */
415
+ durationMs?: number;
416
+ /**
417
+ * Decode throughput in output tokens/second. Server-precise for Ollama
418
+ * (`eval_count / eval_duration`); derived from `outputTokens / durationMs`
419
+ * for providers without server-side timing (OpenAI-compatible / vLLM).
420
+ */
421
+ tokensPerSecond?: number;
335
422
  }
336
423
  export interface LLMChatResponse<T = unknown> {
337
424
  message: LLMChatMessage;
425
+ /** Provider finish reason when available (e.g. Ollama done_reason, Google finishReason) */
426
+ finishReason?: string;
338
427
  /** Reasoning/thinking content from the model (if supported) */
339
428
  reasoning?: string;
340
429
  /** Token usage info */
@@ -375,9 +464,16 @@ export interface OllamaResponse {
375
464
  tool_calls?: LLMToolCall[];
376
465
  };
377
466
  done: boolean;
467
+ done_reason?: string;
468
+ /** Total request time in nanoseconds. */
469
+ total_duration?: number;
470
+ /** Model load time in nanoseconds. */
471
+ load_duration?: number;
378
472
  prompt_eval_count?: number;
379
473
  eval_count?: number;
474
+ /** Prompt evaluation time in nanoseconds. */
380
475
  prompt_eval_duration?: number;
476
+ /** Generation time in nanoseconds. */
381
477
  eval_duration?: number;
382
478
  }
383
479
  export interface OpenAIResponse {
@@ -390,6 +486,13 @@ export interface OpenAIResponse {
390
486
  message: {
391
487
  role: string;
392
488
  content: string | null;
489
+ /**
490
+ * Chain-of-thought from reasoning models exposed via a dedicated
491
+ * field (vLLM `--reasoning-parser`, DeepSeek-R1, etc.). vLLM uses
492
+ * `reasoning_content`; some gateways use `reasoning`.
493
+ */
494
+ reasoning?: string;
495
+ reasoning_content?: string;
393
496
  tool_calls?: LLMToolCall[];
394
497
  };
395
498
  finish_reason: string;
@@ -426,8 +529,8 @@ export interface OpenAIModelInfo {
426
529
  export interface GooglePart {
427
530
  text?: string;
428
531
  functionCall?: {
429
- name: string;
430
- args: Record<string, unknown>;
532
+ name?: string;
533
+ args?: Record<string, unknown>;
431
534
  };
432
535
  functionResponse?: {
433
536
  name: string;
@@ -437,6 +540,8 @@ export interface GooglePart {
437
540
  mimeType: string;
438
541
  data: string;
439
542
  };
543
+ /** True when this part is a reasoning summary (requires `includeThoughts`). */
544
+ thought?: boolean;
440
545
  /** Gemini 3.x thought signature — must be echoed back on functionCall parts */
441
546
  thoughtSignature?: string;
442
547
  }
@@ -507,6 +612,57 @@ export interface GoogleResponse {
507
612
  thoughtsTokenCount?: number;
508
613
  };
509
614
  }
615
+ /** Options for an agentic Deep Research interaction (Gemini-only). */
616
+ export interface DeepResearchOptions {
617
+ /** Research agent id (default 'deep-research-preview-04-2026'). */
618
+ agent?: string;
619
+ /** Tools the agent may use, e.g. 'google_search', 'url_context', 'code_execution'. */
620
+ tools?: string[];
621
+ /** Emit intermediate reasoning ('auto') or not ('none'). Default 'auto'. */
622
+ thinkingSummaries?: 'auto' | 'none';
623
+ /** Continue a prior interaction (follow-up question). */
624
+ previousInteractionId?: string;
625
+ /** Poll interval in ms while awaiting completion (default 5000). */
626
+ pollIntervalMs?: number;
627
+ /** Overall timeout in ms before giving up the poll loop (default 600000). */
628
+ timeoutMs?: number;
629
+ /** Abort signal forwarded to every request. */
630
+ signal?: AbortSignal;
631
+ }
632
+ /** One intermediate step in a Deep Research interaction. */
633
+ export interface DeepResearchStep {
634
+ type?: string;
635
+ content?: Array<{
636
+ text?: string;
637
+ [k: string]: unknown;
638
+ }>;
639
+ [k: string]: unknown;
640
+ }
641
+ /** Terminal (or last-polled) state of a Deep Research interaction. */
642
+ export interface DeepResearchResult {
643
+ id: string;
644
+ status: 'in_progress' | 'completed' | 'failed' | string;
645
+ /** Final research report (`output_text`) when completed. */
646
+ report?: string;
647
+ steps?: DeepResearchStep[];
648
+ error?: unknown;
649
+ /** The raw last interaction object from the API. */
650
+ raw?: unknown;
651
+ }
652
+ /** Streaming Deep Research event (from `step.delta` updates). */
653
+ export type DeepResearchEvent = {
654
+ type: 'thought';
655
+ content: string;
656
+ } | {
657
+ type: 'text';
658
+ content: string;
659
+ } | {
660
+ type: 'image';
661
+ content: unknown;
662
+ } | {
663
+ type: 'status';
664
+ status: string;
665
+ };
510
666
  /** Create a text content part */
511
667
  export declare function textContent(text: string): LLMTextContent;
512
668
  /** Create an image content part from base64 data or URL */
@@ -521,4 +677,3 @@ export declare function hasImages(content: LLMMessageContent): boolean;
521
677
  export declare function audioContent(base64Data: string, mimeType: string): LLMAudioContent;
522
678
  /** Check if message content contains audio */
523
679
  export declare function hasAudio(content: LLMMessageContent): boolean;
524
- //# sourceMappingURL=interfaces.d.ts.map
@@ -74,4 +74,3 @@ export function hasAudio(content) {
74
74
  return false;
75
75
  return content.some(part => part.type === 'audio');
76
76
  }
77
- //# sourceMappingURL=interfaces.js.map
package/dist/mcp.d.ts CHANGED
@@ -82,4 +82,3 @@ export declare class MCPToolBridge {
82
82
  private createToolHandler;
83
83
  private convertInputSchema;
84
84
  }
85
- //# sourceMappingURL=mcp.d.ts.map
package/dist/mcp.js CHANGED
@@ -252,4 +252,3 @@ export class MCPToolBridge {
252
252
  };
253
253
  }
254
254
  }
255
- //# sourceMappingURL=mcp.js.map
@@ -53,4 +53,3 @@ export declare class AnthropicClient extends BaseLLMClient {
53
53
  /** Extract text from multimodal content */
54
54
  private extractText;
55
55
  }
56
- //# sourceMappingURL=anthropic.d.ts.map
@@ -14,6 +14,7 @@
14
14
  * - Streaming: content_block_start/delta/stop events with typed deltas
15
15
  */
16
16
  import { BaseLLMClient } from '../client.js';
17
+ import { resolveThinking, anthropicThinkingBudget } from '../thinking.js';
17
18
  import { httpRequest, httpStream, parseSSE } from '../http.js';
18
19
  import { StandardChatDecoder } from '../stream-decoder.js';
19
20
  // ============================================================================
@@ -271,6 +272,17 @@ export class AnthropicClient extends BaseLLMClient {
271
272
  .map(m => typeof m.content === 'string' ? m.content : this.extractText(m.content))
272
273
  .join('\n\n')
273
274
  : undefined;
275
+ // Prompt caching support (Anthropic-specific, high impact for long system prompts / RAG)
276
+ let system = systemPrompt;
277
+ if (options?.enablePromptCaching && systemPrompt) {
278
+ system = [
279
+ {
280
+ type: 'text',
281
+ text: systemPrompt,
282
+ cache_control: { type: 'ephemeral' },
283
+ },
284
+ ];
285
+ }
274
286
  // Convert tools from OpenAI format to Anthropic format
275
287
  const tools = options?.tools ?? (Object.keys(this.toolRegistry).length > 0 ? this.getToolDefinitions() : undefined);
276
288
  const anthropicTools = tools?.map(t => this.convertToolDef(t));
@@ -285,15 +297,28 @@ export class AnthropicClient extends BaseLLMClient {
285
297
  else if (options?.toolChoice === 'auto') {
286
298
  toolChoice = { type: 'auto' };
287
299
  }
300
+ // Unified thinking flag → Anthropic extended thinking. Per-call overrides
301
+ // model config; the level sets `budget_tokens` (kept < max_tokens). The
302
+ // API forbids a custom temperature while thinking is enabled, so it is
303
+ // omitted in that case (the required default of 1 applies).
304
+ const thinking = resolveThinking(options?.thinking, this.options.thinking);
305
+ const thinkingOn = thinking?.enabled === true;
306
+ const requestedMax = options?.maxTokens ?? 4096;
307
+ // Extended thinking requires budget_tokens >= 1024 AND < max_tokens, so when
308
+ // thinking is on we bump max_tokens to guarantee headroom for the answer.
309
+ const budget = thinkingOn ? anthropicThinkingBudget(thinking?.level, requestedMax) : 0;
310
+ const maxTokens = thinkingOn ? Math.max(requestedMax, budget + 1024) : requestedMax;
288
311
  const body = {
289
312
  model: this.options.model,
290
313
  messages: this.convertMessages(nonSystemMessages),
291
- max_tokens: options?.maxTokens ?? 4096,
292
- ...(systemPrompt && { system: systemPrompt }),
314
+ max_tokens: maxTokens,
315
+ ...(system && { system }),
293
316
  ...(anthropicTools?.length && { tools: anthropicTools }),
294
317
  ...(toolChoice && { tool_choice: toolChoice }),
295
318
  ...(stream && { stream: true }),
296
- ...(options?.temperature !== undefined && { temperature: options.temperature }),
319
+ ...(thinkingOn
320
+ ? { thinking: { type: 'enabled', budget_tokens: budget } }
321
+ : (options?.temperature !== undefined && { temperature: options.temperature })),
297
322
  };
298
323
  return body;
299
324
  }
@@ -521,4 +546,3 @@ export class AnthropicClient extends BaseLLMClient {
521
546
  .join('');
522
547
  }
523
548
  }
524
- //# sourceMappingURL=anthropic.js.map
@@ -6,7 +6,7 @@
6
6
  * streaming, embeddings, and system prompt handling.
7
7
  */
8
8
  import { BaseLLMClient } from '../client.js';
9
- import type { LLMClientOptions, LLMChatMessage, LLMChatResponse, ChatOptions } from '../interfaces.js';
9
+ import type { LLMClientOptions, LLMChatMessage, LLMChatResponse, ChatOptions, DeepResearchOptions, DeepResearchResult, DeepResearchEvent } from '../interfaces.js';
10
10
  import type { DecodedEvent } from '../stream-decoder.js';
11
11
  import type { Auditor } from '../auditor.js';
12
12
  export declare class GoogleClient extends BaseLLMClient {
@@ -20,6 +20,26 @@ export declare class GoogleClient extends BaseLLMClient {
20
20
  private getHeaders;
21
21
  chat(messages: LLMChatMessage[], options?: ChatOptions): Promise<LLMChatResponse>;
22
22
  chatStream(messages: LLMChatMessage[], options?: ChatOptions): AsyncGenerator<DecodedEvent, LLMChatResponse | void, unknown>;
23
+ /** Deep Research is available via Google AI Studio only (not Vertex AI). */
24
+ supportsDeepResearch(): boolean;
25
+ private interactionsBase;
26
+ private deepResearchHeaders;
27
+ private buildInteractionBody;
28
+ private toDeepResearchResult;
29
+ /** httpRequest with small backoff retries — the preview interactions API is flaky (503s). */
30
+ private drRequest;
31
+ /**
32
+ * Run an agentic Deep Research interaction: create it, then poll until it
33
+ * completes/fails or the timeout elapses. Returns the final report + steps.
34
+ */
35
+ deepResearch(input: string, opts?: DeepResearchOptions): Promise<DeepResearchResult>;
36
+ /**
37
+ * Stream a Deep Research interaction's intermediate updates (`step.delta`
38
+ * thought/text/image events) and return the final result. Best-effort:
39
+ * falls back to the created interaction object if the stream ends early.
40
+ */
41
+ deepResearchStream(input: string, opts?: DeepResearchOptions): AsyncGenerator<DeepResearchEvent, DeepResearchResult, unknown>;
42
+ private delay;
23
43
  embed(text: string): Promise<number[]>;
24
44
  getModels(): Promise<string[]>;
25
45
  private buildRequestBody;
@@ -28,6 +48,7 @@ export declare class GoogleClient extends BaseLLMClient {
28
48
  private convertContentToGoogleParts;
29
49
  private convertToGoogleTool;
30
50
  private convertFunctionCallToToolCall;
51
+ private parseToolArguments;
31
52
  private parseGoogleResponse;
32
53
  /**
33
54
  * Retry HTTP requests for Flex tier when receiving 503/429 errors.
@@ -35,4 +56,3 @@ export declare class GoogleClient extends BaseLLMClient {
35
56
  */
36
57
  private fetchWithFlexRetry;
37
58
  }
38
- //# sourceMappingURL=google.d.ts.map