@llumiverse/drivers 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/lib/cjs/bedrock/index.js +49 -62
  2. package/lib/cjs/bedrock/index.js.map +1 -1
  3. package/lib/cjs/groq/index.js +7 -5
  4. package/lib/cjs/groq/index.js.map +1 -1
  5. package/lib/cjs/huggingface_ie.js +4 -4
  6. package/lib/cjs/huggingface_ie.js.map +1 -1
  7. package/lib/cjs/mistral/index.js +5 -5
  8. package/lib/cjs/mistral/index.js.map +1 -1
  9. package/lib/cjs/openai/index.js +35 -8
  10. package/lib/cjs/openai/index.js.map +1 -1
  11. package/lib/cjs/replicate.js +4 -4
  12. package/lib/cjs/replicate.js.map +1 -1
  13. package/lib/cjs/shared/claude-thinking.js +60 -0
  14. package/lib/cjs/shared/claude-thinking.js.map +1 -0
  15. package/lib/cjs/togetherai/index.js +4 -4
  16. package/lib/cjs/togetherai/index.js.map +1 -1
  17. package/lib/cjs/vertexai/models/claude.js +18 -16
  18. package/lib/cjs/vertexai/models/claude.js.map +1 -1
  19. package/lib/cjs/vertexai/models/gemini.js +58 -10
  20. package/lib/cjs/vertexai/models/gemini.js.map +1 -1
  21. package/lib/cjs/vertexai/models/imagen.js +2 -2
  22. package/lib/cjs/vertexai/models/imagen.js.map +1 -1
  23. package/lib/cjs/watsonx/index.js +4 -4
  24. package/lib/cjs/watsonx/index.js.map +1 -1
  25. package/lib/esm/bedrock/index.js +49 -62
  26. package/lib/esm/bedrock/index.js.map +1 -1
  27. package/lib/esm/groq/index.js +7 -5
  28. package/lib/esm/groq/index.js.map +1 -1
  29. package/lib/esm/huggingface_ie.js +5 -5
  30. package/lib/esm/huggingface_ie.js.map +1 -1
  31. package/lib/esm/mistral/index.js +5 -5
  32. package/lib/esm/mistral/index.js.map +1 -1
  33. package/lib/esm/openai/index.js +36 -9
  34. package/lib/esm/openai/index.js.map +1 -1
  35. package/lib/esm/replicate.js +4 -4
  36. package/lib/esm/replicate.js.map +1 -1
  37. package/lib/esm/shared/claude-thinking.js +57 -0
  38. package/lib/esm/shared/claude-thinking.js.map +1 -0
  39. package/lib/esm/togetherai/index.js +4 -4
  40. package/lib/esm/togetherai/index.js.map +1 -1
  41. package/lib/esm/vertexai/models/claude.js +19 -17
  42. package/lib/esm/vertexai/models/claude.js.map +1 -1
  43. package/lib/esm/vertexai/models/gemini.js +58 -10
  44. package/lib/esm/vertexai/models/gemini.js.map +1 -1
  45. package/lib/esm/vertexai/models/imagen.js +2 -2
  46. package/lib/esm/vertexai/models/imagen.js.map +1 -1
  47. package/lib/esm/watsonx/index.js +4 -4
  48. package/lib/esm/watsonx/index.js.map +1 -1
  49. package/lib/types/bedrock/index.d.ts +6 -6
  50. package/lib/types/bedrock/index.d.ts.map +1 -1
  51. package/lib/types/groq/index.d.ts +1 -1
  52. package/lib/types/groq/index.d.ts.map +1 -1
  53. package/lib/types/huggingface_ie.d.ts +1 -1
  54. package/lib/types/huggingface_ie.d.ts.map +1 -1
  55. package/lib/types/mistral/index.d.ts +2 -2
  56. package/lib/types/mistral/index.d.ts.map +1 -1
  57. package/lib/types/openai/index.d.ts +1 -1
  58. package/lib/types/openai/index.d.ts.map +1 -1
  59. package/lib/types/replicate.d.ts +1 -1
  60. package/lib/types/replicate.d.ts.map +1 -1
  61. package/lib/types/shared/claude-thinking.d.ts +36 -0
  62. package/lib/types/shared/claude-thinking.d.ts.map +1 -0
  63. package/lib/types/togetherai/index.d.ts +1 -1
  64. package/lib/types/togetherai/index.d.ts.map +1 -1
  65. package/lib/types/vertexai/models/claude.d.ts +4 -4
  66. package/lib/types/vertexai/models/claude.d.ts.map +1 -1
  67. package/lib/types/vertexai/models/gemini.d.ts.map +1 -1
  68. package/lib/types/watsonx/index.d.ts +1 -1
  69. package/lib/types/watsonx/index.d.ts.map +1 -1
  70. package/package.json +11 -11
  71. package/src/bedrock/index.ts +75 -87
  72. package/src/groq/index.ts +9 -8
  73. package/src/huggingface_ie.ts +5 -5
  74. package/src/mistral/index.ts +6 -6
  75. package/src/openai/index.ts +46 -16
  76. package/src/replicate.ts +5 -5
  77. package/src/shared/claude-thinking.ts +88 -0
  78. package/src/togetherai/index.ts +5 -5
  79. package/src/vertexai/models/claude.ts +32 -27
  80. package/src/vertexai/models/gemini.ts +57 -11
  81. package/src/vertexai/models/imagen.ts +2 -2
  82. package/src/watsonx/index.ts +5 -5
@@ -1,6 +1,6 @@
1
1
  import {
2
- AIModel,
3
2
  AbstractDriver,
3
+ AIModel,
4
4
  Completion,
5
5
  CompletionChunkObject,
6
6
  CompletionResult,
@@ -10,26 +10,26 @@ import {
10
10
  EmbeddingsResult,
11
11
  ExecutionOptions,
12
12
  ExecutionTokenUsage,
13
+ getConversationMeta,
14
+ getModelCapabilities,
15
+ incrementConversationTurn,
13
16
  JSONSchema,
14
17
  LlumiverseError,
15
18
  LlumiverseErrorContext,
19
+ modelModalitiesToArray,
16
20
  ModelType,
17
21
  OpenAiDalleOptions,
18
22
  OpenAiGptImageOptions,
19
23
  Providers,
24
+ stripBase64ImagesFromConversation,
25
+ stripHeartbeatsFromConversation,
26
+ supportsToolUse,
20
27
  ToolDefinition,
21
28
  ToolUse,
22
29
  TrainingJob,
23
30
  TrainingJobStatus,
24
31
  TrainingOptions,
25
32
  TrainingPromptOptions,
26
- getConversationMeta,
27
- getModelCapabilities,
28
- incrementConversationTurn,
29
- modelModalitiesToArray,
30
- stripBase64ImagesFromConversation,
31
- stripHeartbeatsFromConversation,
32
- supportsToolUse,
33
33
  truncateLargeTextInConversation,
34
34
  unwrapConversationArray,
35
35
  } from "@llumiverse/core";
@@ -61,6 +61,29 @@ function textToCompletionResult(text: string): CompletionResult[] {
61
61
  return text ? [{ type: "text", value: text }] : [];
62
62
  }
63
63
 
64
+ function isOpenAIReasoningModel(model: string): boolean {
65
+ const normalized = model.toLowerCase();
66
+ return normalized.includes("o1")
67
+ || normalized.includes("o3")
68
+ || normalized.includes("o4")
69
+ || normalized.includes("gpt-5");
70
+ }
71
+
72
+ function isGpt5ProModel(model: string): boolean {
73
+ const modelName = model.toLowerCase().split('/').pop() ?? model.toLowerCase();
74
+ return /^gpt-5(?:\.\d+)?-pro/.test(modelName);
75
+ }
76
+
77
+ function openAIReasoningEffort(model: string, effort: string | undefined): "low" | "medium" | "high" | undefined {
78
+ if (!effort || !isOpenAIReasoningModel(model)) {
79
+ return undefined;
80
+ }
81
+ if (isGpt5ProModel(model)) {
82
+ return "high";
83
+ }
84
+ return effort === "low" || effort === "medium" || effort === "high" ? effort : undefined;
85
+ }
86
+
64
87
  //TODO: Do we need a list?, replace with if statements and modernize?
65
88
  const supportFineTunning = new Set([
66
89
  "gpt-3.5-turbo-1106",
@@ -109,8 +132,11 @@ export abstract class BaseOpenAIDriver extends AbstractDriver<
109
132
  }
110
133
 
111
134
  async requestTextCompletionStream(prompt: ResponseInputItem[], options: ExecutionOptions): Promise<AsyncIterable<CompletionChunkObject>> {
112
- if (options.model_options?._option_id !== "openai-text" && options.model_options?._option_id !== "openai-thinking") {
113
- this.logger.warn({ options: options.model_options }, "Invalid model options");
135
+ if (options.model_options?._option_id !== undefined &&
136
+ options.model_options?._option_id !== "openai-text" &&
137
+ options.model_options?._option_id !== "openai-thinking" &&
138
+ options.model_options?._option_id !== "text-fallback") {
139
+ this.logger.debug({ options: options.model_options }, "Unexpected option id");
114
140
  }
115
141
 
116
142
  // Include conversation history (same as non-streaming)
@@ -144,8 +170,9 @@ export abstract class BaseOpenAIDriver extends AbstractDriver<
144
170
  }
145
171
  }
146
172
 
147
- const reasoning = model_options?.reasoning_effort ? { effort: model_options.reasoning_effort } : undefined;
148
- const isReasoningModel = /\b(o1|o3|o4)\b/.test(options.model);
173
+ const isReasoningModel = isOpenAIReasoningModel(options.model);
174
+ const effort = openAIReasoningEffort(options.model, model_options?.effort ?? model_options?.reasoning_effort);
175
+ const reasoning = effort ? { effort } : undefined;
149
176
 
150
177
  const stream = await this.service.responses.create({
151
178
  stream: true,
@@ -170,8 +197,10 @@ export abstract class BaseOpenAIDriver extends AbstractDriver<
170
197
  }
171
198
 
172
199
  async requestTextCompletion(prompt: ResponseInputItem[], options: ExecutionOptions): Promise<Completion> {
173
- if (options.model_options?._option_id !== "openai-text" && options.model_options?._option_id !== "openai-thinking") {
174
- this.logger.warn({ options: options.model_options }, "Invalid model options");
200
+ if (options.model_options?._option_id !== undefined &&
201
+ options.model_options?._option_id !== "openai-text" &&
202
+ options.model_options?._option_id !== "openai-thinking") {
203
+ this.logger.debug({ options: options.model_options }, "Unexpected option id");
175
204
  }
176
205
 
177
206
  convertRoles(prompt, options.model);
@@ -204,8 +233,9 @@ export abstract class BaseOpenAIDriver extends AbstractDriver<
204
233
  }
205
234
  }
206
235
 
207
- const reasoning = model_options?.reasoning_effort ? { effort: model_options.reasoning_effort } : undefined;
208
- const isReasoningModel = /\b(o1|o3|o4)\b/.test(options.model);
236
+ const isReasoningModel = isOpenAIReasoningModel(options.model);
237
+ const effort = openAIReasoningEffort(options.model, model_options?.effort ?? model_options?.reasoning_effort);
238
+ const reasoning = effort ? { effort } : undefined;
209
239
 
210
240
  const res = await this.service.responses.create({
211
241
  stream: false,
package/src/replicate.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import {
2
- AIModel,
3
2
  AbstractDriver,
3
+ AIModel,
4
4
  Completion,
5
5
  CompletionChunkObject,
6
6
  DataSource,
@@ -65,8 +65,8 @@ export class ReplicateDriver extends AbstractDriver<DriverOptions, string> {
65
65
  }
66
66
 
67
67
  async requestTextCompletionStream(prompt: string, options: ExecutionOptions): Promise<AsyncIterable<CompletionChunkObject>> {
68
- if (options.model_options?._option_id !== "text-fallback") {
69
- this.logger.warn({ options: options.model_options }, "Invalid model options");
68
+ if (options.model_options?._option_id !== undefined && options.model_options?._option_id !== "text-fallback") {
69
+ this.logger.debug({ options: options.model_options }, "Unexpected option id");
70
70
  }
71
71
  options.model_options = options.model_options as TextFallbackOptions;
72
72
 
@@ -110,8 +110,8 @@ export class ReplicateDriver extends AbstractDriver<DriverOptions, string> {
110
110
  }
111
111
 
112
112
  async requestTextCompletion(prompt: string, options: ExecutionOptions) {
113
- if (options.model_options?._option_id !== "text-fallback") {
114
- this.logger.warn({ options: options.model_options }, "Invalid model options");
113
+ if (options.model_options?._option_id !== undefined && options.model_options?._option_id !== "text-fallback") {
114
+ this.logger.debug({ options: options.model_options }, "Unexpected option id");
115
115
  }
116
116
  options.model_options = options.model_options as TextFallbackOptions;
117
117
  const model = ReplicateDriver.parseModelId(options.model);
@@ -0,0 +1,88 @@
1
+ import type { OutputConfig, ThinkingConfigParam } from "@anthropic-ai/sdk/resources/messages.js";
2
+ import {
3
+ hasSamplingParameterRestriction,
4
+ isClaudeVersionGTE,
5
+ supportsAdaptiveThinking,
6
+ } from "@llumiverse/core";
7
+
8
+ /**
9
+ * Common Claude model options relevant to thinking/effort configuration.
10
+ * Works with both VertexAIClaudeOptions and BedrockClaudeOptions.
11
+ */
12
+ export interface ClaudeThinkingInput {
13
+ thinking_budget_tokens?: number;
14
+ effort?: NonNullable<OutputConfig['effort']>;
15
+ /** Controls whether thinking content is included in the response. Does not enable thinking. */
16
+ include_thoughts?: boolean;
17
+ }
18
+
19
+ /**
20
+ * Result of resolving Claude thinking and effort configuration.
21
+ */
22
+ export interface ClaudeThinkingResult {
23
+ /** Thinking/reasoning config to include in the API payload. */
24
+ thinking: ThinkingConfigParam | undefined;
25
+ /** Output config (effort) to include in the API payload, if applicable. */
26
+ outputConfig: OutputConfig | undefined;
27
+ /** Whether sampling parameters (temperature, top_p, top_k) should be stripped. */
28
+ hasSamplingRestriction: boolean;
29
+ /** Whether the model supports thinking at all (>= Claude 3.7). */
30
+ supportsThinking: boolean;
31
+ }
32
+
33
+ /**
34
+ * Resolve thinking and effort configuration for a Claude model.
35
+ *
36
+ * - Extended thinking: enabled by setting `thinking_budget_tokens`.
37
+ * - Adaptive thinking: enabled by setting `effort` on models that support it (Opus 4.6+, Sonnet 4.6+).
38
+ * - `include_thoughts`: display-only; does not enable thinking.
39
+ *
40
+ * @param model - The model identifier string
41
+ * @param options - User-provided Claude options (thinking_budget_tokens, effort, include_thoughts)
42
+ */
43
+ export function resolveClaudeThinking(model: string, options?: ClaudeThinkingInput): ClaudeThinkingResult {
44
+ const supportsAdaptive = supportsAdaptiveThinking(model);
45
+ const samplingRestriction = hasSamplingParameterRestriction(model);
46
+ const supportsThinking = isClaudeVersionGTE(model, 3, 7);
47
+ const budgetTokens = options?.thinking_budget_tokens;
48
+ // Adaptive thinking is active when the caller supplies an effort level on a
49
+ // model that supports it. Extended thinking is active when a budget is set.
50
+ const adaptiveEnabled = supportsAdaptive && options?.effort != null;
51
+ const extendedEnabled = budgetTokens != null;
52
+
53
+ let thinking: ThinkingConfigParam | undefined;
54
+
55
+ if (!supportsThinking) {
56
+ // Pre-3.7 models: no thinking support
57
+ thinking = undefined;
58
+ } else if (extendedEnabled) {
59
+ // Explicit budget — use extended thinking regardless of adaptive support.
60
+ // On adaptive models this uses the deprecated path, but user input takes priority.
61
+ thinking = {
62
+ type: "enabled" as const,
63
+ budget_tokens: budgetTokens,
64
+ };
65
+ } else if (supportsAdaptive) {
66
+ // Adaptive models: enable when effort is set, omit otherwise (thinking is OFF by default).
67
+ // display controls whether thinking blocks are returned; defaults to omitted.
68
+ thinking = adaptiveEnabled
69
+ ? { type: "adaptive" as const, display: options?.include_thoughts ? "summarized" : "omitted" }
70
+ : undefined;
71
+ } else {
72
+ // Older thinking models (3.7, 4.5): no adaptive support, thinking is always disabled
73
+ // unless an explicit budget is provided (handled above).
74
+ thinking = { type: "disabled" as const };
75
+ }
76
+
77
+ // Output config for effort parameter (Opus 4.5+, Sonnet 4.6+, all 4.7+)
78
+ const outputConfig: OutputConfig | undefined = options?.effort
79
+ ? { effort: options.effort }
80
+ : undefined;
81
+
82
+ return {
83
+ thinking,
84
+ outputConfig,
85
+ hasSamplingRestriction: samplingRestriction,
86
+ supportsThinking,
87
+ };
88
+ }
@@ -1,4 +1,4 @@
1
- import { AIModel, AbstractDriver, Completion, CompletionChunkObject, DriverOptions, EmbeddingsResult, ExecutionOptions, TextFallbackOptions } from "@llumiverse/core";
1
+ import { AbstractDriver, AIModel, Completion, CompletionChunkObject, DriverOptions, EmbeddingsResult, ExecutionOptions, TextFallbackOptions } from "@llumiverse/core";
2
2
  import { transformSSEStream } from "@llumiverse/core/async";
3
3
  import { FetchClient } from "@vertesia/api-fetch-client";
4
4
  import { TextCompletion, TogetherModelInfo } from "./interfaces.js";
@@ -30,8 +30,8 @@ export class TogetherAIDriver extends AbstractDriver<TogetherAIDriverOptions, st
30
30
  }
31
31
 
32
32
  async requestTextCompletion(prompt: string, options: ExecutionOptions): Promise<Completion> {
33
- if (options.model_options?._option_id !== "text-fallback") {
34
- this.logger.warn({ options: options.model_options }, "Invalid model options");
33
+ if (options.model_options?._option_id !== undefined && options.model_options?._option_id !== "text-fallback") {
34
+ this.logger.debug({ options: options.model_options }, "Unexpected option id");
35
35
  }
36
36
  options.model_options = options.model_options as TextFallbackOptions;
37
37
 
@@ -72,8 +72,8 @@ export class TogetherAIDriver extends AbstractDriver<TogetherAIDriverOptions, st
72
72
  }
73
73
 
74
74
  async requestTextCompletionStream(prompt: string, options: ExecutionOptions): Promise<AsyncIterable<CompletionChunkObject>> {
75
- if (options.model_options?._option_id !== "text-fallback") {
76
- this.logger.warn({ options: options.model_options }, "Invalid model options");
75
+ if (options.model_options?._option_id !== undefined && options.model_options?._option_id !== "text-fallback") {
76
+ this.logger.debug({ options: options.model_options }, "Unexpected option id");
77
77
  }
78
78
  options.model_options = options.model_options as TextFallbackOptions;
79
79
 
@@ -11,27 +11,28 @@ import {
11
11
  RateLimitError,
12
12
  UnprocessableEntityError,
13
13
  } from '@anthropic-ai/sdk/error';
14
- import { ContentBlock, ContentBlockParam, DocumentBlockParam, ImageBlockParam, Message, MessageParam, TextBlockParam, ToolResultBlockParam } from "@anthropic-ai/sdk/resources/index.js";
15
- import { MessageStreamParams } from "@anthropic-ai/sdk/resources/index.mjs";
16
- import { MessageCreateParamsBase, MessageCreateParamsNonStreaming, RawMessageStreamEvent } from "@anthropic-ai/sdk/resources/messages.js";
14
+ import type { ContentBlock, ContentBlockParam, DocumentBlockParam, ImageBlockParam, Message, MessageParam, TextBlockParam, ToolResultBlockParam } from "@anthropic-ai/sdk/resources/index.js";
15
+ import type { MessageStreamParams } from "@anthropic-ai/sdk/resources/index.mjs";
16
+ import type { MessageCreateParamsBase, MessageCreateParamsNonStreaming, RawMessageStreamEvent } from "@anthropic-ai/sdk/resources/messages.js";
17
17
  import {
18
- AIModel, Completion, CompletionChunkObject, ExecutionOptions, ExecutionTokenUsage,
18
+ type AIModel, type Completion, type CompletionChunkObject, type ExecutionOptions, type ExecutionTokenUsage,
19
19
  getConversationMeta,
20
20
  getMaxTokensLimitVertexAi,
21
21
  incrementConversationTurn,
22
- JSONObject,
23
- LlumiverseError, LlumiverseErrorContext,
22
+ type JSONObject,
23
+ LlumiverseError, type LlumiverseErrorContext,
24
24
  ModelType,
25
- PromptRole, PromptSegment, readStreamAsBase64, readStreamAsString, StatelessExecutionOptions,
25
+ PromptRole, type PromptSegment, readStreamAsBase64, readStreamAsString, type StatelessExecutionOptions,
26
26
  stripBase64ImagesFromConversation,
27
27
  stripHeartbeatsFromConversation,
28
- ToolUse,
28
+ type ToolUse,
29
29
  truncateLargeTextInConversation,
30
- VertexAIClaudeOptions
30
+ type VertexAIClaudeOptions,
31
31
  } from "@llumiverse/core";
32
32
  import { asyncMap } from "@llumiverse/core/async";
33
- import { VertexAIDriver } from "../index.js";
34
- import { ModelDefinition } from "../models.js";
33
+ import { resolveClaudeThinking } from "../../shared/claude-thinking.js";
34
+ import type { VertexAIDriver } from "../index.js";
35
+ import type { ModelDefinition } from "../models.js";
35
36
 
36
37
  export const ANTHROPIC_REGIONS: Record<string, string> = {
37
38
  us: "us-east5",
@@ -313,10 +314,13 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
313
314
  options = { ...options, model: modelName };
314
315
 
315
316
  const client = await driver.getAnthropicClient(region);
316
- options.model_options = options.model_options as VertexAIClaudeOptions;
317
+ const model_options = options.model_options as VertexAIClaudeOptions | undefined;
317
318
 
318
- if (options.model_options?._option_id !== "vertexai-claude") {
319
- driver.logger.warn({ options: options.model_options }, "Invalid model options");
319
+ if (model_options?._option_id !== undefined &&
320
+ model_options?._option_id !== "vertexai-claude" &&
321
+ model_options?._option_id !== "text-fallback"
322
+ ) {
323
+ driver.logger.debug({ options: options.model_options }, "Unexpected option id");
320
324
  }
321
325
 
322
326
  let conversation = updateConversation(options.conversation as ClaudePrompt, prompt);
@@ -328,7 +332,7 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
328
332
  const result = await client.messages.create(nonStreamingPayload, requestOptions) satisfies Message;
329
333
 
330
334
  // Use the new function to collect text content, including thinking if enabled
331
- const includeThoughts = options.model_options?.include_thoughts ?? false;
335
+ const includeThoughts = model_options?.include_thoughts ?? false;
332
336
  const text = collectAllTextContent(result.content, includeThoughts);
333
337
  const tool_use = collectTools(result.content);
334
338
 
@@ -371,8 +375,11 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
371
375
  const client = await driver.getAnthropicClient(region);
372
376
  const model_options = options.model_options as VertexAIClaudeOptions | undefined;
373
377
 
374
- if (model_options?._option_id !== "vertexai-claude") {
375
- driver.logger.warn({ options: options.model_options }, "Invalid model options");
378
+ if ((model_options?._option_id !== undefined &&
379
+ model_options?._option_id !== "vertexai-claude" &&
380
+ model_options?._option_id !== "text-fallback")
381
+ ) {
382
+ driver.logger.debug({ options: options.model_options }, "Unexpected option id");
376
383
  }
377
384
 
378
385
  // Include conversation history (same as non-streaming)
@@ -979,23 +986,21 @@ function getClaudePayload(options: ExecutionOptions, prompt: ClaudePrompt): { pa
979
986
  }
980
987
  }
981
988
 
989
+ // Resolve thinking, effort, and sampling restriction using shared Claude helper
990
+ const { thinking, outputConfig, hasSamplingRestriction } = resolveClaudeThinking(modelName, model_options);
991
+
982
992
  const payload = {
983
993
  messages: sanitizedMessages,
984
994
  system: sanitizedSystem,
985
995
  tools: sanitizedTools,
986
- temperature: model_options?.temperature,
996
+ temperature: hasSamplingRestriction ? undefined : model_options?.temperature,
987
997
  model: modelName,
988
998
  max_tokens: maxToken(options),
989
- top_p: model_options?.temperature != null ? undefined : model_options?.top_p,
990
- top_k: model_options?.top_k,
999
+ top_p: hasSamplingRestriction ? undefined : (model_options?.temperature != null ? undefined : model_options?.top_p),
1000
+ top_k: hasSamplingRestriction ? undefined : model_options?.top_k,
991
1001
  stop_sequences: model_options?.stop_sequence,
992
- thinking: model_options?.thinking_mode ?
993
- {
994
- budget_tokens: model_options?.thinking_budget_tokens ?? 1024,
995
- type: "enabled" as const
996
- } : {
997
- type: "disabled" as const
998
- }
1002
+ thinking,
1003
+ ...(outputConfig && { output_config: outputConfig }),
999
1004
  };
1000
1005
 
1001
1006
  return { payload, requestOptions };
@@ -243,9 +243,12 @@ const recoverableToolCallReasons = [
243
243
  function geminiThinkingBudget(option: StatelessExecutionOptions) {
244
244
  const model_options = option.model_options as VertexAIGeminiOptions | undefined;
245
245
  // If thinking_budget_tokens is explicitly set in model options, use it directly
246
- if (model_options?.thinking_budget_tokens) {
246
+ if (model_options?.thinking_budget_tokens !== undefined) {
247
247
  return model_options.thinking_budget_tokens;
248
248
  }
249
+ if (model_options?.effort) {
250
+ return geminiBudgetForEffort(option.model, model_options.effort);
251
+ }
249
252
  // Set minimum thinking level by default.
250
253
  // Docs: https://ai.google.dev/gemini-api/docs/thinking#set-budget
251
254
  if (getGeminiModelVersion(option.model) === '2.5') {
@@ -257,33 +260,76 @@ function geminiThinkingBudget(option: StatelessExecutionOptions) {
257
260
  return undefined;
258
261
  }
259
262
 
263
+ function geminiThinkingLevelForEffort(model: string, effort: VertexAIGeminiOptions["effort"]): ThinkingLevel | undefined {
264
+ if (model.includes("gemini-3-pro-image")) {
265
+ return ThinkingLevel.HIGH;
266
+ }
267
+ if (model.includes("gemini-3.1-flash-image")) {
268
+ return effort === "low" ? ThinkingLevel.MINIMAL : ThinkingLevel.HIGH;
269
+ }
270
+ switch (effort) {
271
+ case "low":
272
+ return ThinkingLevel.LOW;
273
+ case "medium":
274
+ return ThinkingLevel.MEDIUM;
275
+ case "high":
276
+ return ThinkingLevel.HIGH;
277
+ default:
278
+ return undefined;
279
+ }
280
+ }
281
+
282
+ function geminiBudgetForEffort(model: string, effort: NonNullable<VertexAIGeminiOptions["effort"]>): number {
283
+ const isFlashLite = model.includes("flash-lite");
284
+ const isFlash = model.includes("flash") && !isFlashLite;
285
+ const isPro = model.includes("pro");
286
+
287
+ if (effort === "low") {
288
+ if (isPro) return 128;
289
+ if (isFlashLite) return 512;
290
+ if (isFlash) return 1;
291
+ return 1024;
292
+ }
293
+ if (effort === "medium") {
294
+ return 8192;
295
+ }
296
+ if (isPro) return 32768;
297
+ if (isFlash || isFlashLite) return 24576;
298
+ return 8192;
299
+ }
300
+
260
301
  function geminiThinkingConfig(option: StatelessExecutionOptions): ThinkingConfig | undefined {
261
302
  const model_options = option.model_options as VertexAIGeminiOptions | undefined;
262
303
 
263
304
  // If thinking options are explicitly set in model options, use them directly
264
305
  const include_thoughts = model_options?.include_thoughts ?? false;
265
- if (model_options?.thinking_budget_tokens || model_options?.thinking_level) {
306
+ if (model_options?.thinking_budget_tokens !== undefined || model_options?.thinking_level) {
266
307
  return {
267
308
  includeThoughts: include_thoughts,
268
309
  thinkingBudget: model_options.thinking_budget_tokens,
269
310
  thinkingLevel: model_options.thinking_level,
270
311
  };
271
312
  }
313
+ if (model_options?.effort) {
314
+ if (isGeminiModelVersionGte(option.model, '3.0')) {
315
+ return {
316
+ includeThoughts: include_thoughts,
317
+ thinkingLevel: geminiThinkingLevelForEffort(option.model, model_options.effort),
318
+ };
319
+ }
320
+ return {
321
+ includeThoughts: include_thoughts,
322
+ thinkingBudget: geminiBudgetForEffort(option.model, model_options.effort),
323
+ };
324
+ }
272
325
 
273
326
  // Set a low thinking level by default.
274
327
  // Docs: https://ai.google.dev/gemini-api/docs/thinking#set-budget
275
328
  // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/thinking
276
329
  if (isGeminiModelVersionGte(option.model, '3.0')) {
277
- if (option.model.includes("gemini-3-pro-image")) {
278
- // Does not support thinking level.
279
- return {
280
- includeThoughts: include_thoughts,
281
- thinkingBudget: -1
282
- };
283
- }
284
330
  return {
285
331
  includeThoughts: include_thoughts,
286
- thinkingLevel: ThinkingLevel.LOW
332
+ thinkingLevel: option.model.includes("gemini-3-pro-image") ? ThinkingLevel.HIGH : ThinkingLevel.LOW
287
333
  };
288
334
  }
289
335
  if (isGeminiModelVersionGte(option.model, '2.5')) {
@@ -914,4 +960,4 @@ function formatFunctionResponse(response: string): JSONObject {
914
960
  } else {
915
961
  return { output: response };
916
962
  }
917
- }
963
+ }
@@ -323,8 +323,8 @@ export class ImagenModelDefinition {
323
323
  }
324
324
 
325
325
  async requestImageGeneration(driver: VertexAIDriver, prompt: ImagenPrompt, options: ExecutionOptions): Promise<Completion> {
326
- if (options.model_options?._option_id !== "vertexai-imagen") {
327
- driver.logger.warn({ options: options.model_options }, "Invalid model options");
326
+ if (options.model_options?._option_id !== undefined && options.model_options?._option_id !== "vertexai-imagen") {
327
+ driver.logger.debug({ options: options.model_options }, "Unexpected option id");
328
328
  }
329
329
  options.model_options = options.model_options as ImagenOptions | undefined;
330
330
 
@@ -1,4 +1,4 @@
1
- import { AIModel, AbstractDriver, Completion, CompletionChunkObject, DriverOptions, EmbeddingsOptions, EmbeddingsResult, ExecutionOptions, TextFallbackOptions } from "@llumiverse/core";
1
+ import { AbstractDriver, AIModel, Completion, CompletionChunkObject, DriverOptions, EmbeddingsOptions, EmbeddingsResult, ExecutionOptions, TextFallbackOptions } from "@llumiverse/core";
2
2
  import { transformSSEStream } from "@llumiverse/core/async";
3
3
  import { FetchClient } from "@vertesia/api-fetch-client";
4
4
  import { GenerateEmbeddingPayload, GenerateEmbeddingResponse, WatsonAuthToken, WatsonxListModelResponse, WatsonxModelSpec, WatsonxTextGenerationPayload, WatsonxTextGenerationResponse } from "./interfaces.js";
@@ -30,8 +30,8 @@ export class WatsonxDriver extends AbstractDriver<WatsonxDriverOptions, string>
30
30
  }
31
31
 
32
32
  async requestTextCompletion(prompt: string, options: ExecutionOptions): Promise<Completion> {
33
- if (options.model_options?._option_id !== "text-fallback") {
34
- this.logger.warn({ options: options.model_options }, "Invalid model options");
33
+ if (options.model_options?._option_id !== undefined && options.model_options?._option_id !== "text-fallback") {
34
+ this.logger.debug({ options: options.model_options }, "Unexpected option id");
35
35
  }
36
36
  options.model_options = options.model_options as TextFallbackOptions | undefined;
37
37
 
@@ -65,8 +65,8 @@ export class WatsonxDriver extends AbstractDriver<WatsonxDriverOptions, string>
65
65
  }
66
66
 
67
67
  async requestTextCompletionStream(prompt: string, options: ExecutionOptions): Promise<AsyncIterable<CompletionChunkObject>> {
68
- if (options.model_options?._option_id !== "text-fallback") {
69
- this.logger.warn({ options: options.model_options }, "Invalid model options");
68
+ if (options.model_options?._option_id !== undefined && options.model_options?._option_id !== "text-fallback") {
69
+ this.logger.debug({ options: options.model_options }, "Unexpected option id");
70
70
  }
71
71
  options.model_options = options.model_options as TextFallbackOptions | undefined;
72
72
  const payload: WatsonxTextGenerationPayload = {