@llumiverse/drivers 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/bedrock/index.js +49 -62
- package/lib/cjs/bedrock/index.js.map +1 -1
- package/lib/cjs/groq/index.js +7 -5
- package/lib/cjs/groq/index.js.map +1 -1
- package/lib/cjs/huggingface_ie.js +4 -4
- package/lib/cjs/huggingface_ie.js.map +1 -1
- package/lib/cjs/mistral/index.js +5 -5
- package/lib/cjs/mistral/index.js.map +1 -1
- package/lib/cjs/openai/index.js +35 -8
- package/lib/cjs/openai/index.js.map +1 -1
- package/lib/cjs/replicate.js +4 -4
- package/lib/cjs/replicate.js.map +1 -1
- package/lib/cjs/shared/claude-thinking.js +60 -0
- package/lib/cjs/shared/claude-thinking.js.map +1 -0
- package/lib/cjs/togetherai/index.js +4 -4
- package/lib/cjs/togetherai/index.js.map +1 -1
- package/lib/cjs/vertexai/models/claude.js +18 -16
- package/lib/cjs/vertexai/models/claude.js.map +1 -1
- package/lib/cjs/vertexai/models/gemini.js +58 -10
- package/lib/cjs/vertexai/models/gemini.js.map +1 -1
- package/lib/cjs/vertexai/models/imagen.js +2 -2
- package/lib/cjs/vertexai/models/imagen.js.map +1 -1
- package/lib/cjs/watsonx/index.js +4 -4
- package/lib/cjs/watsonx/index.js.map +1 -1
- package/lib/esm/bedrock/index.js +49 -62
- package/lib/esm/bedrock/index.js.map +1 -1
- package/lib/esm/groq/index.js +7 -5
- package/lib/esm/groq/index.js.map +1 -1
- package/lib/esm/huggingface_ie.js +5 -5
- package/lib/esm/huggingface_ie.js.map +1 -1
- package/lib/esm/mistral/index.js +5 -5
- package/lib/esm/mistral/index.js.map +1 -1
- package/lib/esm/openai/index.js +36 -9
- package/lib/esm/openai/index.js.map +1 -1
- package/lib/esm/replicate.js +4 -4
- package/lib/esm/replicate.js.map +1 -1
- package/lib/esm/shared/claude-thinking.js +57 -0
- package/lib/esm/shared/claude-thinking.js.map +1 -0
- package/lib/esm/togetherai/index.js +4 -4
- package/lib/esm/togetherai/index.js.map +1 -1
- package/lib/esm/vertexai/models/claude.js +19 -17
- package/lib/esm/vertexai/models/claude.js.map +1 -1
- package/lib/esm/vertexai/models/gemini.js +58 -10
- package/lib/esm/vertexai/models/gemini.js.map +1 -1
- package/lib/esm/vertexai/models/imagen.js +2 -2
- package/lib/esm/vertexai/models/imagen.js.map +1 -1
- package/lib/esm/watsonx/index.js +4 -4
- package/lib/esm/watsonx/index.js.map +1 -1
- package/lib/types/bedrock/index.d.ts +6 -6
- package/lib/types/bedrock/index.d.ts.map +1 -1
- package/lib/types/groq/index.d.ts +1 -1
- package/lib/types/groq/index.d.ts.map +1 -1
- package/lib/types/huggingface_ie.d.ts +1 -1
- package/lib/types/huggingface_ie.d.ts.map +1 -1
- package/lib/types/mistral/index.d.ts +2 -2
- package/lib/types/mistral/index.d.ts.map +1 -1
- package/lib/types/openai/index.d.ts +1 -1
- package/lib/types/openai/index.d.ts.map +1 -1
- package/lib/types/replicate.d.ts +1 -1
- package/lib/types/replicate.d.ts.map +1 -1
- package/lib/types/shared/claude-thinking.d.ts +36 -0
- package/lib/types/shared/claude-thinking.d.ts.map +1 -0
- package/lib/types/togetherai/index.d.ts +1 -1
- package/lib/types/togetherai/index.d.ts.map +1 -1
- package/lib/types/vertexai/models/claude.d.ts +4 -4
- package/lib/types/vertexai/models/claude.d.ts.map +1 -1
- package/lib/types/vertexai/models/gemini.d.ts.map +1 -1
- package/lib/types/watsonx/index.d.ts +1 -1
- package/lib/types/watsonx/index.d.ts.map +1 -1
- package/package.json +11 -11
- package/src/bedrock/index.ts +75 -87
- package/src/groq/index.ts +9 -8
- package/src/huggingface_ie.ts +5 -5
- package/src/mistral/index.ts +6 -6
- package/src/openai/index.ts +46 -16
- package/src/replicate.ts +5 -5
- package/src/shared/claude-thinking.ts +88 -0
- package/src/togetherai/index.ts +5 -5
- package/src/vertexai/models/claude.ts +32 -27
- package/src/vertexai/models/gemini.ts +57 -11
- package/src/vertexai/models/imagen.ts +2 -2
- package/src/watsonx/index.ts +5 -5
package/src/openai/index.ts
CHANGED
@@ -1,6 +1,6 @@
 import {
-    AIModel,
     AbstractDriver,
+    AIModel,
     Completion,
     CompletionChunkObject,
     CompletionResult,
@@ -10,26 +10,26 @@ import {
     EmbeddingsResult,
     ExecutionOptions,
     ExecutionTokenUsage,
+    getConversationMeta,
+    getModelCapabilities,
+    incrementConversationTurn,
     JSONSchema,
     LlumiverseError,
     LlumiverseErrorContext,
+    modelModalitiesToArray,
     ModelType,
     OpenAiDalleOptions,
     OpenAiGptImageOptions,
     Providers,
+    stripBase64ImagesFromConversation,
+    stripHeartbeatsFromConversation,
+    supportsToolUse,
     ToolDefinition,
     ToolUse,
     TrainingJob,
     TrainingJobStatus,
     TrainingOptions,
     TrainingPromptOptions,
-    getConversationMeta,
-    getModelCapabilities,
-    incrementConversationTurn,
-    modelModalitiesToArray,
-    stripBase64ImagesFromConversation,
-    stripHeartbeatsFromConversation,
-    supportsToolUse,
     truncateLargeTextInConversation,
     unwrapConversationArray,
 } from "@llumiverse/core";
@@ -61,6 +61,29 @@ function textToCompletionResult(text: string): CompletionResult[] {
     return text ? [{ type: "text", value: text }] : [];
 }

+function isOpenAIReasoningModel(model: string): boolean {
+    const normalized = model.toLowerCase();
+    return normalized.includes("o1")
+        || normalized.includes("o3")
+        || normalized.includes("o4")
+        || normalized.includes("gpt-5");
+}
+
+function isGpt5ProModel(model: string): boolean {
+    const modelName = model.toLowerCase().split('/').pop() ?? model.toLowerCase();
+    return /^gpt-5(?:\.\d+)?-pro/.test(modelName);
+}
+
+function openAIReasoningEffort(model: string, effort: string | undefined): "low" | "medium" | "high" | undefined {
+    if (!effort || !isOpenAIReasoningModel(model)) {
+        return undefined;
+    }
+    if (isGpt5ProModel(model)) {
+        return "high";
+    }
+    return effort === "low" || effort === "medium" || effort === "high" ? effort : undefined;
+}
+
 //TODO: Do we need a list?, replace with if statements and modernize?
 const supportFineTunning = new Set([
     "gpt-3.5-turbo-1106",
@@ -109,8 +132,11 @@ export abstract class BaseOpenAIDriver extends AbstractDriver<
     }

     async requestTextCompletionStream(prompt: ResponseInputItem[], options: ExecutionOptions): Promise<AsyncIterable<CompletionChunkObject>> {
-        if (options.model_options?._option_id !==
-
+        if (options.model_options?._option_id !== undefined &&
+            options.model_options?._option_id !== "openai-text" &&
+            options.model_options?._option_id !== "openai-thinking" &&
+            options.model_options?._option_id !== "text-fallback") {
+            this.logger.debug({ options: options.model_options }, "Unexpected option id");
         }

         // Include conversation history (same as non-streaming)
@@ -144,8 +170,9 @@ export abstract class BaseOpenAIDriver extends AbstractDriver<
             }
         }

-        const
-        const
+        const isReasoningModel = isOpenAIReasoningModel(options.model);
+        const effort = openAIReasoningEffort(options.model, model_options?.effort ?? model_options?.reasoning_effort);
+        const reasoning = effort ? { effort } : undefined;

         const stream = await this.service.responses.create({
             stream: true,
@@ -170,8 +197,10 @@ export abstract class BaseOpenAIDriver extends AbstractDriver<
     }

     async requestTextCompletion(prompt: ResponseInputItem[], options: ExecutionOptions): Promise<Completion> {
-        if (options.model_options?._option_id !==
-
+        if (options.model_options?._option_id !== undefined &&
+            options.model_options?._option_id !== "openai-text" &&
+            options.model_options?._option_id !== "openai-thinking") {
+            this.logger.debug({ options: options.model_options }, "Unexpected option id");
         }

         convertRoles(prompt, options.model);
@@ -204,8 +233,9 @@ export abstract class BaseOpenAIDriver extends AbstractDriver<
             }
         }

-        const
-        const
+        const isReasoningModel = isOpenAIReasoningModel(options.model);
+        const effort = openAIReasoningEffort(options.model, model_options?.effort ?? model_options?.reasoning_effort);
+        const reasoning = effort ? { effort } : undefined;

         const res = await this.service.responses.create({
             stream: false,
package/src/replicate.ts
CHANGED
@@ -1,6 +1,6 @@
 import {
-    AIModel,
     AbstractDriver,
+    AIModel,
     Completion,
     CompletionChunkObject,
     DataSource,
@@ -65,8 +65,8 @@ export class ReplicateDriver extends AbstractDriver<DriverOptions, string> {
     }

     async requestTextCompletionStream(prompt: string, options: ExecutionOptions): Promise<AsyncIterable<CompletionChunkObject>> {
-        if (options.model_options?._option_id !== "text-fallback") {
-            this.logger.
+        if (options.model_options?._option_id !== undefined && options.model_options?._option_id !== "text-fallback") {
+            this.logger.debug({ options: options.model_options }, "Unexpected option id");
         }
         options.model_options = options.model_options as TextFallbackOptions;

@@ -110,8 +110,8 @@ export class ReplicateDriver extends AbstractDriver<DriverOptions, string> {
     }

     async requestTextCompletion(prompt: string, options: ExecutionOptions) {
-        if (options.model_options?._option_id !== "text-fallback") {
-            this.logger.
+        if (options.model_options?._option_id !== undefined && options.model_options?._option_id !== "text-fallback") {
+            this.logger.debug({ options: options.model_options }, "Unexpected option id");
         }
         options.model_options = options.model_options as TextFallbackOptions;
         const model = ReplicateDriver.parseModelId(options.model);
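The same relaxation of the `_option_id` check recurs across the drivers in this release (OpenAI, Replicate, TogetherAI, Vertex AI Claude and Imagen, Watsonx): an undefined `model_options` or `_option_id` no longer trips the debug log, only a mismatching id does. The drivers inline the condition; the helper below is hypothetical and only restates the predicate for clarity.

    // Hypothetical helper mirroring the inlined check used by the drivers.
    function isUnexpectedOptionId(id: string | undefined, accepted: string[]): boolean {
        return id !== undefined && !accepted.includes(id);
    }

    isUnexpectedOptionId(undefined, ["text-fallback"]);        // false: undefined is now accepted silently
    isUnexpectedOptionId("text-fallback", ["text-fallback"]);  // false: expected id
    isUnexpectedOptionId("openai-text", ["text-fallback"]);    // true: "Unexpected option id" is logged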
package/src/shared/claude-thinking.ts
ADDED
@@ -0,0 +1,88 @@
+import type { OutputConfig, ThinkingConfigParam } from "@anthropic-ai/sdk/resources/messages.js";
+import {
+    hasSamplingParameterRestriction,
+    isClaudeVersionGTE,
+    supportsAdaptiveThinking,
+} from "@llumiverse/core";
+
+/**
+ * Common Claude model options relevant to thinking/effort configuration.
+ * Works with both VertexAIClaudeOptions and BedrockClaudeOptions.
+ */
+export interface ClaudeThinkingInput {
+    thinking_budget_tokens?: number;
+    effort?: NonNullable<OutputConfig['effort']>;
+    /** Controls whether thinking content is included in the response. Does not enable thinking. */
+    include_thoughts?: boolean;
+}
+
+/**
+ * Result of resolving Claude thinking and effort configuration.
+ */
+export interface ClaudeThinkingResult {
+    /** Thinking/reasoning config to include in the API payload. */
+    thinking: ThinkingConfigParam | undefined;
+    /** Output config (effort) to include in the API payload, if applicable. */
+    outputConfig: OutputConfig | undefined;
+    /** Whether sampling parameters (temperature, top_p, top_k) should be stripped. */
+    hasSamplingRestriction: boolean;
+    /** Whether the model supports thinking at all (>= Claude 3.7). */
+    supportsThinking: boolean;
+}
+
+/**
+ * Resolve thinking and effort configuration for a Claude model.
+ *
+ * - Extended thinking: enabled by setting `thinking_budget_tokens`.
+ * - Adaptive thinking: enabled by setting `effort` on models that support it (Opus 4.6+, Sonnet 4.6+).
+ * - `include_thoughts`: display-only; does not enable thinking.
+ *
+ * @param model - The model identifier string
+ * @param options - User-provided Claude options (thinking_budget_tokens, effort, include_thoughts)
+ */
+export function resolveClaudeThinking(model: string, options?: ClaudeThinkingInput): ClaudeThinkingResult {
+    const supportsAdaptive = supportsAdaptiveThinking(model);
+    const samplingRestriction = hasSamplingParameterRestriction(model);
+    const supportsThinking = isClaudeVersionGTE(model, 3, 7);
+    const budgetTokens = options?.thinking_budget_tokens;
+    // Adaptive thinking is active when the caller supplies an effort level on a
+    // model that supports it. Extended thinking is active when a budget is set.
+    const adaptiveEnabled = supportsAdaptive && options?.effort != null;
+    const extendedEnabled = budgetTokens != null;
+
+    let thinking: ThinkingConfigParam | undefined;
+
+    if (!supportsThinking) {
+        // Pre-3.7 models: no thinking support
+        thinking = undefined;
+    } else if (extendedEnabled) {
+        // Explicit budget — use extended thinking regardless of adaptive support.
+        // On adaptive models this uses the deprecated path, but user input takes priority.
+        thinking = {
+            type: "enabled" as const,
+            budget_tokens: budgetTokens,
+        };
+    } else if (supportsAdaptive) {
+        // Adaptive models: enable when effort is set, omit otherwise (thinking is OFF by default).
+        // display controls whether thinking blocks are returned; defaults to omitted.
+        thinking = adaptiveeEnabled
+            ? { type: "adaptive" as const, display: options?.include_thoughts ? "summarized" : "omitted" }
+            : undefined;
+    } else {
+        // Older thinking models (3.7, 4.5): no adaptive support, thinking is always disabled
+        // unless an explicit budget is provided (handled above).
+        thinking = { type: "disabled" as const };
+    }
+
+    // Output config for effort parameter (Opus 4.5+, Sonnet 4.6+, all 4.7+)
+    const outputConfig: OutputConfig | undefined = options?.effort
+        ? { effort: options.effort }
+        : undefined;
+
+    return {
+        thinking,
+        outputConfig,
+        hasSamplingRestriction: samplingRestriction,
+        supportsThinking,
+    };
+}
package/src/togetherai/index.ts
CHANGED
@@ -1,4 +1,4 @@
-import {
+import { AbstractDriver, AIModel, Completion, CompletionChunkObject, DriverOptions, EmbeddingsResult, ExecutionOptions, TextFallbackOptions } from "@llumiverse/core";
 import { transformSSEStream } from "@llumiverse/core/async";
 import { FetchClient } from "@vertesia/api-fetch-client";
 import { TextCompletion, TogetherModelInfo } from "./interfaces.js";
@@ -30,8 +30,8 @@ export class TogetherAIDriver extends AbstractDriver<TogetherAIDriverOptions, st
     }

     async requestTextCompletion(prompt: string, options: ExecutionOptions): Promise<Completion> {
-        if (options.model_options?._option_id !== "text-fallback") {
-            this.logger.
+        if (options.model_options?._option_id !== undefined && options.model_options?._option_id !== "text-fallback") {
+            this.logger.debug({ options: options.model_options }, "Unexpected option id");
         }
         options.model_options = options.model_options as TextFallbackOptions;

@@ -72,8 +72,8 @@ export class TogetherAIDriver extends AbstractDriver<TogetherAIDriverOptions, st
     }

     async requestTextCompletionStream(prompt: string, options: ExecutionOptions): Promise<AsyncIterable<CompletionChunkObject>> {
-        if (options.model_options?._option_id !== "text-fallback") {
-            this.logger.
+        if (options.model_options?._option_id !== undefined && options.model_options?._option_id !== "text-fallback") {
+            this.logger.debug({ options: options.model_options }, "Unexpected option id");
         }
         options.model_options = options.model_options as TextFallbackOptions;

package/src/vertexai/models/claude.ts
CHANGED
@@ -11,27 +11,28 @@ import {
     RateLimitError,
     UnprocessableEntityError,
 } from '@anthropic-ai/sdk/error';
-import { ContentBlock, ContentBlockParam, DocumentBlockParam, ImageBlockParam, Message, MessageParam, TextBlockParam, ToolResultBlockParam } from "@anthropic-ai/sdk/resources/index.js";
-import { MessageStreamParams } from "@anthropic-ai/sdk/resources/index.mjs";
-import { MessageCreateParamsBase, MessageCreateParamsNonStreaming, RawMessageStreamEvent } from "@anthropic-ai/sdk/resources/messages.js";
+import type { ContentBlock, ContentBlockParam, DocumentBlockParam, ImageBlockParam, Message, MessageParam, TextBlockParam, ToolResultBlockParam } from "@anthropic-ai/sdk/resources/index.js";
+import type { MessageStreamParams } from "@anthropic-ai/sdk/resources/index.mjs";
+import type { MessageCreateParamsBase, MessageCreateParamsNonStreaming, RawMessageStreamEvent } from "@anthropic-ai/sdk/resources/messages.js";
 import {
-    AIModel, Completion, CompletionChunkObject, ExecutionOptions, ExecutionTokenUsage,
+    type AIModel, type Completion, type CompletionChunkObject, type ExecutionOptions, type ExecutionTokenUsage,
     getConversationMeta,
     getMaxTokensLimitVertexAi,
     incrementConversationTurn,
-    JSONObject,
-    LlumiverseError, LlumiverseErrorContext,
+    type JSONObject,
+    LlumiverseError, type LlumiverseErrorContext,
     ModelType,
-    PromptRole, PromptSegment, readStreamAsBase64, readStreamAsString, StatelessExecutionOptions,
+    PromptRole, type PromptSegment, readStreamAsBase64, readStreamAsString, type StatelessExecutionOptions,
     stripBase64ImagesFromConversation,
     stripHeartbeatsFromConversation,
-    ToolUse,
+    type ToolUse,
     truncateLargeTextInConversation,
-    VertexAIClaudeOptions
+    type VertexAIClaudeOptions,
 } from "@llumiverse/core";
 import { asyncMap } from "@llumiverse/core/async";
-import {
-import {
+import { resolveClaudeThinking } from "../../shared/claude-thinking.js";
+import type { VertexAIDriver } from "../index.js";
+import type { ModelDefinition } from "../models.js";

 export const ANTHROPIC_REGIONS: Record<string, string> = {
     us: "us-east5",
@@ -313,10 +314,13 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
         options = { ...options, model: modelName };

         const client = await driver.getAnthropicClient(region);
-
+        const model_options = options.model_options as VertexAIClaudeOptions | undefined;

-        if (
-
+        if (model_options?._option_id !== undefined &&
+            model_options?._option_id !== "vertexai-claude" &&
+            model_options?._option_id !== "text-fallback"
+        ) {
+            driver.logger.debug({ options: options.model_options }, "Unexpected option id");
         }

         let conversation = updateConversation(options.conversation as ClaudePrompt, prompt);
@@ -328,7 +332,7 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
         const result = await client.messages.create(nonStreamingPayload, requestOptions) satisfies Message;

         // Use the new function to collect text content, including thinking if enabled
-        const includeThoughts =
+        const includeThoughts = model_options?.include_thoughts ?? false;
         const text = collectAllTextContent(result.content, includeThoughts);
         const tool_use = collectTools(result.content);

@@ -371,8 +375,11 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
         const client = await driver.getAnthropicClient(region);
         const model_options = options.model_options as VertexAIClaudeOptions | undefined;

-        if (model_options?._option_id !==
-
+        if ((model_options?._option_id !== undefined &&
+            model_options?._option_id !== "vertexai-claude" &&
+            model_options?._option_id !== "text-fallback")
+        ) {
+            driver.logger.debug({ options: options.model_options }, "Unexpected option id");
         }

         // Include conversation history (same as non-streaming)
@@ -979,23 +986,21 @@ function getClaudePayload(options: ExecutionOptions, prompt: ClaudePrompt): { pa
         }
     }

+    // Resolve thinking, effort, and sampling restriction using shared Claude helper
+    const { thinking, outputConfig, hasSamplingRestriction } = resolveClaudeThinking(modelName, model_options);
+
     const payload = {
         messages: sanitizedMessages,
         system: sanitizedSystem,
         tools: sanitizedTools,
-        temperature: model_options?.temperature,
+        temperature: hasSamplingRestriction ? undefined : model_options?.temperature,
         model: modelName,
         max_tokens: maxToken(options),
-        top_p: model_options?.temperature != null ? undefined : model_options?.top_p,
-        top_k: model_options?.top_k,
+        top_p: hasSamplingRestriction ? undefined : (model_options?.temperature != null ? undefined : model_options?.top_p),
+        top_k: hasSamplingRestriction ? undefined : model_options?.top_k,
         stop_sequences: model_options?.stop_sequence,
-        thinking
-
-            budget_tokens: model_options?.thinking_budget_tokens ?? 1024,
-            type: "enabled" as const
-        } : {
-            type: "disabled" as const
-        }
+        thinking,
+        ...(outputConfig && { output_config: outputConfig }),
     };

     return { payload, requestOptions };
package/src/vertexai/models/gemini.ts
CHANGED
@@ -243,9 +243,12 @@ const recoverableToolCallReasons = [
 function geminiThinkingBudget(option: StatelessExecutionOptions) {
     const model_options = option.model_options as VertexAIGeminiOptions | undefined;
     // If thinking_budget_tokens is explicitly set in model options, use it directly
-    if (model_options?.thinking_budget_tokens) {
+    if (model_options?.thinking_budget_tokens !== undefined) {
         return model_options.thinking_budget_tokens;
     }
+    if (model_options?.effort) {
+        return geminiBudgetForEffort(option.model, model_options.effort);
+    }
     // Set minimum thinking level by default.
     // Docs: https://ai.google.dev/gemini-api/docs/thinking#set-budget
     if (getGeminiModelVersion(option.model) === '2.5') {
@@ -257,33 +260,76 @@ function geminiThinkingBudget(option: StatelessExecutionOptions) {
     return undefined;
 }

+function geminiThinkingLevelForEffort(model: string, effort: VertexAIGeminiOptions["effort"]): ThinkingLevel | undefined {
+    if (model.includes("gemini-3-pro-image")) {
+        return ThinkingLevel.HIGH;
+    }
+    if (model.includes("gemini-3.1-flash-image")) {
+        return effort === "low" ? ThinkingLevel.MINIMAL : ThinkingLevel.HIGH;
+    }
+    switch (effort) {
+        case "low":
+            return ThinkingLevel.LOW;
+        case "medium":
+            return ThinkingLevel.MEDIUM;
+        case "high":
+            return ThinkingLevel.HIGH;
+        default:
+            return undefined;
+    }
+}
+
+function geminiBudgetForEffort(model: string, effort: NonNullable<VertexAIGeminiOptions["effort"]>): number {
+    const isFlashLite = model.includes("flash-lite");
+    const isFlash = model.includes("flash") && !isFlashLite;
+    const isPro = model.includes("pro");
+
+    if (effort === "low") {
+        if (isPro) return 128;
+        if (isFlashLite) return 512;
+        if (isFlash) return 1;
+        return 1024;
+    }
+    if (effort === "medium") {
+        return 8192;
+    }
+    if (isPro) return 32768;
+    if (isFlash || isFlashLite) return 24576;
+    return 8192;
+}
+
 function geminiThinkingConfig(option: StatelessExecutionOptions): ThinkingConfig | undefined {
     const model_options = option.model_options as VertexAIGeminiOptions | undefined;

     // If thinking options are explicitly set in model options, use them directly
     const include_thoughts = model_options?.include_thoughts ?? false;
-    if (model_options?.thinking_budget_tokens || model_options?.thinking_level) {
+    if (model_options?.thinking_budget_tokens !== undefined || model_options?.thinking_level) {
         return {
             includeThoughts: include_thoughts,
             thinkingBudget: model_options.thinking_budget_tokens,
             thinkingLevel: model_options.thinking_level,
         };
     }
+    if (model_options?.effort) {
+        if (isGeminiModelVersionGte(option.model, '3.0')) {
+            return {
+                includeThoughts: include_thoughts,
+                thinkingLevel: geminiThinkingLevelForEffort(option.model, model_options.effort),
+            };
+        }
+        return {
+            includeThoughts: include_thoughts,
+            thinkingBudget: geminiBudgetForEffort(option.model, model_options.effort),
+        };
+    }

     // Set a low thinking level by default.
     // Docs: https://ai.google.dev/gemini-api/docs/thinking#set-budget
     // https://docs.cloud.google.com/vertex-ai/generative-ai/docs/thinking
     if (isGeminiModelVersionGte(option.model, '3.0')) {
-        if (option.model.includes("gemini-3-pro-image")) {
-            // Does not support thinking level.
-            return {
-                includeThoughts: include_thoughts,
-                thinkingBudget: -1
-            };
-        }
         return {
             includeThoughts: include_thoughts,
-            thinkingLevel: ThinkingLevel.LOW
+            thinkingLevel: option.model.includes("gemini-3-pro-image") ? ThinkingLevel.HIGH : ThinkingLevel.LOW
         };
     }
     if (isGeminiModelVersionGte(option.model, '2.5')) {
@@ -914,4 +960,4 @@ function formatFunctionResponse(response: string): JSONObject {
     } else {
         return { output: response };
     }
-}
+}
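The effort-to-budget mapping for pre-3.0 Gemini models is easiest to read as a standalone restatement. geminiBudgetForEffort is private to this file, so the sketch below copies its logic instead of importing it; the model ids in the calls are illustrative. Gemini 3.0+ models go through thinkingLevel instead, as shown in the hunk above.

    // Standalone restatement of the new effort-to-budget mapping for pre-3.0 Gemini models.
    type Effort = "low" | "medium" | "high";

    function budgetForEffort(model: string, effort: Effort): number {
        const isFlashLite = model.includes("flash-lite");
        const isFlash = model.includes("flash") && !isFlashLite;
        const isPro = model.includes("pro");
        if (effort === "low") return isPro ? 128 : isFlashLite ? 512 : isFlash ? 1 : 1024;
        if (effort === "medium") return 8192;
        return isPro ? 32768 : (isFlash || isFlashLite) ? 24576 : 8192;
    }

    console.log(budgetForEffort("gemini-2.5-pro", "low"));        // 128
    console.log(budgetForEffort("gemini-2.5-flash", "high"));     // 24576
    console.log(budgetForEffort("gemini-2.5-flash-lite", "low")); // 512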
package/src/vertexai/models/imagen.ts
CHANGED
@@ -323,8 +323,8 @@ export class ImagenModelDefinition {
     }

     async requestImageGeneration(driver: VertexAIDriver, prompt: ImagenPrompt, options: ExecutionOptions): Promise<Completion> {
-        if (options.model_options?._option_id !== "vertexai-imagen") {
-            driver.logger.
+        if (options.model_options?._option_id !== undefined && options.model_options?._option_id !== "vertexai-imagen") {
+            driver.logger.debug({ options: options.model_options }, "Unexpected option id");
         }
         options.model_options = options.model_options as ImagenOptions | undefined;

package/src/watsonx/index.ts
CHANGED
@@ -1,4 +1,4 @@
-import {
+import { AbstractDriver, AIModel, Completion, CompletionChunkObject, DriverOptions, EmbeddingsOptions, EmbeddingsResult, ExecutionOptions, TextFallbackOptions } from "@llumiverse/core";
 import { transformSSEStream } from "@llumiverse/core/async";
 import { FetchClient } from "@vertesia/api-fetch-client";
 import { GenerateEmbeddingPayload, GenerateEmbeddingResponse, WatsonAuthToken, WatsonxListModelResponse, WatsonxModelSpec, WatsonxTextGenerationPayload, WatsonxTextGenerationResponse } from "./interfaces.js";
@@ -30,8 +30,8 @@ export class WatsonxDriver extends AbstractDriver<WatsonxDriverOptions, string>
     }

     async requestTextCompletion(prompt: string, options: ExecutionOptions): Promise<Completion> {
-        if (options.model_options?._option_id !== "text-fallback") {
-            this.logger.
+        if (options.model_options?._option_id !== undefined && options.model_options?._option_id !== "text-fallback") {
+            this.logger.debug({ options: options.model_options }, "Unexpected option id");
         }
         options.model_options = options.model_options as TextFallbackOptions | undefined;

@@ -65,8 +65,8 @@ export class WatsonxDriver extends AbstractDriver<WatsonxDriverOptions, string>
     }

     async requestTextCompletionStream(prompt: string, options: ExecutionOptions): Promise<AsyncIterable<CompletionChunkObject>> {
-        if (options.model_options?._option_id !== "text-fallback") {
-            this.logger.
+        if (options.model_options?._option_id !== undefined && options.model_options?._option_id !== "text-fallback") {
+            this.logger.debug({ options: options.model_options }, "Unexpected option id");
         }
         options.model_options = options.model_options as TextFallbackOptions | undefined;
         const payload: WatsonxTextGenerationPayload = {