@llumiverse/drivers 1.0.0-dev.20260224.234313Z → 1.0.0-dev.20260331.080752Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/bedrock/converse.js +86 -12
- package/lib/cjs/bedrock/converse.js.map +1 -1
- package/lib/cjs/bedrock/index.js +208 -1
- package/lib/cjs/bedrock/index.js.map +1 -1
- package/lib/cjs/groq/index.js +7 -4
- package/lib/cjs/groq/index.js.map +1 -1
- package/lib/cjs/openai/index.js +457 -26
- package/lib/cjs/openai/index.js.map +1 -1
- package/lib/cjs/openai/openai_compatible.js +1 -0
- package/lib/cjs/openai/openai_compatible.js.map +1 -1
- package/lib/cjs/vertexai/index.js +42 -0
- package/lib/cjs/vertexai/index.js.map +1 -1
- package/lib/cjs/vertexai/models/claude.js +230 -2
- package/lib/cjs/vertexai/models/claude.js.map +1 -1
- package/lib/cjs/vertexai/models/gemini.js +261 -41
- package/lib/cjs/vertexai/models/gemini.js.map +1 -1
- package/lib/cjs/vertexai/models.js +1 -1
- package/lib/cjs/vertexai/models.js.map +1 -1
- package/lib/esm/bedrock/converse.js +80 -6
- package/lib/esm/bedrock/converse.js.map +1 -1
- package/lib/esm/bedrock/index.js +207 -2
- package/lib/esm/bedrock/index.js.map +1 -1
- package/lib/esm/groq/index.js +7 -4
- package/lib/esm/groq/index.js.map +1 -1
- package/lib/esm/openai/index.js +456 -27
- package/lib/esm/openai/index.js.map +1 -1
- package/lib/esm/openai/openai_compatible.js +1 -0
- package/lib/esm/openai/openai_compatible.js.map +1 -1
- package/lib/esm/vertexai/index.js +43 -1
- package/lib/esm/vertexai/index.js.map +1 -1
- package/lib/esm/vertexai/models/claude.js +229 -3
- package/lib/esm/vertexai/models/claude.js.map +1 -1
- package/lib/esm/vertexai/models/gemini.js +262 -43
- package/lib/esm/vertexai/models/gemini.js.map +1 -1
- package/lib/esm/vertexai/models.js +1 -1
- package/lib/esm/vertexai/models.js.map +1 -1
- package/lib/types/bedrock/converse.d.ts +1 -2
- package/lib/types/bedrock/converse.d.ts.map +1 -1
- package/lib/types/bedrock/index.d.ts +53 -1
- package/lib/types/bedrock/index.d.ts.map +1 -1
- package/lib/types/openai/index.d.ts +96 -1
- package/lib/types/openai/index.d.ts.map +1 -1
- package/lib/types/openai/openai_compatible.d.ts +5 -0
- package/lib/types/openai/openai_compatible.d.ts.map +1 -1
- package/lib/types/openai/openai_format.d.ts +1 -1
- package/lib/types/vertexai/index.d.ts +11 -1
- package/lib/types/vertexai/index.d.ts.map +1 -1
- package/lib/types/vertexai/models/claude.d.ts +64 -1
- package/lib/types/vertexai/models/claude.d.ts.map +1 -1
- package/lib/types/vertexai/models/gemini.d.ts +61 -1
- package/lib/types/vertexai/models/gemini.d.ts.map +1 -1
- package/lib/types/vertexai/models.d.ts +6 -1
- package/lib/types/vertexai/models.d.ts.map +1 -1
- package/package.json +9 -9
- package/src/bedrock/converse.ts +85 -10
- package/src/bedrock/error-handling.test.ts +352 -0
- package/src/bedrock/index.ts +225 -1
- package/src/groq/index.ts +9 -4
- package/src/openai/error-handling.test.ts +567 -0
- package/src/openai/index.ts +505 -29
- package/src/openai/openai_compatible.ts +7 -0
- package/src/openai/openai_format.ts +1 -1
- package/src/vertexai/index.ts +56 -5
- package/src/vertexai/models/claude-error-handling.test.ts +432 -0
- package/src/vertexai/models/claude.ts +273 -7
- package/src/vertexai/models/gemini-error-handling.test.ts +353 -0
- package/src/vertexai/models/gemini.ts +304 -48
- package/src/vertexai/models.ts +7 -2
|
@@ -1,17 +1,24 @@
|
|
|
1
|
+
import type { ApiError } from "@google/genai";
|
|
1
2
|
import {
|
|
2
3
|
Content, FinishReason, FunctionCallingConfigMode, FunctionDeclaration, GenerateContentConfig, GenerateContentParameters,
|
|
3
4
|
GenerateContentResponseUsageMetadata,
|
|
4
|
-
HarmBlockThreshold, HarmCategory, Modality, Part,
|
|
5
|
+
HarmBlockThreshold, HarmCategory, Modality, Part,
|
|
6
|
+
ProminentPeople,
|
|
7
|
+
SafetySetting, Schema, ThinkingConfig,
|
|
8
|
+
ThinkingLevel,
|
|
9
|
+
Tool, Type
|
|
5
10
|
} from "@google/genai";
|
|
6
11
|
import {
|
|
7
12
|
AIModel, Completion, CompletionChunkObject, CompletionResult, ExecutionOptions,
|
|
8
13
|
ExecutionTokenUsage,
|
|
9
14
|
getConversationMeta,
|
|
10
|
-
|
|
15
|
+
getGeminiModelVersion,
|
|
11
16
|
incrementConversationTurn,
|
|
12
|
-
|
|
17
|
+
isGeminiModelVersionGte,
|
|
18
|
+
JSONObject, JSONSchema, LlumiverseError, LlumiverseErrorContext, ModelType, PromptOptions, PromptRole,
|
|
13
19
|
PromptSegment, readStreamAsBase64, StatelessExecutionOptions,
|
|
14
20
|
stripBase64ImagesFromConversation,
|
|
21
|
+
stripHeartbeatsFromConversation,
|
|
15
22
|
ToolDefinition, ToolUse,
|
|
16
23
|
truncateLargeTextInConversation,
|
|
17
24
|
unwrapConversationArray,
|
|
@@ -53,15 +60,36 @@ const geminiSafetySettings: SafetySetting[] = [
|
|
|
53
60
|
}
|
|
54
61
|
];
|
|
55
62
|
|
|
63
|
+
// We do the mapping here rather than in common to avoid bringing the SDK into the common package.
|
|
64
|
+
function getProminentPeopleOption(prominentPeople?: "PROMINENT_PEOPLE_UNSPECIFIED" | "ALLOW_PROMINENT_PEOPLE" | "BLOCK_PROMINENT_PEOPLE") {
|
|
65
|
+
switch (prominentPeople) {
|
|
66
|
+
case "ALLOW_PROMINENT_PEOPLE":
|
|
67
|
+
return ProminentPeople.ALLOW_PROMINENT_PEOPLE;
|
|
68
|
+
case "BLOCK_PROMINENT_PEOPLE":
|
|
69
|
+
return ProminentPeople.BLOCK_PROMINENT_PEOPLE;
|
|
70
|
+
case "PROMINENT_PEOPLE_UNSPECIFIED":
|
|
71
|
+
return ProminentPeople.PROMINENT_PEOPLE_UNSPECIFIED;
|
|
72
|
+
default:
|
|
73
|
+
return undefined;
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
|
|
56
77
|
function getGeminiPayload(options: ExecutionOptions, prompt: GenerateContentPrompt): GenerateContentParameters {
|
|
57
78
|
const model_options = options.model_options as VertexAIGeminiOptions | undefined;
|
|
58
79
|
const tools = getToolDefinitions(options.tools);
|
|
59
80
|
|
|
60
|
-
|
|
81
|
+
// When no tools are provided but conversation contains functionCall/functionResponse parts
|
|
82
|
+
// (e.g. checkpoint summary calls), convert them to text to avoid API errors
|
|
83
|
+
if (!tools && prompt.contents) {
|
|
84
|
+
const hasToolParts = prompt.contents.some(c =>
|
|
85
|
+
c.parts?.some(p => p.functionCall || p.functionResponse)
|
|
86
|
+
);
|
|
87
|
+
if (hasToolParts) {
|
|
88
|
+
prompt.contents = convertGeminiFunctionPartsToText(prompt.contents);
|
|
89
|
+
}
|
|
90
|
+
}
|
|
61
91
|
|
|
62
|
-
const
|
|
63
|
-
|| model_options?.thinking_budget_tokens
|
|
64
|
-
|| options.model.includes("gemini-2.5");
|
|
92
|
+
const useStructuredOutput = supportsStructuredOutput(options) && !tools;
|
|
65
93
|
|
|
66
94
|
const configNanoBanana: GenerateContentConfig = {
|
|
67
95
|
systemInstruction: prompt.system,
|
|
@@ -71,10 +99,16 @@ function getGeminiPayload(options: ExecutionOptions, prompt: GenerateContentProm
|
|
|
71
99
|
//Model options
|
|
72
100
|
temperature: model_options?.temperature,
|
|
73
101
|
topP: model_options?.top_p,
|
|
74
|
-
maxOutputTokens:
|
|
102
|
+
maxOutputTokens: model_options?.max_tokens,
|
|
75
103
|
stopSequences: model_options?.stop_sequence,
|
|
104
|
+
thinkingConfig: geminiThinkingConfig(options),
|
|
76
105
|
imageConfig: {
|
|
106
|
+
imageSize: model_options?.image_size,
|
|
77
107
|
aspectRatio: model_options?.image_aspect_ratio,
|
|
108
|
+
personGeneration: model_options?.person_generation,
|
|
109
|
+
prominentPeople: getProminentPeopleOption(model_options?.prominent_people),
|
|
110
|
+
outputMimeType: model_options?.output_mime_type,
|
|
111
|
+
outputCompressionQuality: model_options?.output_compression_quality,
|
|
78
112
|
}
|
|
79
113
|
}
|
|
80
114
|
|
|
@@ -95,12 +129,12 @@ function getGeminiPayload(options: ExecutionOptions, prompt: GenerateContentProm
|
|
|
95
129
|
temperature: model_options?.temperature,
|
|
96
130
|
topP: model_options?.top_p,
|
|
97
131
|
topK: model_options?.top_k,
|
|
98
|
-
maxOutputTokens:
|
|
132
|
+
maxOutputTokens: model_options?.max_tokens,
|
|
99
133
|
stopSequences: model_options?.stop_sequence,
|
|
100
134
|
presencePenalty: model_options?.presence_penalty,
|
|
101
135
|
frequencyPenalty: model_options?.frequency_penalty,
|
|
102
136
|
seed: model_options?.seed,
|
|
103
|
-
thinkingConfig:
|
|
137
|
+
thinkingConfig: geminiThinkingConfig(options),
|
|
104
138
|
}
|
|
105
139
|
|
|
106
140
|
return {
|
|
@@ -435,7 +469,11 @@ function removeEmptyJSONArray(array: any[], schema: JSONSchema): any[] {
|
|
|
435
469
|
return cleanedArray.filter(item => !isEmpty(item));
|
|
436
470
|
}
|
|
437
471
|
|
|
438
|
-
|
|
472
|
+
/**
|
|
473
|
+
* Collect all parts (text and images) from content in order.
|
|
474
|
+
* This preserves the original ordering of text and image parts.
|
|
475
|
+
*/
|
|
476
|
+
function extractCompletionResults(content: Content): CompletionResult[] {
|
|
439
477
|
const results: CompletionResult[] = [];
|
|
440
478
|
const parts = content.parts;
|
|
441
479
|
if (parts) {
|
|
@@ -445,18 +483,7 @@ function collectTextParts(content: Content): CompletionResult[] {
|
|
|
445
483
|
type: "text",
|
|
446
484
|
value: part.text
|
|
447
485
|
});
|
|
448
|
-
}
|
|
449
|
-
}
|
|
450
|
-
}
|
|
451
|
-
return results;
|
|
452
|
-
}
|
|
453
|
-
|
|
454
|
-
function collectInlineDataParts(content: Content): CompletionResult[] {
|
|
455
|
-
const results: CompletionResult[] = [];
|
|
456
|
-
const parts = content.parts;
|
|
457
|
-
if (parts) {
|
|
458
|
-
for (const part of parts) {
|
|
459
|
-
if (part.inlineData) {
|
|
486
|
+
} else if (part.inlineData) {
|
|
460
487
|
const base64ImageBytes: string = part.inlineData.data ?? "";
|
|
461
488
|
const mimeType = part.inlineData.mimeType ?? "image/png";
|
|
462
489
|
const imageUrl = `data:${mimeType};base64,${base64ImageBytes}`;
|
|
@@ -533,25 +560,16 @@ const recoverableToolCallReasons = [
|
|
|
533
560
|
'UNEXPECTED_TOOL_CALL', // Model called an undeclared tool
|
|
534
561
|
]
|
|
535
562
|
|
|
536
|
-
function geminiMaxTokens(option: StatelessExecutionOptions) {
|
|
537
|
-
const model_options = option.model_options as VertexAIGeminiOptions | undefined;
|
|
538
|
-
if (model_options?.max_tokens) {
|
|
539
|
-
return model_options.max_tokens;
|
|
540
|
-
}
|
|
541
|
-
if (option.model.includes("gemini-2.5")) {
|
|
542
|
-
return getMaxTokensLimitVertexAi(option.model);
|
|
543
|
-
}
|
|
544
|
-
return undefined;
|
|
545
|
-
}
|
|
546
563
|
|
|
547
564
|
function geminiThinkingBudget(option: StatelessExecutionOptions) {
|
|
548
565
|
const model_options = option.model_options as VertexAIGeminiOptions | undefined;
|
|
566
|
+
// If thinking_budget_tokens is explicitly set in model options, use it directly
|
|
549
567
|
if (model_options?.thinking_budget_tokens) {
|
|
550
568
|
return model_options.thinking_budget_tokens;
|
|
551
569
|
}
|
|
552
570
|
// Set minimum thinking level by default.
|
|
553
571
|
// Docs: https://ai.google.dev/gemini-api/docs/thinking#set-budget
|
|
554
|
-
if (option.model
|
|
572
|
+
if (getGeminiModelVersion(option.model) == '2.5') {
|
|
555
573
|
if (option.model.includes("pro")) {
|
|
556
574
|
return 128;
|
|
557
575
|
}
|
|
@@ -562,16 +580,32 @@ function geminiThinkingBudget(option: StatelessExecutionOptions) {
|
|
|
562
580
|
|
|
563
581
|
function geminiThinkingConfig(option: StatelessExecutionOptions): ThinkingConfig | undefined {
|
|
564
582
|
const model_options = option.model_options as VertexAIGeminiOptions | undefined;
|
|
583
|
+
|
|
584
|
+
// If thinking options are explicitly set in model options, use them directly
|
|
565
585
|
const include_thoughts = model_options?.include_thoughts ?? false;
|
|
566
|
-
if (model_options?.thinking_budget_tokens) {
|
|
567
|
-
return {
|
|
586
|
+
if (model_options?.thinking_budget_tokens || model_options?.thinking_level) {
|
|
587
|
+
return {
|
|
588
|
+
includeThoughts: include_thoughts,
|
|
589
|
+
thinkingBudget: model_options.thinking_budget_tokens,
|
|
590
|
+
thinkingLevel: model_options.thinking_level,
|
|
591
|
+
};
|
|
568
592
|
}
|
|
569
593
|
|
|
570
|
-
// Set
|
|
594
|
+
// Set a low thinking level by default.
|
|
571
595
|
// Docs: https://ai.google.dev/gemini-api/docs/thinking#set-budget
|
|
572
|
-
|
|
596
|
+
// https://docs.cloud.google.com/vertex-ai/generative-ai/docs/thinking
|
|
597
|
+
if (isGeminiModelVersionGte(option.model, '3.0')) {
|
|
598
|
+
return {
|
|
599
|
+
includeThoughts: include_thoughts,
|
|
600
|
+
thinkingLevel: ThinkingLevel.LOW
|
|
601
|
+
};
|
|
602
|
+
}
|
|
603
|
+
if (isGeminiModelVersionGte(option.model, '2.5')) {
|
|
573
604
|
const thinking_budget_tokens = geminiThinkingBudget(option) ?? 0;
|
|
574
|
-
return {
|
|
605
|
+
return {
|
|
606
|
+
includeThoughts: include_thoughts,
|
|
607
|
+
thinkingBudget: thinking_budget_tokens
|
|
608
|
+
};
|
|
575
609
|
}
|
|
576
610
|
}
|
|
577
611
|
|
|
@@ -667,7 +701,7 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
|
|
|
667
701
|
// File content handling
|
|
668
702
|
if (msg.files) {
|
|
669
703
|
for (const f of msg.files) {
|
|
670
|
-
|
|
704
|
+
const fileUrl = await f.getURL();
|
|
671
705
|
const isGsUrl = fileUrl.startsWith('gs://') || fileUrl.startsWith('https://storage.googleapis.com/');
|
|
672
706
|
|
|
673
707
|
if (isGsUrl) {
|
|
@@ -680,7 +714,7 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
|
|
|
680
714
|
} else {
|
|
681
715
|
// Inline data handling
|
|
682
716
|
const stream = await f.getStream();
|
|
683
|
-
const data = await readStreamAsBase64(stream);
|
|
717
|
+
const data = await readStreamAsBase64(stream);
|
|
684
718
|
parts.push({
|
|
685
719
|
inlineData: {
|
|
686
720
|
data,
|
|
@@ -776,6 +810,15 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
|
|
|
776
810
|
const modelName = splits[splits.length - 1];
|
|
777
811
|
options = { ...options, model: modelName };
|
|
778
812
|
|
|
813
|
+
// Restore system instruction from stored conversation on resume.
|
|
814
|
+
// The stored _llumiverse_system contains the complete system (interaction prompt + schema)
|
|
815
|
+
// from the initial call. Always prefer it over the prompt's system, which on resume only
|
|
816
|
+
// contains the schema instruction (no interaction system segments are present on resume).
|
|
817
|
+
const existingSystem = extractSystemFromConversation(options.conversation);
|
|
818
|
+
if (existingSystem) {
|
|
819
|
+
prompt.system = existingSystem;
|
|
820
|
+
}
|
|
821
|
+
|
|
779
822
|
let conversation = updateConversation(options.conversation, prompt.contents);
|
|
780
823
|
prompt.contents = conversation;
|
|
781
824
|
|
|
@@ -822,9 +865,8 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
|
|
|
822
865
|
|
|
823
866
|
// We clean the content before validation, so we can update the conversation.
|
|
824
867
|
const cleanedContent = cleanEmptyFieldsContent(content, options.result_schema);
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
result = [...textResults, ...imageResults];
|
|
868
|
+
// Collect all parts in order (text and images)
|
|
869
|
+
result = extractCompletionResults(cleanedContent);
|
|
828
870
|
conversation = updateConversation(conversation, [cleanedContent]);
|
|
829
871
|
}
|
|
830
872
|
}
|
|
@@ -850,12 +892,21 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
|
|
|
850
892
|
// Truncate large text content if configured
|
|
851
893
|
processedConversation = truncateLargeTextInConversation(processedConversation, stripOptions);
|
|
852
894
|
|
|
895
|
+
// Strip old heartbeat status messages
|
|
896
|
+
processedConversation = stripHeartbeatsFromConversation(processedConversation, {
|
|
897
|
+
keepForTurns: options.stripHeartbeatsAfterTurns ?? 1,
|
|
898
|
+
currentTurn,
|
|
899
|
+
});
|
|
900
|
+
|
|
901
|
+
// Preserve system instruction in conversation for multi-turn support
|
|
902
|
+
const finalConversation = storeSystemInConversation(processedConversation, prompt.system);
|
|
903
|
+
|
|
853
904
|
return {
|
|
854
905
|
result: result && result.length > 0 ? result : [{ type: "text" as const, value: '' }],
|
|
855
906
|
token_usage: token_usage,
|
|
856
907
|
finish_reason: finish_reason,
|
|
857
908
|
original_response: options.include_original_response ? response : undefined,
|
|
858
|
-
conversation:
|
|
909
|
+
conversation: finalConversation,
|
|
859
910
|
tool_use
|
|
860
911
|
} satisfies Completion;
|
|
861
912
|
}
|
|
@@ -869,6 +920,15 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
|
|
|
869
920
|
const modelName = splits[splits.length - 1];
|
|
870
921
|
options = { ...options, model: modelName };
|
|
871
922
|
|
|
923
|
+
// Restore system instruction from stored conversation on resume.
|
|
924
|
+
// The stored _llumiverse_system contains the complete system (interaction prompt + schema)
|
|
925
|
+
// from the initial call. Always prefer it over the prompt's system, which on resume only
|
|
926
|
+
// contains the schema instruction (no interaction system segments are present on resume).
|
|
927
|
+
const existingSystem = extractSystemFromConversation(options.conversation);
|
|
928
|
+
if (existingSystem) {
|
|
929
|
+
prompt.system = existingSystem;
|
|
930
|
+
}
|
|
931
|
+
|
|
872
932
|
// Include conversation history in prompt contents (same as non-streaming)
|
|
873
933
|
const conversation = updateConversation(options.conversation, prompt.contents);
|
|
874
934
|
prompt.contents = conversation;
|
|
@@ -901,9 +961,8 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
|
|
|
901
961
|
+ `content: ${JSON.stringify(candidate.content, null, 2)}, safety: ${JSON.stringify(candidate.safetyRatings, null, 2)}`);
|
|
902
962
|
}
|
|
903
963
|
if (candidate.content?.role === 'model') {
|
|
904
|
-
|
|
905
|
-
const
|
|
906
|
-
const combinedResults = [...textResults, ...imageResults];
|
|
964
|
+
// Collect all parts in order (text and images)
|
|
965
|
+
const combinedResults = extractCompletionResults(candidate.content);
|
|
907
966
|
tool_use = collectToolUseParts(candidate.content);
|
|
908
967
|
if (tool_use) {
|
|
909
968
|
finish_reason = "tool_use";
|
|
@@ -933,9 +992,176 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
|
|
|
933
992
|
return stream;
|
|
934
993
|
}
|
|
935
994
|
|
|
995
|
+
/**
|
|
996
|
+
* Format Google API errors into LlumiverseError with proper status codes and retryability.
|
|
997
|
+
*
|
|
998
|
+
* Google API errors follow AIP-193 standard:
|
|
999
|
+
* - ApiError.status: HTTP status code
|
|
1000
|
+
* - ApiError.message: Error message
|
|
1001
|
+
*
|
|
1002
|
+
* Common error codes:
|
|
1003
|
+
* - 400 (INVALID_ARGUMENT): Invalid request parameters
|
|
1004
|
+
* - 401 (UNAUTHENTICATED): Authentication required
|
|
1005
|
+
* - 403 (PERMISSION_DENIED): Insufficient permissions
|
|
1006
|
+
* - 404 (NOT_FOUND): Resource not found
|
|
1007
|
+
* - 429 (RESOURCE_EXHAUSTED): Rate limit/quota exceeded
|
|
1008
|
+
* - 500 (INTERNAL): Internal server error
|
|
1009
|
+
* - 503 (UNAVAILABLE): Service temporarily unavailable
|
|
1010
|
+
* - 504 (DEADLINE_EXCEEDED): Request timeout
|
|
1011
|
+
*
|
|
1012
|
+
* @see https://google.aip.dev/193
|
|
1013
|
+
* @see https://docs.cloud.google.com/vertex-ai/generative-ai/docs/model-reference/api-errors
|
|
1014
|
+
*/
|
|
1015
|
+
formatLlumiverseError(
|
|
1016
|
+
_driver: VertexAIDriver,
|
|
1017
|
+
error: unknown,
|
|
1018
|
+
context: LlumiverseErrorContext
|
|
1019
|
+
): LlumiverseError {
|
|
1020
|
+
// Check if it's a Google API error with status code
|
|
1021
|
+
const isApiError = this.isGoogleApiError(error);
|
|
1022
|
+
|
|
1023
|
+
if (!isApiError) {
|
|
1024
|
+
// Not a Google API error, use default handling
|
|
1025
|
+
// This will be called by the driver's default formatLlumiverseError
|
|
1026
|
+
throw error;
|
|
1027
|
+
}
|
|
1028
|
+
|
|
1029
|
+
const apiError = error as ApiError;
|
|
1030
|
+
const httpStatusCode = apiError.status;
|
|
1031
|
+
|
|
1032
|
+
// Extract error message
|
|
1033
|
+
const message = apiError.message || String(error);
|
|
1034
|
+
|
|
1035
|
+
// Build user-facing message with status code
|
|
1036
|
+
let userMessage = message;
|
|
1037
|
+
|
|
1038
|
+
// Include status code in message (for end-user visibility)
|
|
1039
|
+
if (httpStatusCode) {
|
|
1040
|
+
userMessage = `[${httpStatusCode}] ${userMessage}`;
|
|
1041
|
+
}
|
|
1042
|
+
|
|
1043
|
+
// Determine retryability based on Google error codes
|
|
1044
|
+
const retryable = this.isGeminiErrorRetryable(httpStatusCode);
|
|
1045
|
+
|
|
1046
|
+
// Extract error name/type from message if present
|
|
1047
|
+
const errorName = this.extractErrorName(message);
|
|
1048
|
+
|
|
1049
|
+
return new LlumiverseError(
|
|
1050
|
+
`[${context.provider}] ${userMessage}`,
|
|
1051
|
+
retryable,
|
|
1052
|
+
context,
|
|
1053
|
+
error,
|
|
1054
|
+
httpStatusCode,
|
|
1055
|
+
errorName
|
|
1056
|
+
);
|
|
1057
|
+
}
|
|
1058
|
+
|
|
1059
|
+
/**
|
|
1060
|
+
* Type guard to check if error is a Google API error.
|
|
1061
|
+
*/
|
|
1062
|
+
private isGoogleApiError(error: unknown): error is ApiError {
|
|
1063
|
+
return (
|
|
1064
|
+
error !== null &&
|
|
1065
|
+
typeof error === 'object' &&
|
|
1066
|
+
'status' in error &&
|
|
1067
|
+
typeof (error as any).status === 'number' &&
|
|
1068
|
+
'message' in error
|
|
1069
|
+
);
|
|
1070
|
+
}
|
|
1071
|
+
|
|
1072
|
+
/**
|
|
1073
|
+
* Determine if a Google API error is retryable based on HTTP status code.
|
|
1074
|
+
*
|
|
1075
|
+
* Retryable errors (per Google AIP-194):
|
|
1076
|
+
* - 408 (REQUEST_TIMEOUT): Request timeout
|
|
1077
|
+
* - 429 (RESOURCE_EXHAUSTED): Rate limit exceeded, quota exhausted
|
|
1078
|
+
* - 500 (INTERNAL): Internal server error
|
|
1079
|
+
* - 502 (BAD_GATEWAY): Bad gateway
|
|
1080
|
+
* - 503 (UNAVAILABLE): Service temporarily unavailable
|
|
1081
|
+
* - 504 (DEADLINE_EXCEEDED): Gateway timeout
|
|
1082
|
+
*
|
|
1083
|
+
* Non-retryable errors:
|
|
1084
|
+
* - 400 (INVALID_ARGUMENT): Invalid request parameters
|
|
1085
|
+
* - 401 (UNAUTHENTICATED): Authentication required
|
|
1086
|
+
* - 403 (PERMISSION_DENIED): Insufficient permissions
|
|
1087
|
+
* - 404 (NOT_FOUND): Resource not found
|
|
1088
|
+
* - 409 (CONFLICT): Resource conflict
|
|
1089
|
+
* - Other 4xx client errors
|
|
1090
|
+
*
|
|
1091
|
+
* @param httpStatusCode - The HTTP status code from the API error
|
|
1092
|
+
* @returns True if retryable, false if not retryable, undefined if unknown
|
|
1093
|
+
*/
|
|
1094
|
+
private isGeminiErrorRetryable(httpStatusCode: number): boolean | undefined {
|
|
1095
|
+
// Retryable status codes
|
|
1096
|
+
if (httpStatusCode === 408) return true; // Request timeout
|
|
1097
|
+
if (httpStatusCode === 429) return true; // Rate limit/quota
|
|
1098
|
+
if (httpStatusCode === 502) return true; // Bad gateway
|
|
1099
|
+
if (httpStatusCode === 503) return true; // Service unavailable
|
|
1100
|
+
if (httpStatusCode === 504) return true; // Gateway timeout
|
|
1101
|
+
if (httpStatusCode >= 500 && httpStatusCode < 600) return true; // Other 5xx server errors
|
|
1102
|
+
|
|
1103
|
+
// Non-retryable 4xx client errors
|
|
1104
|
+
if (httpStatusCode >= 400 && httpStatusCode < 500) return false;
|
|
1105
|
+
|
|
1106
|
+
// Unknown status codes - let consumer decide retry strategy
|
|
1107
|
+
return undefined;
|
|
1108
|
+
}
|
|
1109
|
+
|
|
1110
|
+
/**
|
|
1111
|
+
* Extract error type name from error message.
|
|
1112
|
+
* Google errors often include the error type in the message.
|
|
1113
|
+
* Examples: "INVALID_ARGUMENT", "RESOURCE_EXHAUSTED", "PERMISSION_DENIED"
|
|
1114
|
+
*/
|
|
1115
|
+
private extractErrorName(message: string): string | undefined {
|
|
1116
|
+
// Common Google error patterns
|
|
1117
|
+
const patterns = [
|
|
1118
|
+
/^([A-Z_]+):/, // "ERROR_NAME: message"
|
|
1119
|
+
/\[([A-Z_]+)\]/, // "[ERROR_NAME] message"
|
|
1120
|
+
/^(\w+Error):/, // "ErrorTypeError: message"
|
|
1121
|
+
];
|
|
1122
|
+
|
|
1123
|
+
for (const pattern of patterns) {
|
|
1124
|
+
const match = message.match(pattern);
|
|
1125
|
+
if (match) {
|
|
1126
|
+
return match[1];
|
|
1127
|
+
}
|
|
1128
|
+
}
|
|
1129
|
+
|
|
1130
|
+
return undefined;
|
|
1131
|
+
}
|
|
1132
|
+
|
|
936
1133
|
}
|
|
937
1134
|
|
|
938
1135
|
|
|
1136
|
+
/**
|
|
1137
|
+
* Converts functionCall and functionResponse parts to text parts in Gemini Content[].
|
|
1138
|
+
* Preserves tool call information while removing structured parts that require
|
|
1139
|
+
* tools/toolConfig to be defined in the API request.
|
|
1140
|
+
*/
|
|
1141
|
+
export function convertGeminiFunctionPartsToText(contents: Content[]): Content[] {
|
|
1142
|
+
return contents.map(content => {
|
|
1143
|
+
if (!content.parts) return content;
|
|
1144
|
+
const hasFunctionParts = content.parts.some(p => p.functionCall || p.functionResponse);
|
|
1145
|
+
if (!hasFunctionParts) return content;
|
|
1146
|
+
|
|
1147
|
+
const newParts = content.parts.map(part => {
|
|
1148
|
+
if (part.functionCall) {
|
|
1149
|
+
const argsStr = part.functionCall.args ? JSON.stringify(part.functionCall.args) : '';
|
|
1150
|
+
const truncated = argsStr.length > 500 ? argsStr.substring(0, 500) + '...' : argsStr;
|
|
1151
|
+
return { text: `[Tool call: ${part.functionCall.name}(${truncated})]` };
|
|
1152
|
+
}
|
|
1153
|
+
if (part.functionResponse) {
|
|
1154
|
+
const respStr = part.functionResponse.response
|
|
1155
|
+
? JSON.stringify(part.functionResponse.response) : 'No response';
|
|
1156
|
+
const truncated = respStr.length > 500 ? respStr.substring(0, 500) + '...' : respStr;
|
|
1157
|
+
return { text: `[Tool result for ${part.functionResponse.name}: ${truncated}]` };
|
|
1158
|
+
}
|
|
1159
|
+
return part;
|
|
1160
|
+
});
|
|
1161
|
+
return { ...content, parts: newParts };
|
|
1162
|
+
});
|
|
1163
|
+
}
|
|
1164
|
+
|
|
939
1165
|
function getToolDefinitions(tools: ToolDefinition[] | undefined | null): Tool | undefined {
|
|
940
1166
|
if (!tools || tools.length === 0) {
|
|
941
1167
|
return undefined;
|
|
@@ -979,6 +1205,36 @@ function updateConversation(conversation: unknown, prompt: Content[]): Content[]
|
|
|
979
1205
|
return convArray.concat(prompt);
|
|
980
1206
|
}
|
|
981
1207
|
|
|
1208
|
+
const SYSTEM_KEY = '_llumiverse_system';
|
|
1209
|
+
|
|
1210
|
+
/**
|
|
1211
|
+
* Extract the stored system instruction from a Gemini conversation object.
|
|
1212
|
+
* Returns undefined if no system was stored.
|
|
1213
|
+
*/
|
|
1214
|
+
function extractSystemFromConversation(conversation: unknown): Content | undefined {
|
|
1215
|
+
if (typeof conversation === 'object' && conversation !== null) {
|
|
1216
|
+
const c = conversation as Record<string, unknown>;
|
|
1217
|
+
if (c[SYSTEM_KEY] && typeof c[SYSTEM_KEY] === 'object') {
|
|
1218
|
+
return c[SYSTEM_KEY] as Content;
|
|
1219
|
+
}
|
|
1220
|
+
}
|
|
1221
|
+
return undefined;
|
|
1222
|
+
}
|
|
1223
|
+
|
|
1224
|
+
/**
|
|
1225
|
+
* Store the system instruction in the Gemini conversation wrapper object.
|
|
1226
|
+
* The conversation is already wrapped by incrementConversationTurn into
|
|
1227
|
+
* { _arrayConversation: Content[], _llumiverse_meta: {...} }.
|
|
1228
|
+
* We add _llumiverse_system alongside these fields.
|
|
1229
|
+
*/
|
|
1230
|
+
function storeSystemInConversation(conversation: unknown, system: Content | undefined): unknown {
|
|
1231
|
+
if (!system) return conversation;
|
|
1232
|
+
if (typeof conversation === 'object' && conversation !== null) {
|
|
1233
|
+
return { ...conversation as object, [SYSTEM_KEY]: system };
|
|
1234
|
+
}
|
|
1235
|
+
return conversation;
|
|
1236
|
+
}
|
|
1237
|
+
|
|
982
1238
|
/**
|
|
983
1239
|
*
|
|
984
1240
|
* Gemini supports JSON output in the response. so we test if the response is a valid JSON object. otherwise we treat the response as a string.
|
package/src/vertexai/models.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { AIModel, Completion,
|
|
1
|
+
import { AIModel, Completion, CompletionChunkObject, ExecutionOptions, LlumiverseError, LlumiverseErrorContext, PromptSegment } from "@llumiverse/core";
|
|
2
2
|
import { VertexAIDriver, trimModelName } from "./index.js";
|
|
3
|
-
import { GeminiModelDefinition } from "./models/gemini.js";
|
|
4
3
|
import { ClaudeModelDefinition } from "./models/claude.js";
|
|
4
|
+
import { GeminiModelDefinition } from "./models/gemini.js";
|
|
5
5
|
import { LLamaModelDefinition } from "./models/llama.js";
|
|
6
6
|
|
|
7
7
|
export interface ModelDefinition<PromptT = any> {
|
|
@@ -11,6 +11,11 @@ export interface ModelDefinition<PromptT = any> {
|
|
|
11
11
|
requestTextCompletion: (driver: VertexAIDriver, prompt: PromptT, options: ExecutionOptions) => Promise<Completion>;
|
|
12
12
|
requestTextCompletionStream: (driver: VertexAIDriver, prompt: PromptT, options: ExecutionOptions) => Promise<AsyncIterable<CompletionChunkObject>>;
|
|
13
13
|
preValidationProcessing?(result: Completion, options: ExecutionOptions): { result: Completion, options: ExecutionOptions };
|
|
14
|
+
/**
|
|
15
|
+
* Format provider-specific errors into standardized LlumiverseError.
|
|
16
|
+
* Optional - if not provided, VertexAIDriver will use default error handling.
|
|
17
|
+
*/
|
|
18
|
+
formatLlumiverseError?(driver: VertexAIDriver, error: unknown, context: LlumiverseErrorContext): LlumiverseError;
|
|
14
19
|
}
|
|
15
20
|
|
|
16
21
|
export function getModelDefinition(model: string): ModelDefinition {
|