@llumiverse/drivers 0.23.0 → 0.24.0-dev.202601221707
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +141 -218
- package/lib/cjs/azure/azure_foundry.js +46 -2
- package/lib/cjs/azure/azure_foundry.js.map +1 -1
- package/lib/cjs/bedrock/index.js +236 -16
- package/lib/cjs/bedrock/index.js.map +1 -1
- package/lib/cjs/groq/index.js +115 -85
- package/lib/cjs/groq/index.js.map +1 -1
- package/lib/cjs/index.js +1 -0
- package/lib/cjs/index.js.map +1 -1
- package/lib/cjs/openai/index.js +310 -114
- package/lib/cjs/openai/index.js.map +1 -1
- package/lib/cjs/openai/openai_compatible.js +62 -0
- package/lib/cjs/openai/openai_compatible.js.map +1 -0
- package/lib/cjs/openai/openai_format.js +32 -39
- package/lib/cjs/openai/openai_format.js.map +1 -1
- package/lib/cjs/vertexai/index.js +165 -0
- package/lib/cjs/vertexai/index.js.map +1 -1
- package/lib/cjs/vertexai/models/claude.js +201 -3
- package/lib/cjs/vertexai/models/claude.js.map +1 -1
- package/lib/cjs/vertexai/models/gemini.js +59 -20
- package/lib/cjs/vertexai/models/gemini.js.map +1 -1
- package/lib/cjs/xai/index.js +10 -16
- package/lib/cjs/xai/index.js.map +1 -1
- package/lib/esm/azure/azure_foundry.js +46 -2
- package/lib/esm/azure/azure_foundry.js.map +1 -1
- package/lib/esm/bedrock/index.js +236 -17
- package/lib/esm/bedrock/index.js.map +1 -1
- package/lib/esm/groq/index.js +115 -85
- package/lib/esm/groq/index.js.map +1 -1
- package/lib/esm/index.js +1 -0
- package/lib/esm/index.js.map +1 -1
- package/lib/esm/openai/index.js +311 -115
- package/lib/esm/openai/index.js.map +1 -1
- package/lib/esm/openai/openai_compatible.js +55 -0
- package/lib/esm/openai/openai_compatible.js.map +1 -0
- package/lib/esm/openai/openai_format.js +32 -39
- package/lib/esm/openai/openai_format.js.map +1 -1
- package/lib/esm/vertexai/index.js +166 -1
- package/lib/esm/vertexai/index.js.map +1 -1
- package/lib/esm/vertexai/models/claude.js +199 -3
- package/lib/esm/vertexai/models/claude.js.map +1 -1
- package/lib/esm/vertexai/models/gemini.js +60 -21
- package/lib/esm/vertexai/models/gemini.js.map +1 -1
- package/lib/esm/xai/index.js +10 -16
- package/lib/esm/xai/index.js.map +1 -1
- package/lib/types/azure/azure_foundry.d.ts +7 -5
- package/lib/types/azure/azure_foundry.d.ts.map +1 -1
- package/lib/types/bedrock/index.d.ts +21 -1
- package/lib/types/bedrock/index.d.ts.map +1 -1
- package/lib/types/groq/index.d.ts.map +1 -1
- package/lib/types/index.d.ts +1 -0
- package/lib/types/index.d.ts.map +1 -1
- package/lib/types/openai/index.d.ts +13 -7
- package/lib/types/openai/index.d.ts.map +1 -1
- package/lib/types/openai/openai_compatible.d.ts +26 -0
- package/lib/types/openai/openai_compatible.d.ts.map +1 -0
- package/lib/types/openai/openai_format.d.ts +4 -2
- package/lib/types/openai/openai_format.d.ts.map +1 -1
- package/lib/types/vertexai/index.d.ts +15 -0
- package/lib/types/vertexai/index.d.ts.map +1 -1
- package/lib/types/vertexai/models/claude.d.ts +20 -0
- package/lib/types/vertexai/models/claude.d.ts.map +1 -1
- package/lib/types/vertexai/models/gemini.d.ts +1 -1
- package/lib/types/vertexai/models/gemini.d.ts.map +1 -1
- package/lib/types/xai/index.d.ts +2 -3
- package/lib/types/xai/index.d.ts.map +1 -1
- package/package.json +12 -12
- package/src/azure/azure_foundry.ts +56 -7
- package/src/bedrock/index.ts +297 -26
- package/src/groq/index.ts +120 -94
- package/src/index.ts +1 -0
- package/src/openai/index.ts +363 -136
- package/src/openai/openai_compatible.ts +74 -0
- package/src/openai/openai_format.ts +44 -54
- package/src/vertexai/index.ts +205 -0
- package/src/vertexai/models/claude.ts +233 -3
- package/src/vertexai/models/gemini.ts +78 -27
- package/src/xai/index.ts +10 -17
|
@@ -321,11 +321,17 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
|
|
|
321
321
|
driver.logger.warn({ options: options.model_options }, "Invalid model options");
|
|
322
322
|
}
|
|
323
323
|
|
|
324
|
-
|
|
324
|
+
// Include conversation history (same as non-streaming)
|
|
325
|
+
const conversation = updateConversation(options.conversation as ClaudePrompt, prompt);
|
|
326
|
+
|
|
327
|
+
const { payload, requestOptions } = getClaudePayload(options, conversation);
|
|
325
328
|
const streamingPayload: MessageStreamParams = { ...payload, stream: true };
|
|
326
329
|
|
|
327
330
|
const response_stream = await client.messages.stream(streamingPayload, requestOptions);
|
|
328
331
|
|
|
332
|
+
// Track current tool use being built from streaming
|
|
333
|
+
let currentToolUse: { id: string; name: string; inputJson: string } | null = null;
|
|
334
|
+
|
|
329
335
|
const stream = asyncMap(response_stream, async (streamEvent: RawMessageStreamEvent) => {
|
|
330
336
|
switch (streamEvent.type) {
|
|
331
337
|
case "message_start":
|
|
@@ -345,6 +351,22 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
|
|
|
345
351
|
finish_reason: claudeFinishReason(streamEvent.delta.stop_reason ?? undefined),
|
|
346
352
|
} satisfies CompletionChunkObject;
|
|
347
353
|
case "content_block_start":
|
|
354
|
+
// Handle tool_use blocks
|
|
355
|
+
if (streamEvent.content_block.type === "tool_use") {
|
|
356
|
+
currentToolUse = {
|
|
357
|
+
id: streamEvent.content_block.id,
|
|
358
|
+
name: streamEvent.content_block.name,
|
|
359
|
+
inputJson: ''
|
|
360
|
+
};
|
|
361
|
+
return {
|
|
362
|
+
result: [],
|
|
363
|
+
tool_use: [{
|
|
364
|
+
id: streamEvent.content_block.id,
|
|
365
|
+
tool_name: streamEvent.content_block.name,
|
|
366
|
+
tool_input: '' as any // Will be accumulated via input_json_delta
|
|
367
|
+
}]
|
|
368
|
+
} satisfies CompletionChunkObject;
|
|
369
|
+
}
|
|
348
370
|
// Handle redacted thinking blocks
|
|
349
371
|
if (streamEvent.content_block.type === "redacted_thinking" && model_options?.include_thoughts) {
|
|
350
372
|
return {
|
|
@@ -359,6 +381,19 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
|
|
|
359
381
|
return {
|
|
360
382
|
result: streamEvent.delta.text ? [{ type: "text", value: streamEvent.delta.text }] : []
|
|
361
383
|
} satisfies CompletionChunkObject;
|
|
384
|
+
case "input_json_delta":
|
|
385
|
+
// Accumulate tool input JSON
|
|
386
|
+
if (currentToolUse && streamEvent.delta.partial_json) {
|
|
387
|
+
return {
|
|
388
|
+
result: [],
|
|
389
|
+
tool_use: [{
|
|
390
|
+
id: currentToolUse.id,
|
|
391
|
+
tool_name: '', // Name already sent in content_block_start
|
|
392
|
+
tool_input: streamEvent.delta.partial_json as any
|
|
393
|
+
}]
|
|
394
|
+
} satisfies CompletionChunkObject;
|
|
395
|
+
}
|
|
396
|
+
break;
|
|
362
397
|
case "thinking_delta":
|
|
363
398
|
if (model_options?.include_thoughts) {
|
|
364
399
|
return {
|
|
@@ -377,6 +412,10 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
|
|
|
377
412
|
}
|
|
378
413
|
break;
|
|
379
414
|
case "content_block_stop":
|
|
415
|
+
// Reset current tool use tracking when block ends
|
|
416
|
+
if (currentToolUse) {
|
|
417
|
+
currentToolUse = null;
|
|
418
|
+
}
|
|
380
419
|
// Handle the end of content blocks, for redacted thinking blocks
|
|
381
420
|
if (model_options?.include_thoughts) {
|
|
382
421
|
return {
|
|
@@ -406,6 +445,60 @@ function createPromptFromResponse(response: Message): ClaudePrompt {
|
|
|
406
445
|
}
|
|
407
446
|
}
|
|
408
447
|
|
|
448
|
+
/**
 * Merge every run of consecutive user messages into a single user message.
 *
 * Anthropic's API expects all `tool_result` blocks answering one assistant turn
 * to live in one user message, so tool results (and any user text) that were
 * appended as separate user messages are collapsed before the request is sent.
 *
 * Within each merged message the `tool_result` blocks are placed first and all
 * other blocks follow, each group keeping its original relative order. The API
 * rejects a user message in which text precedes a `tool_result`, which a plain
 * concatenation would produce for a run such as
 * `[tool_result] / "text" / [tool_result]`.
 *
 * @param messages Conversation messages in chronological order. Never mutated.
 * @returns `[]` for an empty input; the same array instance when no two user
 * messages are adjacent; otherwise a new array in which every user run is
 * replaced by one user message with array content (string content becomes a
 * text block). Non-user messages are passed through by reference.
 */
export function mergeConsecutiveUserMessages(messages: MessageParam[]): MessageParam[] {
    type Blocks = Exclude<MessageParam['content'], string>;

    if (messages.length === 0) return [];

    // Fast path: nothing to do unless two user messages are adjacent.
    const needsMerging = messages.some(
        (msg, idx) => idx > 0 && msg.role === 'user' && messages[idx - 1].role === 'user'
    );
    if (!needsMerging) {
        return messages;
    }

    const result: MessageParam[] = [];
    let i = 0;

    while (i < messages.length) {
        const current = messages[i];

        if (current.role !== 'user') {
            result.push(current);
            i++;
            continue;
        }

        // Collect the whole run of user messages, splitting tool results from the rest
        // so that tool results can be emitted first.
        const toolResults: Blocks = [];
        const otherBlocks: Blocks = [];

        while (i < messages.length && messages[i].role === 'user') {
            const { content } = messages[i];
            const blocks: Blocks = typeof content === 'string'
                ? [{ type: 'text', text: content }]
                : (Array.isArray(content) ? content : []);

            for (const block of blocks) {
                (block.type === 'tool_result' ? toolResults : otherBlocks).push(block);
            }
            i++;
        }

        result.push({
            role: 'user',
            content: [...toolResults, ...otherBlocks]
        });
    }

    return result;
}
|
|
501
|
+
|
|
409
502
|
/**
|
|
410
503
|
* Update the conversation messages
|
|
411
504
|
* @param prompt
|
|
@@ -416,11 +509,143 @@ function updateConversation(conversation: ClaudePrompt | undefined | null, promp
|
|
|
416
509
|
const baseSystemMessages = conversation?.system || [];
|
|
417
510
|
const baseMessages = conversation?.messages || [];
|
|
418
511
|
const system = baseSystemMessages.concat(prompt.system || []);
|
|
512
|
+
// Merge consecutive user messages to ensure tool_result blocks are properly grouped
|
|
513
|
+
const mergedMessages = mergeConsecutiveUserMessages(baseMessages.concat(prompt.messages || []));
|
|
419
514
|
return {
|
|
420
|
-
messages:
|
|
515
|
+
messages: mergedMessages,
|
|
421
516
|
system: system.length > 0 ? system : undefined // If system is empty, set to undefined
|
|
422
517
|
};
|
|
423
518
|
}
|
|
519
|
+
|
|
520
|
+
/**
 * Drop empty text from a conversation before it is sent to Claude.
 *
 * The Claude API rejects requests containing empty text content blocks
 * ("text content blocks must be non-empty"); such blocks can be left behind
 * when a streamed response is interrupted.
 *
 * - A message with string content is kept only if the string is not blank.
 * - A message with array content loses its blank text blocks; every non-text
 *   block (tool_use, tool_result, image, ...) is kept untouched.
 * - A message that has no blocks left after filtering is removed.
 *
 * @param messages Conversation messages; not mutated.
 * @returns A new array with the surviving messages, in their original order.
 */
function sanitizeMessages(messages: MessageParam[]): MessageParam[] {
    return messages.flatMap((message): MessageParam[] => {
        const { content } = message;

        // Plain string content: all-or-nothing.
        if (typeof content === 'string') {
            return content.trim() ? [message] : [];
        }

        // Array content: only text blocks can be discarded, and only when blank.
        const keptBlocks = content.filter(
            block => block.type !== 'text' || (block.text && block.text.trim().length > 0)
        );

        return keptBlocks.length > 0 ? [{ ...message, content: keptBlocks }] : [];
    });
}
|
|
560
|
+
|
|
561
|
+
/**
 * Fix orphaned `tool_use` blocks in the conversation. Exported for testing.
 *
 * When an agent is stopped mid-tool-execution, the assistant message contains
 * `tool_use` blocks for which no `tool_result` was ever recorded. The Anthropic
 * API requires every `tool_use` to be answered by a `tool_result` in the next
 * user message, so for each assistant message that is directly followed by a
 * user message this function prepends a synthetic "interrupted" `tool_result`
 * for every unanswered `tool_use`:
 *
 * - next user message has array content: only the `tool_use` ids that have no
 *   matching `tool_result` get a synthetic result;
 * - next user message has string content: it is converted to array content
 *   (synthetic results first, then the original text as a text block).
 *
 * An assistant message with `tool_use` blocks that is last in the conversation,
 * or that is not followed by a user message, is left as-is (the next turn is
 * expected to provide the results).
 *
 * @param messages Conversation messages in chronological order. The array and
 * its messages are never mutated; patched messages are fresh objects.
 * @returns The input array itself when it has fewer than two messages,
 * otherwise a new array with the patched conversation.
 */
export function fixOrphanedToolUse(messages: MessageParam[]): MessageParam[] {
    if (messages.length < 2) return messages;

    // Builds the placeholder result reported for a tool call that never ran.
    const interruptedResult = (block: { id: string; name: string }): ToolResultBlockParam => ({
        type: 'tool_result',
        tool_use_id: block.id,
        content: `[Tool interrupted: The user stopped the operation before "${block.name}" could execute.]`
    });

    // Patch a shallow copy so the caller's array is left untouched.
    const result: MessageParam[] = [...messages];

    // The last message has no successor, so there is nothing to patch after it.
    for (let i = 0; i < result.length - 1; i++) {
        const current = result[i];
        if (current.role !== 'assistant' || !Array.isArray(current.content)) continue;

        const toolUseBlocks = current.content.filter(
            (block): block is ContentBlockParam & { type: 'tool_use'; id: string; name: string } =>
                block.type === 'tool_use'
        );
        if (toolUseBlocks.length === 0) continue;

        const nextMessage = result[i + 1];
        if (nextMessage.role !== 'user') continue;

        if (Array.isArray(nextMessage.content)) {
            // Ids that the next user message already answers.
            const answeredIds = new Set(
                nextMessage.content
                    .filter((block): block is ToolResultBlockParam => block.type === 'tool_result')
                    .map(block => block.tool_use_id)
            );
            const orphaned = toolUseBlocks.filter(block => !answeredIds.has(block.id));

            if (orphaned.length > 0) {
                // Synthetic results go first: tool results must precede other content.
                result[i + 1] = {
                    ...nextMessage,
                    content: [...orphaned.map(interruptedResult), ...nextMessage.content]
                };
            }
        } else {
            // Plain-text user message: none of the tool calls can have been answered.
            // Content that is neither an array nor a string contributes no text block,
            // because an empty text block would be rejected by the API.
            const textBlocks: TextBlockParam[] = typeof nextMessage.content === 'string'
                ? [{ type: 'text', text: nextMessage.content }]
                : [];

            result[i + 1] = {
                role: 'user',
                content: [...toolUseBlocks.map(interruptedResult), ...textBlocks]
            };
        }
    }

    return result;
}
|
|
648
|
+
|
|
424
649
|
interface RequestOptions {
|
|
425
650
|
headers?: Record<string, string>;
|
|
426
651
|
}
|
|
@@ -440,8 +665,13 @@ function getClaudePayload(options: ExecutionOptions, prompt: ClaudePrompt): { pa
|
|
|
440
665
|
};
|
|
441
666
|
}
|
|
442
667
|
|
|
668
|
+
// Fix orphaned tool_use blocks (can occur when agent is stopped mid-tool-execution)
|
|
669
|
+
const fixedMessages = fixOrphanedToolUse(prompt.messages);
|
|
670
|
+
// Sanitize messages to remove empty text blocks (can occur from interrupted streaming)
|
|
671
|
+
const sanitizedMessages = sanitizeMessages(fixedMessages);
|
|
672
|
+
|
|
443
673
|
const payload = {
|
|
444
|
-
messages:
|
|
674
|
+
messages: sanitizedMessages,
|
|
445
675
|
system: prompt.system,
|
|
446
676
|
tools: options.tools, // we are using the same shape as claude for tools
|
|
447
677
|
temperature: model_options?.temperature,
|
|
@@ -5,12 +5,20 @@ import {
|
|
|
5
5
|
} from "@google/genai";
|
|
6
6
|
import {
|
|
7
7
|
AIModel, Completion, CompletionChunkObject, CompletionResult, ExecutionOptions,
|
|
8
|
-
ExecutionTokenUsage,
|
|
9
|
-
|
|
8
|
+
ExecutionTokenUsage,
|
|
9
|
+
getConversationMeta,
|
|
10
|
+
getMaxTokensLimitVertexAi,
|
|
11
|
+
incrementConversationTurn,
|
|
12
|
+
JSONObject, JSONSchema, ModelType, PromptOptions, PromptRole,
|
|
13
|
+
PromptSegment, readStreamAsBase64, StatelessExecutionOptions,
|
|
14
|
+
stripBase64ImagesFromConversation,
|
|
15
|
+
ToolDefinition, ToolUse,
|
|
16
|
+
truncateLargeTextInConversation,
|
|
17
|
+
unwrapConversationArray,
|
|
10
18
|
VertexAIGeminiOptions
|
|
11
19
|
} from "@llumiverse/core";
|
|
12
20
|
import { asyncMap } from "@llumiverse/core/async";
|
|
13
|
-
import {
|
|
21
|
+
import { GenerateContentPrompt, VertexAIDriver } from "../index.js";
|
|
14
22
|
import { ModelDefinition } from "../models.js";
|
|
15
23
|
|
|
16
24
|
function supportsStructuredOutput(options: PromptOptions): boolean {
|
|
@@ -467,11 +475,17 @@ function collectToolUseParts(content: Content): ToolUse[] | undefined {
|
|
|
467
475
|
const parts = content.parts ?? [];
|
|
468
476
|
for (const part of parts) {
|
|
469
477
|
if (part.functionCall) {
|
|
470
|
-
|
|
478
|
+
const toolUse: ToolUse = {
|
|
471
479
|
id: part.functionCall.name ?? '',
|
|
472
480
|
tool_name: part.functionCall.name ?? '',
|
|
473
481
|
tool_input: part.functionCall.args as JSONObject,
|
|
474
|
-
}
|
|
482
|
+
};
|
|
483
|
+
// Capture thought_signature for Gemini thinking models (2.5+/3.0+)
|
|
484
|
+
// This must be passed back with the function response
|
|
485
|
+
if (part.thoughtSignature) {
|
|
486
|
+
toolUse.thought_signature = part.thoughtSignature;
|
|
487
|
+
}
|
|
488
|
+
out.push(toolUse);
|
|
475
489
|
}
|
|
476
490
|
}
|
|
477
491
|
return out.length > 0 ? out : undefined;
|
|
@@ -545,7 +559,7 @@ function geminiThinkingConfig(option: StatelessExecutionOptions): ThinkingConfig
|
|
|
545
559
|
const model_options = option.model_options as VertexAIGeminiOptions | undefined;
|
|
546
560
|
const include_thoughts = model_options?.include_thoughts ?? false;
|
|
547
561
|
if (model_options?.thinking_budget_tokens) {
|
|
548
|
-
return {includeThoughts: include_thoughts, thinkingBudget: model_options.thinking_budget_tokens};
|
|
562
|
+
return { includeThoughts: include_thoughts, thinkingBudget: model_options.thinking_budget_tokens };
|
|
549
563
|
}
|
|
550
564
|
|
|
551
565
|
// Set minimum thinking level by default.
|
|
@@ -623,16 +637,18 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
|
|
|
623
637
|
if (!msg.tool_use_id) {
|
|
624
638
|
throw new Error("Tool response missing tool_use_id");
|
|
625
639
|
}
|
|
640
|
+
// Build functionResponse part with optional thought_signature for Gemini thinking models
|
|
641
|
+
const functionResponsePart: Part = {
|
|
642
|
+
functionResponse: {
|
|
643
|
+
name: msg.tool_use_id,
|
|
644
|
+
response: formatFunctionResponse(msg.content || ''),
|
|
645
|
+
},
|
|
646
|
+
// Include thought_signature if provided (required for Gemini 2.5+/3.0+ thinking models)
|
|
647
|
+
thoughtSignature: msg.thought_signature,
|
|
648
|
+
};
|
|
626
649
|
contents.push({
|
|
627
650
|
role: 'user',
|
|
628
|
-
parts: [
|
|
629
|
-
{
|
|
630
|
-
functionResponse: {
|
|
631
|
-
name: msg.tool_use_id,
|
|
632
|
-
response: formatFunctionResponse(msg.content || ''),
|
|
633
|
-
}
|
|
634
|
-
}
|
|
635
|
-
]
|
|
651
|
+
parts: [functionResponsePart]
|
|
636
652
|
});
|
|
637
653
|
} else { // PromptRole.user, PromptRole.assistant, PromptRole.safety
|
|
638
654
|
const parts: Part[] = [];
|
|
@@ -646,14 +662,27 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
|
|
|
646
662
|
// File content handling
|
|
647
663
|
if (msg.files) {
|
|
648
664
|
for (const f of msg.files) {
|
|
649
|
-
|
|
650
|
-
const
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
665
|
+
let fileUrl = await f.getURL();
|
|
666
|
+
const isGsUrl = fileUrl.startsWith('gs://') || fileUrl.startsWith('https://storage.googleapis.com/');
|
|
667
|
+
|
|
668
|
+
if (isGsUrl) {
|
|
669
|
+
parts.push({
|
|
670
|
+
fileData: {
|
|
671
|
+
fileUri: fileUrl,
|
|
672
|
+
mimeType: f.mime_type
|
|
673
|
+
}
|
|
674
|
+
});
|
|
675
|
+
} else {
|
|
676
|
+
// Inline data handling
|
|
677
|
+
const stream = await f.getStream();
|
|
678
|
+
const data = await readStreamAsBase64(stream);
|
|
679
|
+
parts.push({
|
|
680
|
+
inlineData: {
|
|
681
|
+
data,
|
|
682
|
+
mimeType: f.mime_type
|
|
683
|
+
}
|
|
684
|
+
});
|
|
685
|
+
}
|
|
657
686
|
}
|
|
658
687
|
}
|
|
659
688
|
|
|
@@ -742,7 +771,7 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
|
|
|
742
771
|
const modelName = splits[splits.length - 1];
|
|
743
772
|
options = { ...options, model: modelName };
|
|
744
773
|
|
|
745
|
-
let conversation = updateConversation(options.conversation
|
|
774
|
+
let conversation = updateConversation(options.conversation, prompt.contents);
|
|
746
775
|
prompt.contents = conversation;
|
|
747
776
|
|
|
748
777
|
// TODO: Remove hack, use global endpoint manually if needed.
|
|
@@ -792,12 +821,27 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
|
|
|
792
821
|
finish_reason = "tool_use";
|
|
793
822
|
}
|
|
794
823
|
|
|
824
|
+
// Increment turn counter for deferred stripping
|
|
825
|
+
conversation = incrementConversationTurn(conversation) as Content[];
|
|
826
|
+
|
|
827
|
+
// Strip large base64 image data based on options.stripImagesAfterTurns
|
|
828
|
+
const currentTurn = getConversationMeta(conversation).turnNumber;
|
|
829
|
+
const stripOptions = {
|
|
830
|
+
keepForTurns: options.stripImagesAfterTurns ?? Infinity,
|
|
831
|
+
currentTurn,
|
|
832
|
+
textMaxTokens: options.stripTextMaxTokens
|
|
833
|
+
};
|
|
834
|
+
let processedConversation = stripBase64ImagesFromConversation(conversation, stripOptions);
|
|
835
|
+
|
|
836
|
+
// Truncate large text content if configured
|
|
837
|
+
processedConversation = truncateLargeTextInConversation(processedConversation, stripOptions);
|
|
838
|
+
|
|
795
839
|
return {
|
|
796
840
|
result: result && result.length > 0 ? result : [{ type: "text" as const, value: '' }],
|
|
797
841
|
token_usage: token_usage,
|
|
798
842
|
finish_reason: finish_reason,
|
|
799
843
|
original_response: options.include_original_response ? response : undefined,
|
|
800
|
-
conversation,
|
|
844
|
+
conversation: processedConversation,
|
|
801
845
|
tool_use
|
|
802
846
|
} satisfies Completion;
|
|
803
847
|
}
|
|
@@ -811,6 +855,10 @@ export class GeminiModelDefinition implements ModelDefinition<GenerateContentPro
|
|
|
811
855
|
const modelName = splits[splits.length - 1];
|
|
812
856
|
options = { ...options, model: modelName };
|
|
813
857
|
|
|
858
|
+
// Include conversation history in prompt contents (same as non-streaming)
|
|
859
|
+
const conversation = updateConversation(options.conversation, prompt.contents);
|
|
860
|
+
prompt.contents = conversation;
|
|
861
|
+
|
|
814
862
|
if (options.model.includes("gemini-2.5-flash-image")) {
|
|
815
863
|
region = "global"; // Gemini Flash Image only available in global region, this is for nano-banana model
|
|
816
864
|
}
|
|
@@ -897,16 +945,19 @@ function getToolFunction(tool: ToolDefinition): FunctionDeclaration {
|
|
|
897
945
|
};
|
|
898
946
|
}
|
|
899
947
|
|
|
900
|
-
|
|
901
948
|
/**
 * Build the full message history for a request by appending the new prompt
 * contents to the stored conversation.
 *
 * The stored conversation arrives untyped. It is first passed through
 * `unwrapConversationArray`; when that yields nothing, the value is used
 * directly only if it really is an array. Any other value (undefined, null,
 * a plain object, a string, ...) is treated as an empty history instead of
 * being trusted via a cast, which would either throw on `.concat` or, for a
 * string, silently return a string.
 *
 * NOTE(review): `unwrapConversationArray` is assumed to return the wrapped
 * array, or a nullish value when the input is not in wrapped form; confirm
 * against its definition in `@llumiverse/core`.
 *
 * @param conversation Previously stored conversation, in wrapped or plain-array form.
 * @param prompt Contents of the current request, appended after the history.
 * @returns A new array: history followed by `prompt`. Inputs are not mutated.
 */
function updateConversation(conversation: unknown, prompt: Content[]): Content[] {
    const unwrapped = unwrapConversationArray<Content>(conversation);
    const history: Content[] = unwrapped
        ?? (Array.isArray(conversation) ? (conversation as Content[]) : []);
    return history.concat(prompt);
}
|
|
960
|
+
|
|
910
961
|
/**
|
|
911
962
|
*
|
|
912
963
|
* Gemini supports JSON output in the response. so we test if the response is a valid JSON object. otherwise we treat the response as a string.
|
package/src/xai/index.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { AIModel,
|
|
1
|
+
import { AIModel, DriverOptions, PromptOptions, PromptSegment, Providers } from "@llumiverse/core";
|
|
2
2
|
import { formatOpenAILikeMultimodalPrompt, OpenAIPromptFormatterOptions } from "../openai/openai_format.js";
|
|
3
3
|
import { FetchClient } from "@vertesia/api-fetch-client";
|
|
4
4
|
import OpenAI from "openai";
|
|
@@ -15,7 +15,7 @@ export interface xAiDriverOptions extends DriverOptions {
|
|
|
15
15
|
export class xAIDriver extends BaseOpenAIDriver {
|
|
16
16
|
|
|
17
17
|
service: OpenAI;
|
|
18
|
-
provider
|
|
18
|
+
readonly provider = Providers.xai;
|
|
19
19
|
xai_service: FetchClient;
|
|
20
20
|
DEFAULT_ENDPOINT = "https://api.x.ai/v1";
|
|
21
21
|
|
|
@@ -31,7 +31,6 @@ export class xAIDriver extends BaseOpenAIDriver {
|
|
|
31
31
|
baseURL: opts.endpoint ?? this.DEFAULT_ENDPOINT,
|
|
32
32
|
});
|
|
33
33
|
this.xai_service = new FetchClient(opts.endpoint ?? this.DEFAULT_ENDPOINT).withAuthCallback(async () => `Bearer ${opts.apiKey}`);
|
|
34
|
-
this.provider = "xai";
|
|
35
34
|
//this.formatPrompt = this._formatPrompt; //TODO: fix xai prompt formatting
|
|
36
35
|
}
|
|
37
36
|
|
|
@@ -49,17 +48,9 @@ export class xAIDriver extends BaseOpenAIDriver {
|
|
|
49
48
|
|
|
50
49
|
}
|
|
51
50
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
finish_reason: result.choices[0].finish_reason,
|
|
56
|
-
token_usage: {
|
|
57
|
-
prompt: result.usage?.prompt_tokens,
|
|
58
|
-
result: result.usage?.completion_tokens,
|
|
59
|
-
total: result.usage?.total_tokens,
|
|
60
|
-
}
|
|
61
|
-
}
|
|
62
|
-
}
|
|
51
|
+
// Note: We intentionally do NOT override extractDataFromResponse here.
|
|
52
|
+
// The base class implementation properly handles tool_calls extraction.
|
|
53
|
+
// xAI's API is OpenAI-compatible and returns tool_calls in the same format.
|
|
63
54
|
|
|
64
55
|
async listModels(): Promise<AIModel[]> {
|
|
65
56
|
const [lm, em] = await Promise.all([
|
|
@@ -76,10 +67,12 @@ export class xAIDriver extends BaseOpenAIDriver {
|
|
|
76
67
|
return {
|
|
77
68
|
id: model.id,
|
|
78
69
|
provider: this.provider,
|
|
79
|
-
name: model.
|
|
80
|
-
description: model.
|
|
70
|
+
name: model.id,
|
|
71
|
+
description: `${model.id} by ${model.owned_by}`,
|
|
81
72
|
is_multimodal: model.input_modalities.length > 1,
|
|
82
|
-
|
|
73
|
+
input_modalities: model.input_modalities,
|
|
74
|
+
output_modalities: model.output_modalities,
|
|
75
|
+
tags: [...model.input_modalities.map(m => `i:${m}`), ...model.output_modalities.map(m => `o:${m}`)],
|
|
83
76
|
} satisfies AIModel;
|
|
84
77
|
});
|
|
85
78
|
|