@llumiverse/drivers 1.0.0 → 1.1.0-dev.20260427.054520Z
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/cjs/bedrock/index.js +90 -10
- package/lib/cjs/bedrock/index.js.map +1 -1
- package/lib/cjs/openai/index.js +2 -0
- package/lib/cjs/openai/index.js.map +1 -1
- package/lib/cjs/vertexai/index.js +31 -22
- package/lib/cjs/vertexai/index.js.map +1 -1
- package/lib/cjs/vertexai/models/claude.js +99 -26
- package/lib/cjs/vertexai/models/claude.js.map +1 -1
- package/lib/cjs/vertexai/models/gemini.js +35 -335
- package/lib/cjs/vertexai/models/gemini.js.map +1 -1
- package/lib/esm/bedrock/index.js +90 -10
- package/lib/esm/bedrock/index.js.map +1 -1
- package/lib/esm/openai/index.js +2 -0
- package/lib/esm/openai/index.js.map +1 -1
- package/lib/esm/vertexai/index.js +31 -22
- package/lib/esm/vertexai/index.js.map +1 -1
- package/lib/esm/vertexai/models/claude.js +99 -28
- package/lib/esm/vertexai/models/claude.js.map +1 -1
- package/lib/esm/vertexai/models/gemini.js +36 -336
- package/lib/esm/vertexai/models/gemini.js.map +1 -1
- package/lib/types/bedrock/index.d.ts +5 -2
- package/lib/types/bedrock/index.d.ts.map +1 -1
- package/lib/types/openai/index.d.ts.map +1 -1
- package/lib/types/vertexai/index.d.ts +4 -1
- package/lib/types/vertexai/index.d.ts.map +1 -1
- package/lib/types/vertexai/models/claude.d.ts +16 -0
- package/lib/types/vertexai/models/claude.d.ts.map +1 -1
- package/lib/types/vertexai/models/gemini.d.ts +4 -8
- package/lib/types/vertexai/models/gemini.d.ts.map +1 -1
- package/package.json +8 -8
- package/src/bedrock/index.ts +104 -12
- package/src/bedrock/streaming-tool-use.test.ts +250 -0
- package/src/openai/index.ts +2 -0
- package/src/vertexai/index.ts +32 -22
- package/src/vertexai/models/claude-streaming-spacing.test.ts +174 -0
- package/src/vertexai/models/claude.ts +120 -29
- package/src/vertexai/models/gemini-conversation-mutation.test.ts +174 -0
- package/src/vertexai/models/gemini.ts +48 -391
package/src/vertexai/index.ts
CHANGED
@@ -59,6 +59,8 @@ export class VertexAIDriver extends AbstractDriver<VertexAIDriverOptions, Vertex
     anthropicClient: AnthropicVertex | undefined;
     fetchClient: FetchClient | undefined;
     googleGenAI: GoogleGenAI | undefined;
+    googleGenAIRegion: string | undefined;
+    googleGenAIFlex: boolean | undefined;
     llamaClient: FetchClient & { region?: string } | undefined;
     modelGarden: v1beta1.ModelGardenServiceClient | undefined;
     imagenClient: PredictionServiceClient | undefined;
@@ -73,6 +75,8 @@ export class VertexAIDriver extends AbstractDriver<VertexAIDriverOptions, Vertex
         this.anthropicClient = undefined;
         this.fetchClient = undefined
         this.googleGenAI = undefined;
+        this.googleGenAIRegion = undefined;
+        this.googleGenAIFlex = undefined;
         this.modelGarden = undefined;
         this.llamaClient = undefined;
         this.imagenClient = undefined;
@@ -88,32 +92,38 @@ export class VertexAIDriver extends AbstractDriver<VertexAIDriverOptions, Vertex
         return this.authClientPromise;
     }
 
-    public getGoogleGenAIClient(region: string = this.options.region): GoogleGenAI {
-
-        // Create a dedicated client when a non-default region is requested
-        if (region !== this.options.region) {
-            return new GoogleGenAI({
-                project: this.options.project,
-                location: region,
-                vertexai: true,
-                googleAuthOptions: this.options.googleAuthOptions || {
-                    scopes: ["https://www.googleapis.com/auth/cloud-platform"],
-                }
-            });
-        }
-        if (!this.googleGenAI) {
-            this.googleGenAI = new GoogleGenAI({
-                project: this.options.project,
-                location: region,
-                vertexai: true,
-                googleAuthOptions: this.options.googleAuthOptions || {
-                    scopes: ["https://www.googleapis.com/auth/cloud-platform"],
-                }
-            });
+    public getGoogleGenAIClient(region: string = this.options.region, flex: boolean = false): GoogleGenAI {
+        if (this.googleGenAI &&
+            this.googleGenAIRegion === region &&
+            this.googleGenAIFlex === flex) {
+            // Return existing client if region and flex settings match
+            return this.googleGenAI;
         }
+        this.googleGenAI = this.buildGoogleGenAIClient(region, flex);
+        this.googleGenAIRegion = region;
+        this.googleGenAIFlex = flex;
         return this.googleGenAI;
     }
 
+    private buildGoogleGenAIClient(region: string, flex: boolean): GoogleGenAI {
+        return new GoogleGenAI({
+            project: this.options.project,
+            location: region,
+            vertexai: true,
+            googleAuthOptions: this.options.googleAuthOptions || {
+                scopes: ["https://www.googleapis.com/auth/cloud-platform"],
+            },
+            ...(flex ? {
+                httpOptions: {
+                    headers: {
+                        "X-Vertex-AI-LLM-Request-Type": "shared",
+                        "X-Vertex-AI-LLM-Shared-Request-Type": "flex",
+                    }
+                }
+            } : {}),
+        });
+    }
+
     public getFetchClient(): FetchClient {
         //Lazy initialization
         if (!this.fetchClient) {
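The rebuilt logic memoizes one client per (region, flex) pair. A minimal usage sketch of the expected behavior (illustrative only; `driver` is assumed to be an already-configured VertexAIDriver):

    const a = driver.getGoogleGenAIClient("us-central1");        // built on first call
    const b = driver.getGoogleGenAIClient("us-central1");        // same region and flex: a === b
    const c = driver.getGoogleGenAIClient("europe-west1");       // region changed: rebuilt
    const d = driver.getGoogleGenAIClient("europe-west1", true); // flex changed: rebuilt with the
    // X-Vertex-AI-LLM-Request-Type / X-Vertex-AI-LLM-Shared-Request-Type headers attached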

package/src/vertexai/models/claude-streaming-spacing.test.ts
ADDED
@@ -0,0 +1,174 @@
+import { ExecutionOptions } from '@llumiverse/core';
+import { describe, expect, it } from 'vitest';
+import { VertexAIDriver } from '../index.js';
+import { ClaudeModelDefinition } from './claude.js';
+
+function createAsyncStream(events: any[]): AsyncIterable<any> {
+    return (async function* () {
+        for (const event of events) {
+            yield event;
+        }
+    })();
+}
+
+async function collectChunks(stream: AsyncIterable<any>) {
+    const chunks: any[] = [];
+    for await (const chunk of stream) {
+        chunks.push(chunk);
+    }
+    return chunks;
+}
+
+describe('ClaudeModelDefinition streaming spacing', () => {
+    it('does not leak deferred spacing when tool use follows thinking', async () => {
+        const modelDef = new ClaudeModelDefinition('claude-sonnet-4-5');
+        const driver = {
+            logger: { warn: () => { }, info: () => { }, error: () => { } },
+            getAnthropicClient: async () => ({
+                messages: {
+                    stream: async () => createAsyncStream([
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'thinking_delta', thinking: 'Thinking...' },
+                        },
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'signature_delta' },
+                        },
+                        {
+                            type: 'content_block_start',
+                            content_block: { type: 'tool_use', id: 'tool-1', name: 'get_weather' },
+                        },
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'input_json_delta', partial_json: '{"city":"Paris"}' },
+                        },
+                        {
+                            type: 'content_block_stop',
+                        },
+                    ]),
+                },
+            }),
+        } as unknown as VertexAIDriver;
+
+        const prompt = {
+            messages: [{ role: 'user', content: [{ type: 'text', text: 'Weather?' }] }],
+        } as any;
+
+        const options = {
+            model: 'publishers/anthropic/models/claude-sonnet-4-5',
+            model_options: {
+                _option_id: 'vertexai-claude',
+                include_thoughts: true,
+            },
+        } as ExecutionOptions;
+
+        const stream = await modelDef.requestTextCompletionStream(driver, prompt, options);
+        const chunks = await collectChunks(stream);
+
+        const textOutput = chunks.flatMap(chunk => chunk.result ?? []).map(part => part.value).join('');
+        const toolChunks = chunks.flatMap(chunk => chunk.tool_use ?? []);
+
+        expect(textOutput).toBe('Thinking...');
+        expect(toolChunks).toHaveLength(2);
+        expect(toolChunks[0]).toMatchObject({ id: 'tool-1', tool_name: 'get_weather', tool_input: '' });
+        expect(toolChunks[1]).toMatchObject({ id: 'tool-1', tool_name: '', tool_input: '{"city":"Paris"}' });
+    });
+
+    it('flushes deferred spacing into the first text delta after thinking', async () => {
+        const modelDef = new ClaudeModelDefinition('claude-sonnet-4-5');
+        const driver = {
+            logger: { warn: () => { }, info: () => { }, error: () => { } },
+            getAnthropicClient: async () => ({
+                messages: {
+                    stream: async () => createAsyncStream([
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'thinking_delta', thinking: 'Thinking...' },
+                        },
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'signature_delta' },
+                        },
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'text_delta', text: 'Answer' },
+                        },
+                    ]),
+                },
+            }),
+        } as unknown as VertexAIDriver;
+
+        const prompt = {
+            messages: [{ role: 'user', content: [{ type: 'text', text: 'Question?' }] }],
+        } as any;
+
+        const options = {
+            model: 'publishers/anthropic/models/claude-sonnet-4-5',
+            model_options: {
+                _option_id: 'vertexai-claude',
+                include_thoughts: true,
+            },
+        } as ExecutionOptions;
+
+        const stream = await modelDef.requestTextCompletionStream(driver, prompt, options);
+        const chunks = await collectChunks(stream);
+
+        const textParts = chunks.flatMap(chunk => chunk.result ?? []).map(part => part.value);
+        expect(textParts).toEqual(['Thinking...', '\n\nAnswer']);
+    });
+
+    it('does not reintroduce deferred spacing when text arrives after a tool call', async () => {
+        const modelDef = new ClaudeModelDefinition('claude-sonnet-4-5');
+        const driver = {
+            logger: { warn: () => { }, info: () => { }, error: () => { } },
+            getAnthropicClient: async () => ({
+                messages: {
+                    stream: async () => createAsyncStream([
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'thinking_delta', thinking: 'Thinking...' },
+                        },
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'signature_delta' },
+                        },
+                        {
+                            type: 'content_block_start',
+                            content_block: { type: 'tool_use', id: 'tool-1', name: 'get_weather' },
+                        },
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'input_json_delta', partial_json: '{"city":"Paris"}' },
+                        },
+                        {
+                            type: 'content_block_stop',
+                        },
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'text_delta', text: 'Answer after tool' },
+                        },
+                    ]),
+                },
+            }),
+        } as unknown as VertexAIDriver;
+
+        const prompt = {
+            messages: [{ role: 'user', content: [{ type: 'text', text: 'Weather?' }] }],
+        } as any;
+
+        const options = {
+            model: 'publishers/anthropic/models/claude-sonnet-4-5',
+            model_options: {
+                _option_id: 'vertexai-claude',
+                include_thoughts: true,
+            },
+        } as ExecutionOptions;
+
+        const stream = await modelDef.requestTextCompletionStream(driver, prompt, options);
+        const chunks = await collectChunks(stream);
+
+        const textParts = chunks.flatMap(chunk => chunk.result ?? []).map(part => part.value);
+        expect(textParts).toEqual(['Thinking...', 'Answer after tool']);
+    });
+});
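The three tests above pin down a small deferred-flush pattern: spacing after a thinking block is buffered, emitted only in front of the next text delta, and discarded when a tool call intervenes. A standalone sketch of the pattern (illustrative names, not the package's API):

    function makeSpacer() {
        let pending = false;
        return {
            endThinking: () => { pending = true; },   // signature_delta: thinking block finished
            onText: (text: string) => {               // flush the '\n\n' only before real text
                const out = (pending ? '\n\n' : '') + text;
                pending = false;
                return out;
            },
            onToolStop: () => { pending = false; },   // tool call followed thinking: drop the spacing
        };
    }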

package/src/vertexai/models/claude.ts
CHANGED
@@ -15,7 +15,7 @@ import { ContentBlock, ContentBlockParam, DocumentBlockParam, ImageBlockParam, M
 import { MessageStreamParams } from "@anthropic-ai/sdk/resources/index.mjs";
 import { MessageCreateParamsBase, MessageCreateParamsNonStreaming, RawMessageStreamEvent } from "@anthropic-ai/sdk/resources/messages.js";
 import {
-    AIModel, Completion, CompletionChunkObject, ExecutionOptions,
+    AIModel, Completion, CompletionChunkObject, ExecutionOptions, ExecutionTokenUsage,
     getConversationMeta,
     getMaxTokensLimitVertexAi,
     incrementConversationTurn,
@@ -49,6 +49,26 @@ interface ClaudePrompt {
     system?: TextBlockParam[];
 }
 
+interface AnthropicUsageLike {
+    input_tokens: number;
+    output_tokens: number;
+    cache_read_input_tokens?: number | null;
+    cache_creation_input_tokens?: number | null;
+}
+
+function anthropicUsageToTokenUsage(usage: AnthropicUsageLike): ExecutionTokenUsage {
+    const cacheRead = usage.cache_read_input_tokens ?? 0;
+    const cacheWrite = usage.cache_creation_input_tokens ?? 0;
+    return {
+        prompt_new: usage.input_tokens,
+        prompt: usage.input_tokens + cacheRead + cacheWrite,
+        result: usage.output_tokens,
+        total: usage.input_tokens + usage.output_tokens + cacheRead + cacheWrite,
+        prompt_cached: usage.cache_read_input_tokens ?? undefined,
+        prompt_cache_write: usage.cache_creation_input_tokens ?? undefined,
+    };
+}
+
 function claudeFinishReason(reason: string | undefined) {
     if (!reason) return undefined;
     switch (reason) {
@@ -332,11 +352,7 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
         return {
             result: text ? [{ type: "text", value: text }] : [{ type: "text", value: '' }],
             tool_use,
-            token_usage: {
-                prompt: result.usage.input_tokens,
-                result: result.usage.output_tokens,
-                total: result.usage.input_tokens + result.usage.output_tokens
-            },
+            token_usage: anthropicUsageToTokenUsage(result.usage),
             // make sure we set finish_reason to the correct value (claude is normally setting this by itself)
             finish_reason: tool_use ? "tool_use" : claudeFinishReason(result?.stop_reason ?? ''),
             conversation: processedConversation
@@ -369,16 +385,16 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
 
         // Track current tool use being built from streaming
         let currentToolUse: { id: string; name: string; inputJson: string } | null = null;
+        // Deferred spacing after a thinking block — emitted only when real text follows,
+        // so it doesn't leak into the output when a tool call comes after thinking.
+        let pendingSpacing = false;
 
         const stream = asyncMap(response_stream, async (streamEvent: RawMessageStreamEvent) => {
             switch (streamEvent.type) {
                 case "message_start":
                     return {
                         result: [{ type: "text", value: '' }],
-                        token_usage: {
-                            prompt: streamEvent.message.usage.input_tokens,
-                            result: streamEvent.message.usage.output_tokens
-                        }
+                        token_usage: anthropicUsageToTokenUsage(streamEvent.message.usage as AnthropicUsageLike),
                     } satisfies CompletionChunkObject;
                 case "message_delta":
                     return {
@@ -415,10 +431,13 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
                 case "content_block_delta":
                     // Handle different delta types
                     switch (streamEvent.delta.type) {
-                        case "text_delta":
+                        case "text_delta": {
+                            const prefix = pendingSpacing ? '\n\n' : '';
+                            pendingSpacing = false;
                             return {
-                                result: streamEvent.delta.text ? [{ type: "text", value: streamEvent.delta.text }] : []
+                                result: streamEvent.delta.text ? [{ type: "text", value: prefix + streamEvent.delta.text }] : []
                             } satisfies CompletionChunkObject;
+                        }
                         case "input_json_delta":
                             // Accumulate tool input JSON
                             if (currentToolUse && streamEvent.delta.partial_json) {
@@ -440,25 +459,20 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
                             }
                             break;
                         case "signature_delta":
-                            //
+                            // End of thinking block — defer spacing until real text follows,
+                            // so it doesn't leak when a tool call comes next.
                             if (model_options?.include_thoughts) {
-                                return {
-                                    result: [{ type: "text", value: '\n\n' }], // Double newline for more spacing
-                                } satisfies CompletionChunkObject;
+                                pendingSpacing = true;
                             }
                             break;
                     }
                     break;
                 case "content_block_stop":
-                    // Reset
+                    // Reset tool use tracking; spacing is handled via pendingSpacing
                     if (currentToolUse) {
                         currentToolUse = null;
-
-
-                        if (model_options?.include_thoughts) {
-                            return {
-                                result: [{ type: "text", value: '\n\n' }] // Add double newline for spacing
-                            } satisfies CompletionChunkObject;
+                        // Tool call followed thinking — discard any pending spacing so it doesn't leak
+                        pendingSpacing = false;
                     }
                     break;
             }
@@ -707,12 +721,18 @@ export function mergeConsecutiveUserMessages(messages: MessageParam[]): MessageP
  * @param response
  * @returns
  */
-function updateConversation(conversation: ClaudePrompt | undefined | null, prompt: ClaudePrompt): ClaudePrompt {
+export function updateConversation(conversation: ClaudePrompt | undefined | null, prompt: ClaudePrompt): ClaudePrompt {
     const baseSystemMessages = conversation?.system || [];
     const baseMessages = conversation?.messages || [];
     const system = baseSystemMessages.concat(prompt.system || []);
-    //
-    const mergedMessages = mergeConsecutiveUserMessages(baseMessages.concat(prompt.messages || []));
+    // Sanitize first, then merge. Order matters: an empty assistant message (e.g. from interrupted
+    // streaming) between two tool-result user messages acts as a false separator. If merge runs
+    // first, those messages look non-consecutive and fixOrphanedToolUse injects a synthetic result
+    // into the first one; when sanitize later removes the empty assistant, the second user message
+    // ends up with an orphaned tool_result that Vertex AI rejects:
+    // "unexpected tool_use_id found in tool_result blocks".
+    const combined = sanitizeMessages(baseMessages.concat(prompt.messages || []));
+    const mergedMessages = mergeConsecutiveUserMessages(combined);
     return {
         messages: mergedMessages,
         system: system.length > 0 ? system : undefined // If system is empty, set to undefined
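To make the ordering concrete, a simplified sketch of the failure mode the comment describes (message shapes abbreviated, not the exact runtime data):

    const history = [
        { role: 'user', content: [{ type: 'tool_result', tool_use_id: 'A', content: 'ok' }] },
        { role: 'assistant', content: [] },   // empty, e.g. from an interrupted stream
        { role: 'user', content: [{ type: 'tool_result', tool_use_id: 'B', content: 'ok' }] },
    ];
    // sanitize-then-merge: the empty assistant is dropped first, so the two user messages
    // become consecutive and merge into one user turn carrying both tool_results.
    // merge-then-sanitize: the empty assistant keeps them apart during the merge; once it is
    // later removed, tool_result 'B' sits after another user turn with no matching assistant
    // tool_use, and Vertex AI rejects the request.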
@@ -727,7 +747,7 @@ function updateConversation(conversation: ClaudePrompt | undefined | null, promp
  * - Filters out empty text blocks from each message's content
  * - Removes messages entirely if they have no content after filtering
  */
-function sanitizeMessages(messages: MessageParam[]): MessageParam[] {
+export function sanitizeMessages(messages: MessageParam[]): MessageParam[] {
     const result: MessageParam[] = [];
 
     for (const message of messages) {
@@ -852,6 +872,42 @@ interface RequestOptions {
     headers?: Record<string, string>;
 }
 
+type ClaudeTool = NonNullable<MessageCreateParamsBase['tools']>[number];
+
+function stripClaudeCacheControlFromMessages(messages: MessageParam[]): MessageParam[] {
+    return messages.map(message => {
+        if (typeof message.content === 'string') {
+            return message;
+        }
+
+        return {
+            ...message,
+            content: message.content.map(block => stripClaudeCacheControlFromBlock(block)),
+        };
+    });
+}
+
+function stripClaudeCacheControlFromBlock<T extends ContentBlockParam>(block: T): T {
+    const cloned = { ...block } as T & { cache_control?: unknown };
+    delete cloned.cache_control;
+    return cloned as T;
+}
+
+function stripClaudeCacheControlFromSystem(system?: TextBlockParam[]): TextBlockParam[] | undefined {
+    return system?.map(block => {
+        const { cache_control: _cacheControl, ...rest } = block as TextBlockParam & { cache_control?: unknown };
+        return rest as TextBlockParam;
+    });
+}
+
+function stripClaudeCacheControlFromTools(tools?: MessageCreateParamsBase['tools']): MessageCreateParamsBase['tools'] | undefined {
+    return tools?.map(tool => {
+        const cloned = { ...tool } as ClaudeTool & { cache_control?: unknown };
+        delete cloned.cache_control;
+        return cloned as ClaudeTool;
+    });
+}
+
 function getClaudePayload(options: ExecutionOptions, prompt: ClaudePrompt): { payload: MessageCreateParamsBase, requestOptions: RequestOptions | undefined } {
     const modelName = options.model; // Model name is already extracted in the calling methods
     const model_options = options.model_options as VertexAIClaudeOptions;
@@ -888,10 +944,45 @@ function getClaudePayload(options: ExecutionOptions, prompt: ClaudePrompt): { pa
         sanitizedMessages = convertClaudeToolBlocksToText(sanitizedMessages);
     }
 
+    sanitizedMessages = stripClaudeCacheControlFromMessages(sanitizedMessages);
+    const sanitizedSystem = stripClaudeCacheControlFromSystem(prompt.system);
+    const sanitizedTools = hasTools
+        ? stripClaudeCacheControlFromTools(options.tools as MessageCreateParamsBase['tools'])
+        : undefined;
+
+    // Prompt caching: use three breakpoints so stable system prompt, tool definitions,
+    // and the conversation history prefix can all be reused across calls.
+    const cacheEnabled = model_options?.cache_enabled === true;
+    if (cacheEnabled) {
+        const cacheTtl = model_options?.cache_ttl;
+        const cacheControl = { type: 'ephemeral' as const, ...(cacheTtl && { ttl: cacheTtl }) };
+
+        if (sanitizedSystem && sanitizedSystem.length > 0) {
+            const lastSystemBlock = sanitizedSystem[sanitizedSystem.length - 1] as TextBlockParam & { cache_control?: unknown };
+            lastSystemBlock.cache_control = cacheControl;
+        }
+
+        if (sanitizedTools && sanitizedTools.length > 0) {
+            const lastTool = sanitizedTools[sanitizedTools.length - 1] as ClaudeTool & { cache_control?: unknown };
+            lastTool.cache_control = cacheControl;
+        }
+
+        if (sanitizedMessages.length >= 4) {
+            const pivotMsg = sanitizedMessages[sanitizedMessages.length - 2];
+            if (Array.isArray(pivotMsg.content) && pivotMsg.content.length > 0) {
+                const lastBlock = pivotMsg.content[pivotMsg.content.length - 1];
+                if (typeof lastBlock === 'object' && lastBlock !== null &&
+                    'type' in lastBlock && lastBlock.type !== 'thinking' && lastBlock.type !== 'redacted_thinking') {
+                    (lastBlock as TextBlockParam).cache_control = cacheControl;
+                }
+            }
+        }
+    }
+
     const payload = {
         messages: sanitizedMessages,
-        system: prompt.system,
-        tools: hasTools ? options.tools as MessageCreateParamsBase['tools'] : undefined,
+        system: sanitizedSystem,
+        tools: sanitizedTools,
         temperature: model_options?.temperature,
         model: modelName,
         max_tokens: maxToken(options),
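Assuming cache_enabled and a conversation of at least four messages, the payload ends up with breakpoints in three places (sketch with illustrative values, not actual driver output):

    const payload = {
        system: [{ type: 'text', text: '...', cache_control: { type: 'ephemeral' } }],            // breakpoint 1
        tools: [{ name: 'get_weather', input_schema: {}, cache_control: { type: 'ephemeral' } }],  // breakpoint 2
        messages: [
            // ...earlier history, now a stable, cache-reusable prefix...
            { role: 'user', content: [{ type: 'text', text: '...', cache_control: { type: 'ephemeral' } }] }, // breakpoint 3: second-to-last message
            { role: 'assistant', content: [{ type: 'text', text: '...' }] },
        ],
    };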

package/src/vertexai/models/gemini-conversation-mutation.test.ts
ADDED
@@ -0,0 +1,174 @@
+/**
+ * Unit tests for the Gemini conversation mutation bug fix.
+ *
+ * Bug: When tools=[] is passed but the conversation contains functionCall/functionResponse
+ * parts from prior turns, getGeminiPayload() was doing:
+ *
+ *     prompt.contents = convertGeminiFunctionPartsToText(prompt.contents);
+ *
+ * Since prompt.contents is the same object reference as the caller's conversation array,
+ * this permanently corrupted the stored conversation with "[Tool call: ...]" text markers.
+ * On the next turn the model would see those markers in context and echo them as literal output.
+ *
+ * Fix: use a local `payloadContents` variable so the caller's conversation is never mutated.
+ */
+
+import { ExecutionOptions } from '@llumiverse/core';
+import { FinishReason } from '@google/genai';
+import { describe, expect, it } from 'vitest';
+import { VertexAIDriver } from '../index.js';
+import { convertGeminiFunctionPartsToText, GeminiModelDefinition } from './gemini.js';
+
+// ---------------------------------------------------------------------------
+// Pure function tests — no driver needed
+// ---------------------------------------------------------------------------
+
+describe('convertGeminiFunctionPartsToText', () => {
+    it('does not mutate the input array', () => {
+        const original = [
+            {
+                role: 'model',
+                parts: [{ functionCall: { name: 'plan', args: { task: 'write tests' } } }],
+            },
+            {
+                role: 'user',
+                parts: [{ functionResponse: { name: 'plan', response: { output: 'done' } } }],
+            },
+        ];
+        const originalItemRefs = original.map(c => c);
+        const originalPartRefs = original.map(c => c.parts[0]);
+
+        const result = convertGeminiFunctionPartsToText(original);
+
+        // Result must be a different array
+        expect(result).not.toBe(original);
+        // Original items must be unchanged (same references, not mutated)
+        original.forEach((item, i) => {
+            expect(item).toBe(originalItemRefs[i]);
+            expect(item.parts[0]).toBe(originalPartRefs[i]);
+        });
+        // Original functionCall part must still be a functionCall, not text
+        expect(original[0].parts[0]).toHaveProperty('functionCall');
+        expect(original[0].parts[0]).not.toHaveProperty('text');
+        expect(original[1].parts[0]).toHaveProperty('functionResponse');
+        expect(original[1].parts[0]).not.toHaveProperty('text');
+    });
+
+    it('converts functionCall parts to the expected text format', () => {
+        const contents = [
+            {
+                role: 'model',
+                parts: [{ functionCall: { name: 'get_weather', args: { location: 'Paris' } } }],
+            },
+        ];
+
+        const result = convertGeminiFunctionPartsToText(contents);
+
+        expect(result[0].parts![0]).toEqual({
+            text: '[Tool call: get_weather({"location":"Paris"})]',
+        });
+    });
+
+    it('converts functionResponse parts to the expected text format', () => {
+        const contents = [
+            {
+                role: 'user',
+                parts: [{ functionResponse: { name: 'get_weather', response: { temperature: '15°C' } } }],
+            },
+        ];
+
+        const result = convertGeminiFunctionPartsToText(contents);
+
+        expect(result[0].parts![0]).toEqual({
+            text: '[Tool result for get_weather: {"temperature":"15°C"}]',
+        });
+    });
+
+    it('leaves non-function parts intact', () => {
+        const textPart = { text: 'Hello world' };
+        const contents = [{ role: 'user', parts: [textPart] }];
+
+        const result = convertGeminiFunctionPartsToText(contents);
+
+        expect(result[0].parts![0]).toBe(textPart);
+    });
+});
+
+// ---------------------------------------------------------------------------
+// Integration-level tests — verify the driver does not mutate the conversation
+// ---------------------------------------------------------------------------
+
+function makeContentsWithFunctionParts() {
+    return [
+        { role: 'model', parts: [{ functionCall: { name: 'plan', args: { task: 'test' } } }] },
+        { role: 'user', parts: [{ functionResponse: { name: 'plan', response: { result: 'ok' } } }] },
+    ];
+}
+
+function makeDriver(overrides: { generateContent?: () => Promise<any>; generateContentStream?: () => Promise<AsyncIterable<any>> }) {
+    return {
+        logger: { warn: () => {}, info: () => {}, error: () => {} },
+        getGoogleGenAIClient: () => ({
+            models: {
+                generateContent: overrides.generateContent ?? (async () => ({})),
+                generateContentStream: overrides.generateContentStream ?? (async () => (async function* () {})()),
+            },
+        }),
+    } as unknown as VertexAIDriver;
+}
+
+const mockNonStreamingResponse = {
+    usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5, totalTokenCount: 15 },
+    candidates: [{
+        finishReason: FinishReason.STOP,
+        content: { role: 'model', parts: [{ text: 'Summary.' }] },
+        safetyRatings: [],
+    }],
+};
+
+const mockStreamingChunk = {
+    usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5, totalTokenCount: 15 },
+    candidates: [{
+        finishReason: FinishReason.STOP,
+        content: { role: 'model', parts: [{ text: 'Summary.' }] },
+        safetyRatings: [],
+    }],
+};
+
+describe('GeminiModelDefinition - no conversation mutation', () => {
+    it('requestTextCompletion: does not mutate prompt.contents when tools=[] and conversation has function parts', async () => {
+        const modelDef = new GeminiModelDefinition('gemini-2.0-flash');
+        const originalContents = makeContentsWithFunctionParts();
+        const contentsSnapshot = JSON.stringify(originalContents);
+
+        const driver = makeDriver({ generateContent: async () => mockNonStreamingResponse });
+        const prompt = { contents: originalContents, system: undefined } as any;
+        const options: ExecutionOptions = { model: 'publishers/google/models/gemini-2.0-flash', tools: [] };
+
+        await modelDef.requestTextCompletion(driver, prompt, options);
+
+        expect(JSON.stringify(originalContents)).toBe(contentsSnapshot);
+        expect(originalContents[0].parts[0]).toHaveProperty('functionCall');
+        expect(originalContents[1].parts[0]).toHaveProperty('functionResponse');
+    });
+
+    it('requestTextCompletionStream: does not mutate prompt.contents when tools=[] and conversation has function parts', async () => {
+        const modelDef = new GeminiModelDefinition('gemini-2.0-flash');
+        const originalContents = makeContentsWithFunctionParts();
+        const contentsSnapshot = JSON.stringify(originalContents);
+
+        const driver = makeDriver({
+            generateContentStream: async () => (async function* () { yield mockStreamingChunk; })(),
+        });
+        const prompt = { contents: originalContents, system: undefined } as any;
+        const options: ExecutionOptions = { model: 'publishers/google/models/gemini-2.0-flash', tools: [] };
+
+        const stream = await modelDef.requestTextCompletionStream(driver, prompt, options);
+        // Drain the stream to trigger all processing
+        for await (const _chunk of stream) { /* noop */ }
+
+        expect(JSON.stringify(originalContents)).toBe(contentsSnapshot);
+        expect(originalContents[0].parts[0]).toHaveProperty('functionCall');
+        expect(originalContents[1].parts[0]).toHaveProperty('functionResponse');
+    });
+});
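The fix these tests guard is a copy-on-write: convert into a local variable instead of writing back through the shared reference. A minimal sketch of the assumed shape of getGeminiPayload (the guard and helper names here are hypothetical, not the package's exact source):

    let payloadContents = prompt.contents;
    if (toolsDisabled && hasFunctionParts(payloadContents)) {      // hypothetical guard
        // the converted copy stays local; the caller's conversation array is untouched
        payloadContents = convertGeminiFunctionPartsToText(payloadContents);
    }
    const payload = { contents: payloadContents /* , ...rest of the request */ };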