@llumiverse/drivers 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/lib/cjs/bedrock/index.js +90 -10
  2. package/lib/cjs/bedrock/index.js.map +1 -1
  3. package/lib/cjs/openai/index.js +2 -0
  4. package/lib/cjs/openai/index.js.map +1 -1
  5. package/lib/cjs/vertexai/index.js +31 -22
  6. package/lib/cjs/vertexai/index.js.map +1 -1
  7. package/lib/cjs/vertexai/models/claude.js +99 -26
  8. package/lib/cjs/vertexai/models/claude.js.map +1 -1
  9. package/lib/cjs/vertexai/models/gemini.js +35 -335
  10. package/lib/cjs/vertexai/models/gemini.js.map +1 -1
  11. package/lib/esm/bedrock/index.js +90 -10
  12. package/lib/esm/bedrock/index.js.map +1 -1
  13. package/lib/esm/openai/index.js +2 -0
  14. package/lib/esm/openai/index.js.map +1 -1
  15. package/lib/esm/vertexai/index.js +31 -22
  16. package/lib/esm/vertexai/index.js.map +1 -1
  17. package/lib/esm/vertexai/models/claude.js +99 -28
  18. package/lib/esm/vertexai/models/claude.js.map +1 -1
  19. package/lib/esm/vertexai/models/gemini.js +36 -336
  20. package/lib/esm/vertexai/models/gemini.js.map +1 -1
  21. package/lib/types/bedrock/index.d.ts +5 -2
  22. package/lib/types/bedrock/index.d.ts.map +1 -1
  23. package/lib/types/openai/index.d.ts.map +1 -1
  24. package/lib/types/vertexai/index.d.ts +4 -1
  25. package/lib/types/vertexai/index.d.ts.map +1 -1
  26. package/lib/types/vertexai/models/claude.d.ts +16 -0
  27. package/lib/types/vertexai/models/claude.d.ts.map +1 -1
  28. package/lib/types/vertexai/models/gemini.d.ts +4 -8
  29. package/lib/types/vertexai/models/gemini.d.ts.map +1 -1
  30. package/package.json +8 -8
  31. package/src/bedrock/index.ts +104 -12
  32. package/src/bedrock/streaming-tool-use.test.ts +250 -0
  33. package/src/openai/index.ts +2 -0
  34. package/src/vertexai/index.ts +32 -22
  35. package/src/vertexai/models/claude-streaming-spacing.test.ts +174 -0
  36. package/src/vertexai/models/claude.ts +120 -29
  37. package/src/vertexai/models/gemini-conversation-mutation.test.ts +174 -0
  38. package/src/vertexai/models/gemini.ts +48 -391
package/src/vertexai/index.ts

@@ -59,6 +59,8 @@ export class VertexAIDriver extends AbstractDriver<VertexAIDriverOptions, Vertex
     anthropicClient: AnthropicVertex | undefined;
     fetchClient: FetchClient | undefined;
     googleGenAI: GoogleGenAI | undefined;
+    googleGenAIRegion: string | undefined;
+    googleGenAIFlex: boolean | undefined;
     llamaClient: FetchClient & { region?: string } | undefined;
     modelGarden: v1beta1.ModelGardenServiceClient | undefined;
     imagenClient: PredictionServiceClient | undefined;
@@ -73,6 +75,8 @@ export class VertexAIDriver extends AbstractDriver<VertexAIDriverOptions, Vertex
         this.anthropicClient = undefined;
         this.fetchClient = undefined
         this.googleGenAI = undefined;
+        this.googleGenAIRegion = undefined;
+        this.googleGenAIFlex = undefined;
         this.modelGarden = undefined;
         this.llamaClient = undefined;
         this.imagenClient = undefined;
@@ -88,32 +92,38 @@ export class VertexAIDriver extends AbstractDriver<VertexAIDriverOptions, Vertex
         return this.authClientPromise;
     }
 
-    public getGoogleGenAIClient(region: string = this.options.region): GoogleGenAI {
-        //Lazy initialization
-        if (region !== this.options.region) {
-            //Get one off client for different region
-            return new GoogleGenAI({
-                project: this.options.project,
-                location: region,
-                vertexai: true,
-                googleAuthOptions: this.options.googleAuthOptions || {
-                    scopes: ["https://www.googleapis.com/auth/cloud-platform"],
-                }
-            });
-        }
-        if (!this.googleGenAI) {
-            this.googleGenAI = new GoogleGenAI({
-                project: this.options.project,
-                location: region,
-                vertexai: true,
-                googleAuthOptions: this.options.googleAuthOptions || {
-                    scopes: ["https://www.googleapis.com/auth/cloud-platform"],
-                }
-            });
+    public getGoogleGenAIClient(region: string = this.options.region, flex: boolean = false): GoogleGenAI {
+        if (this.googleGenAI &&
+            this.googleGenAIRegion === region &&
+            this.googleGenAIFlex === flex) {
+            // Return existing client if region and flex settings match
+            return this.googleGenAI;
         }
+        this.googleGenAI = this.buildGoogleGenAIClient(region, flex);
+        this.googleGenAIRegion = region;
+        this.googleGenAIFlex = flex;
         return this.googleGenAI;
     }
 
+    private buildGoogleGenAIClient(region: string, flex: boolean): GoogleGenAI {
+        return new GoogleGenAI({
+            project: this.options.project,
+            location: region,
+            vertexai: true,
+            googleAuthOptions: this.options.googleAuthOptions || {
+                scopes: ["https://www.googleapis.com/auth/cloud-platform"],
+            },
+            ...(flex ? {
+                httpOptions: {
+                    headers: {
+                        "X-Vertex-AI-LLM-Request-Type": "shared",
+                        "X-Vertex-AI-LLM-Shared-Request-Type": "flex",
+                    }
+                }
+            } : {}),
+        });
+    }
+
     public getFetchClient(): FetchClient {
         //Lazy initialization
         if (!this.fetchClient) {
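The refactor above collapses the old two-path logic into a single cached client slot keyed by (region, flex): repeating a call with the same settings returns the cached instance, while changing either setting rebuilds the client, and flex requests carry the two X-Vertex-AI-LLM-* headers shown in buildGoogleGenAIClient. A minimal sketch of that behavior; it assumes VertexAIDriver is exported from the package root and that the driver options accept the project and region fields used above:

```ts
import { VertexAIDriver } from "@llumiverse/drivers";

const driver = new VertexAIDriver({ project: "my-project", region: "us-central1" });

// First call builds and caches a client for (us-central1, flex=false).
const standard = driver.getGoogleGenAIClient();
// Same region and flex flag: the cached instance is reused.
console.assert(driver.getGoogleGenAIClient() === standard);

// flex=true rebuilds the client with the flex headers and replaces the
// single cached slot, so alternating flex and non-flex calls rebuilds
// the client on every switch.
const flex = driver.getGoogleGenAIClient("us-central1", true);
console.assert(flex !== standard);
```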
package/src/vertexai/models/claude-streaming-spacing.test.ts (new file)

@@ -0,0 +1,174 @@
+import { ExecutionOptions } from '@llumiverse/core';
+import { describe, expect, it } from 'vitest';
+import { VertexAIDriver } from '../index.js';
+import { ClaudeModelDefinition } from './claude.js';
+
+function createAsyncStream(events: any[]): AsyncIterable<any> {
+    return (async function* () {
+        for (const event of events) {
+            yield event;
+        }
+    })();
+}
+
+async function collectChunks(stream: AsyncIterable<any>) {
+    const chunks: any[] = [];
+    for await (const chunk of stream) {
+        chunks.push(chunk);
+    }
+    return chunks;
+}
+
+describe('ClaudeModelDefinition streaming spacing', () => {
+    it('does not leak deferred spacing when tool use follows thinking', async () => {
+        const modelDef = new ClaudeModelDefinition('claude-sonnet-4-5');
+        const driver = {
+            logger: { warn: () => { }, info: () => { }, error: () => { } },
+            getAnthropicClient: async () => ({
+                messages: {
+                    stream: async () => createAsyncStream([
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'thinking_delta', thinking: 'Thinking...' },
+                        },
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'signature_delta' },
+                        },
+                        {
+                            type: 'content_block_start',
+                            content_block: { type: 'tool_use', id: 'tool-1', name: 'get_weather' },
+                        },
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'input_json_delta', partial_json: '{"city":"Paris"}' },
+                        },
+                        {
+                            type: 'content_block_stop',
+                        },
+                    ]),
+                },
+            }),
+        } as unknown as VertexAIDriver;
+
+        const prompt = {
+            messages: [{ role: 'user', content: [{ type: 'text', text: 'Weather?' }] }],
+        } as any;
+
+        const options = {
+            model: 'publishers/anthropic/models/claude-sonnet-4-5',
+            model_options: {
+                _option_id: 'vertexai-claude',
+                include_thoughts: true,
+            },
+        } as ExecutionOptions;
+
+        const stream = await modelDef.requestTextCompletionStream(driver, prompt, options);
+        const chunks = await collectChunks(stream);
+
+        const textOutput = chunks.flatMap(chunk => chunk.result ?? []).map(part => part.value).join('');
+        const toolChunks = chunks.flatMap(chunk => chunk.tool_use ?? []);
+
+        expect(textOutput).toBe('Thinking...');
+        expect(toolChunks).toHaveLength(2);
+        expect(toolChunks[0]).toMatchObject({ id: 'tool-1', tool_name: 'get_weather', tool_input: '' });
+        expect(toolChunks[1]).toMatchObject({ id: 'tool-1', tool_name: '', tool_input: '{"city":"Paris"}' });
+    });
+
+    it('flushes deferred spacing into the first text delta after thinking', async () => {
+        const modelDef = new ClaudeModelDefinition('claude-sonnet-4-5');
+        const driver = {
+            logger: { warn: () => { }, info: () => { }, error: () => { } },
+            getAnthropicClient: async () => ({
+                messages: {
+                    stream: async () => createAsyncStream([
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'thinking_delta', thinking: 'Thinking...' },
+                        },
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'signature_delta' },
+                        },
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'text_delta', text: 'Answer' },
+                        },
+                    ]),
+                },
+            }),
+        } as unknown as VertexAIDriver;
+
+        const prompt = {
+            messages: [{ role: 'user', content: [{ type: 'text', text: 'Question?' }] }],
+        } as any;
+
+        const options = {
+            model: 'publishers/anthropic/models/claude-sonnet-4-5',
+            model_options: {
+                _option_id: 'vertexai-claude',
+                include_thoughts: true,
+            },
+        } as ExecutionOptions;
+
+        const stream = await modelDef.requestTextCompletionStream(driver, prompt, options);
+        const chunks = await collectChunks(stream);
+
+        const textParts = chunks.flatMap(chunk => chunk.result ?? []).map(part => part.value);
+        expect(textParts).toEqual(['Thinking...', '\n\nAnswer']);
+    });
+
+    it('does not reintroduce deferred spacing when text arrives after a tool call', async () => {
+        const modelDef = new ClaudeModelDefinition('claude-sonnet-4-5');
+        const driver = {
+            logger: { warn: () => { }, info: () => { }, error: () => { } },
+            getAnthropicClient: async () => ({
+                messages: {
+                    stream: async () => createAsyncStream([
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'thinking_delta', thinking: 'Thinking...' },
+                        },
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'signature_delta' },
+                        },
+                        {
+                            type: 'content_block_start',
+                            content_block: { type: 'tool_use', id: 'tool-1', name: 'get_weather' },
+                        },
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'input_json_delta', partial_json: '{"city":"Paris"}' },
+                        },
+                        {
+                            type: 'content_block_stop',
+                        },
+                        {
+                            type: 'content_block_delta',
+                            delta: { type: 'text_delta', text: 'Answer after tool' },
+                        },
+                    ]),
+                },
+            }),
+        } as unknown as VertexAIDriver;
+
+        const prompt = {
+            messages: [{ role: 'user', content: [{ type: 'text', text: 'Weather?' }] }],
+        } as any;
+
+        const options = {
+            model: 'publishers/anthropic/models/claude-sonnet-4-5',
+            model_options: {
+                _option_id: 'vertexai-claude',
+                include_thoughts: true,
+            },
+        } as ExecutionOptions;
+
+        const stream = await modelDef.requestTextCompletionStream(driver, prompt, options);
+        const chunks = await collectChunks(stream);
+
+        const textParts = chunks.flatMap(chunk => chunk.result ?? []).map(part => part.value);
+        expect(textParts).toEqual(['Thinking...', 'Answer after tool']);
+    });
+});
package/src/vertexai/models/claude.ts

@@ -15,7 +15,7 @@ import { ContentBlock, ContentBlockParam, DocumentBlockParam, ImageBlockParam, M
 import { MessageStreamParams } from "@anthropic-ai/sdk/resources/index.mjs";
 import { MessageCreateParamsBase, MessageCreateParamsNonStreaming, RawMessageStreamEvent } from "@anthropic-ai/sdk/resources/messages.js";
 import {
-    AIModel, Completion, CompletionChunkObject, ExecutionOptions,
+    AIModel, Completion, CompletionChunkObject, ExecutionOptions, ExecutionTokenUsage,
     getConversationMeta,
     getMaxTokensLimitVertexAi,
     incrementConversationTurn,
@@ -49,6 +49,26 @@ interface ClaudePrompt {
     system?: TextBlockParam[];
 }
 
+interface AnthropicUsageLike {
+    input_tokens: number;
+    output_tokens: number;
+    cache_read_input_tokens?: number | null;
+    cache_creation_input_tokens?: number | null;
+}
+
+function anthropicUsageToTokenUsage(usage: AnthropicUsageLike): ExecutionTokenUsage {
+    const cacheRead = usage.cache_read_input_tokens ?? 0;
+    const cacheWrite = usage.cache_creation_input_tokens ?? 0;
+    return {
+        prompt_new: usage.input_tokens,
+        prompt: usage.input_tokens + cacheRead + cacheWrite,
+        result: usage.output_tokens,
+        total: usage.input_tokens + usage.output_tokens + cacheRead + cacheWrite,
+        prompt_cached: usage.cache_read_input_tokens ?? undefined,
+        prompt_cache_write: usage.cache_creation_input_tokens ?? undefined,
+    };
+}
+
 function claudeFinishReason(reason: string | undefined) {
     if (!reason) return undefined;
     switch (reason) {
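The arithmetic in anthropicUsageToTokenUsage treats Anthropic's input_tokens as covering only the uncached part of the prompt, adding cache reads and cache writes back in for the full prompt and total counts. A worked example with hypothetical numbers:

```ts
anthropicUsageToTokenUsage({
    input_tokens: 120,              // uncached prompt tokens
    output_tokens: 45,
    cache_read_input_tokens: 900,   // prompt tokens served from the cache
    cache_creation_input_tokens: 0, // nothing written to the cache this call
});
// => {
//     prompt_new: 120,         // newly processed prompt tokens only
//     prompt: 1020,            // 120 + 900 + 0
//     result: 45,
//     total: 1065,             // 120 + 45 + 900 + 0
//     prompt_cached: 900,
//     prompt_cache_write: 0,   // 0 survives; only null/undefined map to undefined
// }
```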
@@ -332,11 +352,7 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
         return {
             result: text ? [{ type: "text", value: text }] : [{ type: "text", value: '' }],
             tool_use,
-            token_usage: {
-                prompt: result.usage.input_tokens,
-                result: result.usage.output_tokens,
-                total: result.usage.input_tokens + result.usage.output_tokens
-            },
+            token_usage: anthropicUsageToTokenUsage(result.usage),
             // make sure we set finish_reason to the correct value (claude is normally setting this by itself)
             finish_reason: tool_use ? "tool_use" : claudeFinishReason(result?.stop_reason ?? ''),
             conversation: processedConversation
@@ -369,16 +385,16 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
 
         // Track current tool use being built from streaming
         let currentToolUse: { id: string; name: string; inputJson: string } | null = null;
+        // Deferred spacing after a thinking block — emitted only when real text follows,
+        // so it doesn't leak into the output when a tool call comes after thinking.
+        let pendingSpacing = false;
 
         const stream = asyncMap(response_stream, async (streamEvent: RawMessageStreamEvent) => {
             switch (streamEvent.type) {
                 case "message_start":
                     return {
                         result: [{ type: "text", value: '' }],
-                        token_usage: {
-                            prompt: streamEvent.message.usage.input_tokens,
-                            result: streamEvent.message.usage.output_tokens
-                        }
+                        token_usage: anthropicUsageToTokenUsage(streamEvent.message.usage as AnthropicUsageLike),
                     } satisfies CompletionChunkObject;
                 case "message_delta":
                     return {
@@ -415,10 +431,13 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
                 case "content_block_delta":
                     // Handle different delta types
                     switch (streamEvent.delta.type) {
-                        case "text_delta":
+                        case "text_delta": {
+                            const prefix = pendingSpacing ? '\n\n' : '';
+                            pendingSpacing = false;
                             return {
-                                result: streamEvent.delta.text ? [{ type: "text", value: streamEvent.delta.text }] : []
+                                result: streamEvent.delta.text ? [{ type: "text", value: prefix + streamEvent.delta.text }] : []
                             } satisfies CompletionChunkObject;
+                        }
                         case "input_json_delta":
                             // Accumulate tool input JSON
                             if (currentToolUse && streamEvent.delta.partial_json) {
@@ -440,25 +459,20 @@ export class ClaudeModelDefinition implements ModelDefinition<ClaudePrompt> {
                             }
                             break;
                         case "signature_delta":
-                            // Signature deltas, signify the end of the thoughts.
+                            // End of thinking block: defer spacing until real text follows,
+                            // so it doesn't leak when a tool call comes next.
                            if (model_options?.include_thoughts) {
-                                return {
-                                    result: [{ type: "text", value: '\n\n' }], // Double newline for more spacing
-                                } satisfies CompletionChunkObject;
+                                pendingSpacing = true;
                             }
                             break;
                     }
                     break;
                 case "content_block_stop":
-                    // Reset current tool use tracking when block ends
+                    // Reset tool use tracking; spacing is handled via pendingSpacing
                     if (currentToolUse) {
                         currentToolUse = null;
-                    }
-                    // Handle the end of content blocks, for redacted thinking blocks
-                    if (model_options?.include_thoughts) {
-                        return {
-                            result: [{ type: "text", value: '\n\n' }] // Add double newline for spacing
-                        } satisfies CompletionChunkObject;
+                        // Tool call followed thinking — discard any pending spacing so it doesn't leak
+                        pendingSpacing = false;
                     }
                     break;
             }
@@ -707,12 +721,18 @@ export function mergeConsecutiveUserMessages(messages: MessageParam[]): MessageP
  * @param response
  * @returns
  */
-function updateConversation(conversation: ClaudePrompt | undefined | null, prompt: ClaudePrompt): ClaudePrompt {
+export function updateConversation(conversation: ClaudePrompt | undefined | null, prompt: ClaudePrompt): ClaudePrompt {
     const baseSystemMessages = conversation?.system || [];
     const baseMessages = conversation?.messages || [];
     const system = baseSystemMessages.concat(prompt.system || []);
-    // Merge consecutive user messages to ensure tool_result blocks are properly grouped
-    const mergedMessages = mergeConsecutiveUserMessages(baseMessages.concat(prompt.messages || []));
+    // Sanitize first, then merge. Order matters: an empty assistant message (e.g. from interrupted
+    // streaming) between two tool-result user messages acts as a false separator. If merge runs
+    // first, those messages look non-consecutive and fixOrphanedToolUse injects a synthetic result
+    // into the first one; when sanitize later removes the empty assistant, the second user message
+    // ends up with an orphaned tool_result that Vertex AI rejects:
+    // "unexpected tool_use_id found in tool_result blocks".
+    const combined = sanitizeMessages(baseMessages.concat(prompt.messages || []));
+    const mergedMessages = mergeConsecutiveUserMessages(combined);
     return {
         messages: mergedMessages,
         system: system.length > 0 ? system : undefined // If system is empty, set to undefined
@@ -727,7 +747,7 @@ function updateConversation(conversation: ClaudePrompt | undefined | null, promp
  * - Filters out empty text blocks from each message's content
  * - Removes messages entirely if they have no content after filtering
  */
-function sanitizeMessages(messages: MessageParam[]): MessageParam[] {
+export function sanitizeMessages(messages: MessageParam[]): MessageParam[] {
     const result: MessageParam[] = [];
 
     for (const message of messages) {
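To make the ordering argument in updateConversation concrete, here is a hypothetical history of the kind its comment describes; the message shapes follow the Anthropic MessageParam types used in this file:

```ts
const history = [
    { role: 'user', content: [{ type: 'tool_result', tool_use_id: 'tool-1', content: 'ok' }] },
    { role: 'assistant', content: [] }, // empty turn left behind by an interrupted stream
    { role: 'user', content: [{ type: 'tool_result', tool_use_id: 'tool-2', content: 'ok' }] },
];

// Sanitize-then-merge: the empty assistant turn is dropped, the two user
// turns become consecutive, and they merge into a single user message that
// carries both tool_result blocks.
mergeConsecutiveUserMessages(sanitizeMessages(history as MessageParam[]));

// Merge-then-sanitize (the old order) leaves the user turns separate because
// the empty assistant turn sits between them; once sanitize removes it later,
// the second tool_result is orphaned and Vertex AI rejects the request.
```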
@@ -852,6 +872,42 @@ interface RequestOptions {
     headers?: Record<string, string>;
 }
 
+type ClaudeTool = NonNullable<MessageCreateParamsBase['tools']>[number];
+
+function stripClaudeCacheControlFromMessages(messages: MessageParam[]): MessageParam[] {
+    return messages.map(message => {
+        if (typeof message.content === 'string') {
+            return message;
+        }
+
+        return {
+            ...message,
+            content: message.content.map(block => stripClaudeCacheControlFromBlock(block)),
+        };
+    });
+}
+
+function stripClaudeCacheControlFromBlock<T extends ContentBlockParam>(block: T): T {
+    const cloned = { ...block } as T & { cache_control?: unknown };
+    delete cloned.cache_control;
+    return cloned as T;
+}
+
+function stripClaudeCacheControlFromSystem(system?: TextBlockParam[]): TextBlockParam[] | undefined {
+    return system?.map(block => {
+        const { cache_control: _cacheControl, ...rest } = block as TextBlockParam & { cache_control?: unknown };
+        return rest as TextBlockParam;
+    });
+}
+
+function stripClaudeCacheControlFromTools(tools?: MessageCreateParamsBase['tools']): MessageCreateParamsBase['tools'] | undefined {
+    return tools?.map(tool => {
+        const cloned = { ...tool } as ClaudeTool & { cache_control?: unknown };
+        delete cloned.cache_control;
+        return cloned as ClaudeTool;
+    });
+}
+
 function getClaudePayload(options: ExecutionOptions, prompt: ClaudePrompt): { payload: MessageCreateParamsBase, requestOptions: RequestOptions | undefined } {
     const modelName = options.model; // Model name is already extracted in the calling methods
     const model_options = options.model_options as VertexAIClaudeOptions;
@@ -888,10 +944,45 @@ function getClaudePayload(options: ExecutionOptions, prompt: ClaudePrompt): { pa
         sanitizedMessages = convertClaudeToolBlocksToText(sanitizedMessages);
     }
 
+    sanitizedMessages = stripClaudeCacheControlFromMessages(sanitizedMessages);
+    const sanitizedSystem = stripClaudeCacheControlFromSystem(prompt.system);
+    const sanitizedTools = hasTools
+        ? stripClaudeCacheControlFromTools(options.tools as MessageCreateParamsBase['tools'])
+        : undefined;
+
+    // Prompt caching: use three breakpoints so stable system prompt, tool definitions,
+    // and the conversation history prefix can all be reused across calls.
+    const cacheEnabled = model_options?.cache_enabled === true;
+    if (cacheEnabled) {
+        const cacheTtl = model_options?.cache_ttl;
+        const cacheControl = { type: 'ephemeral' as const, ...(cacheTtl && { ttl: cacheTtl }) };
+
+        if (sanitizedSystem && sanitizedSystem.length > 0) {
+            const lastSystemBlock = sanitizedSystem[sanitizedSystem.length - 1] as TextBlockParam & { cache_control?: unknown };
+            lastSystemBlock.cache_control = cacheControl;
+        }
+
+        if (sanitizedTools && sanitizedTools.length > 0) {
+            const lastTool = sanitizedTools[sanitizedTools.length - 1] as ClaudeTool & { cache_control?: unknown };
+            lastTool.cache_control = cacheControl;
+        }
+
+        if (sanitizedMessages.length >= 4) {
+            const pivotMsg = sanitizedMessages[sanitizedMessages.length - 2];
+            if (Array.isArray(pivotMsg.content) && pivotMsg.content.length > 0) {
+                const lastBlock = pivotMsg.content[pivotMsg.content.length - 1];
+                if (typeof lastBlock === 'object' && lastBlock !== null &&
+                    'type' in lastBlock && lastBlock.type !== 'thinking' && lastBlock.type !== 'redacted_thinking') {
+                    (lastBlock as TextBlockParam).cache_control = cacheControl;
+                }
+            }
+        }
+    }
+
     const payload = {
         messages: sanitizedMessages,
-        system: prompt.system,
-        tools: hasTools ? options.tools as MessageCreateParamsBase['tools'] : undefined,
+        system: sanitizedSystem,
+        tools: sanitizedTools,
         temperature: model_options?.temperature,
         model: modelName,
         max_tokens: maxToken(options),
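Callers opt in to the caching path through model_options; a minimal sketch assuming the cache_enabled and cache_ttl names read by getClaudePayload above (the '1h' TTL is a hypothetical value). Incoming cache_control markers are stripped first, so these three breakpoints are the only ones sent:

```ts
const options = {
    model: 'publishers/anthropic/models/claude-sonnet-4-5',
    model_options: {
        _option_id: 'vertexai-claude',
        cache_enabled: true, // cache_control breakpoints on system, tools, and history
        cache_ttl: '1h',     // optional; omitted => the default ephemeral TTL
    },
} as ExecutionOptions;
```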
package/src/vertexai/models/gemini-conversation-mutation.test.ts (new file)

@@ -0,0 +1,174 @@
+/**
+ * Unit tests for the Gemini conversation mutation bug fix.
+ *
+ * Bug: When tools=[] is passed but the conversation contains functionCall/functionResponse
+ * parts from prior turns, getGeminiPayload() was doing:
+ *
+ *     prompt.contents = convertGeminiFunctionPartsToText(prompt.contents);
+ *
+ * Since prompt.contents is the same object reference as the caller's conversation array,
+ * this permanently corrupted the stored conversation with "[Tool call: ...]" text markers.
+ * On the next turn the model would see those markers in context and echo them as literal output.
+ *
+ * Fix: use a local `payloadContents` variable so the caller's conversation is never mutated.
+ */
+
+import { ExecutionOptions } from '@llumiverse/core';
+import { FinishReason } from '@google/genai';
+import { describe, expect, it } from 'vitest';
+import { VertexAIDriver } from '../index.js';
+import { convertGeminiFunctionPartsToText, GeminiModelDefinition } from './gemini.js';
+
+// ---------------------------------------------------------------------------
+// Pure function tests — no driver needed
+// ---------------------------------------------------------------------------
+
+describe('convertGeminiFunctionPartsToText', () => {
+    it('does not mutate the input array', () => {
+        const original = [
+            {
+                role: 'model',
+                parts: [{ functionCall: { name: 'plan', args: { task: 'write tests' } } }],
+            },
+            {
+                role: 'user',
+                parts: [{ functionResponse: { name: 'plan', response: { output: 'done' } } }],
+            },
+        ];
+        const originalItemRefs = original.map(c => c);
+        const originalPartRefs = original.map(c => c.parts[0]);
+
+        const result = convertGeminiFunctionPartsToText(original);
+
+        // Result must be a different array
+        expect(result).not.toBe(original);
+        // Original items must be unchanged (same references, not mutated)
+        original.forEach((item, i) => {
+            expect(item).toBe(originalItemRefs[i]);
+            expect(item.parts[0]).toBe(originalPartRefs[i]);
+        });
+        // Original functionCall part must still be a functionCall, not text
+        expect(original[0].parts[0]).toHaveProperty('functionCall');
+        expect(original[0].parts[0]).not.toHaveProperty('text');
+        expect(original[1].parts[0]).toHaveProperty('functionResponse');
+        expect(original[1].parts[0]).not.toHaveProperty('text');
+    });
+
+    it('converts functionCall parts to the expected text format', () => {
+        const contents = [
+            {
+                role: 'model',
+                parts: [{ functionCall: { name: 'get_weather', args: { location: 'Paris' } } }],
+            },
+        ];
+
+        const result = convertGeminiFunctionPartsToText(contents);
+
+        expect(result[0].parts![0]).toEqual({
+            text: '[Tool call: get_weather({"location":"Paris"})]',
+        });
+    });
+
+    it('converts functionResponse parts to the expected text format', () => {
+        const contents = [
+            {
+                role: 'user',
+                parts: [{ functionResponse: { name: 'get_weather', response: { temperature: '15°C' } } }],
+            },
+        ];
+
+        const result = convertGeminiFunctionPartsToText(contents);
+
+        expect(result[0].parts![0]).toEqual({
+            text: '[Tool result for get_weather: {"temperature":"15°C"}]',
+        });
+    });
+
+    it('leaves non-function parts intact', () => {
+        const textPart = { text: 'Hello world' };
+        const contents = [{ role: 'user', parts: [textPart] }];
+
+        const result = convertGeminiFunctionPartsToText(contents);
+
+        expect(result[0].parts![0]).toBe(textPart);
+    });
+});
+
+// ---------------------------------------------------------------------------
+// Integration-level tests — verify the driver does not mutate the conversation
+// ---------------------------------------------------------------------------
+
+function makeContentsWithFunctionParts() {
+    return [
+        { role: 'model', parts: [{ functionCall: { name: 'plan', args: { task: 'test' } } }] },
+        { role: 'user', parts: [{ functionResponse: { name: 'plan', response: { result: 'ok' } } }] },
+    ];
+}
+
+function makeDriver(overrides: { generateContent?: () => Promise<any>; generateContentStream?: () => Promise<AsyncIterable<any>> }) {
+    return {
+        logger: { warn: () => {}, info: () => {}, error: () => {} },
+        getGoogleGenAIClient: () => ({
+            models: {
+                generateContent: overrides.generateContent ?? (async () => ({})),
+                generateContentStream: overrides.generateContentStream ?? (async () => (async function* () {})()),
+            },
+        }),
+    } as unknown as VertexAIDriver;
+}
+
+const mockNonStreamingResponse = {
+    usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5, totalTokenCount: 15 },
+    candidates: [{
+        finishReason: FinishReason.STOP,
+        content: { role: 'model', parts: [{ text: 'Summary.' }] },
+        safetyRatings: [],
+    }],
+};
+
+const mockStreamingChunk = {
+    usageMetadata: { promptTokenCount: 10, candidatesTokenCount: 5, totalTokenCount: 15 },
+    candidates: [{
+        finishReason: FinishReason.STOP,
+        content: { role: 'model', parts: [{ text: 'Summary.' }] },
+        safetyRatings: [],
+    }],
+};
+
+describe('GeminiModelDefinition - no conversation mutation', () => {
+    it('requestTextCompletion: does not mutate prompt.contents when tools=[] and conversation has function parts', async () => {
+        const modelDef = new GeminiModelDefinition('gemini-2.0-flash');
+        const originalContents = makeContentsWithFunctionParts();
+        const contentsSnapshot = JSON.stringify(originalContents);
+
+        const driver = makeDriver({ generateContent: async () => mockNonStreamingResponse });
+        const prompt = { contents: originalContents, system: undefined } as any;
+        const options: ExecutionOptions = { model: 'publishers/google/models/gemini-2.0-flash', tools: [] };
+
+        await modelDef.requestTextCompletion(driver, prompt, options);
+
+        expect(JSON.stringify(originalContents)).toBe(contentsSnapshot);
+        expect(originalContents[0].parts[0]).toHaveProperty('functionCall');
+        expect(originalContents[1].parts[0]).toHaveProperty('functionResponse');
+    });
+
+    it('requestTextCompletionStream: does not mutate prompt.contents when tools=[] and conversation has function parts', async () => {
+        const modelDef = new GeminiModelDefinition('gemini-2.0-flash');
+        const originalContents = makeContentsWithFunctionParts();
+        const contentsSnapshot = JSON.stringify(originalContents);
+
+        const driver = makeDriver({
+            generateContentStream: async () => (async function* () { yield mockStreamingChunk; })(),
+        });
+        const prompt = { contents: originalContents, system: undefined } as any;
+        const options: ExecutionOptions = { model: 'publishers/google/models/gemini-2.0-flash', tools: [] };
+
+        const stream = await modelDef.requestTextCompletionStream(driver, prompt, options);
+        // Drain the stream to trigger all processing
+        for await (const _chunk of stream) { /* noop */ }
+
+        expect(JSON.stringify(originalContents)).toBe(contentsSnapshot);
+        expect(originalContents[0].parts[0]).toHaveProperty('functionCall');
+        expect(originalContents[1].parts[0]).toHaveProperty('functionResponse');
+    });
+});
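For reference, the fix named in this test file's header comment amounts to building the payload from a converted copy rather than reassigning prompt.contents. A sketch of that shape; hasTools is a hypothetical stand-in for however getGeminiPayload detects configured tools:

```ts
// Inside getGeminiPayload (sketch): convertGeminiFunctionPartsToText returns
// a new array, so the caller's stored conversation keeps its original
// functionCall / functionResponse parts.
const payloadContents = hasTools
    ? prompt.contents
    : convertGeminiFunctionPartsToText(prompt.contents);
// payloadContents feeds the request; prompt.contents is never reassigned.
```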