@contentgrowth/llm-service 0.8.3 → 0.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,609 +0,0 @@
- import { GoogleGenAI } from '@google/genai';
- import { BaseLLMProvider } from './base-provider.js';
- import { LLMServiceException } from '../../llm-service.js';
- import { extractJsonFromResponse } from '../json-utils.js';
-
- export class GeminiProvider extends BaseLLMProvider {
-   constructor(config) {
-     super(config);
-
-     // Unified client for all operations (text, image, video)
-     // Uses apiKey for Gemini, and automatically handles Vertex AI env vars for Veo
-     this.client = new GoogleGenAI({
-       apiKey: config.apiKey,
-     });
-
-     this.models = config.models;
-     this.defaultModel = config.models.default;
-
-     // Store pending operations for polling
-     this._pendingOperations = new Map();
-   }
-
-   async chat(userMessage, systemPrompt = '', options = {}) {
-     const messages = [{ role: 'user', content: userMessage }];
-     const tier = options.tier || 'default';
-     const effectiveModel = this._getModelForTier(tier);
-     const effectiveMaxTokens = options.maxTokens || this.config.maxTokens;
-     const effectiveTemperature = options.temperature !== undefined ? options.temperature : this.config.temperature;
-
-     const response = await this._chatCompletionWithModel(
-       messages,
-       systemPrompt,
-       null,
-       effectiveModel,
-       effectiveMaxTokens,
-       effectiveTemperature
-     );
-     return { text: response.content };
-   }
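// Illustration (editor's sketch, not part of the package diff): constructing the
// provider and calling chat(). The config shape is inferred from the constructor
// above; model names and the 'fast' tier key are placeholders.
const provider = new GeminiProvider({
  apiKey: process.env.GEMINI_API_KEY,
  models: { default: 'gemini-2.5-flash', fast: 'gemini-2.5-flash-lite' },
  maxTokens: 1024,
  temperature: 0.7,
});
const { text } = await provider.chat('Suggest three blog titles.', 'You are concise.', { tier: 'fast' });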
-
-   async chatCompletion(messages, systemPrompt, tools = null, options = {}) {
-     return this._chatCompletionWithModel(
-       messages,
-       systemPrompt,
-       tools,
-       this.defaultModel,
-       this.config.maxTokens,
-       this.config.temperature,
-       options
-     );
-   }
-
-   async _chatCompletionWithModel(messages, systemPrompt, tools, modelName, maxTokens, temperature, options = {}) {
-     // Build generation config
-     const generationConfig = {
-       temperature: options.temperature ?? temperature,
-       maxOutputTokens: options.maxTokens ?? maxTokens,
-     };
-
-     // Add JSON mode support
-     if (options.responseFormat) {
-       const formatConfig = this._buildGenerationConfig(options, maxTokens, temperature);
-       Object.assign(generationConfig, formatConfig);
-     }
-
-     // Pre-process messages to handle the 'system' role for Gemini
-     const geminiMessages = [];
-     let systemContentBuffer = [];
-
-     for (const msg of messages) {
-       if (msg.role === 'system') {
-         systemContentBuffer.push(msg.content);
-       } else {
-         if (msg.role === 'user' && systemContentBuffer.length > 0) {
-           const fullContent = `${systemContentBuffer.join('\n')}\n\n${msg.content}`;
-           geminiMessages.push({ ...msg, content: fullContent });
-           systemContentBuffer = [];
-         } else {
-           geminiMessages.push(msg);
-         }
-       }
-     }
-
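// Illustration (editor's sketch, not part of the package): the loop above folds
// buffered 'system' messages into the next 'user' turn, since Gemini contents
// only accept 'user' and 'model' roles. For example:
//   in:  [{ role: 'system', content: 'Be terse.' }, { role: 'user', content: 'Hi' }]
//   out: [{ role: 'user', content: 'Be terse.\n\nHi' }]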
-     const contents = geminiMessages.map((msg, index) => {
-       let role = '';
-       let parts;
-
-       switch (msg.role) {
-         case 'user':
-           role = 'user';
-           parts = [{ text: msg.content }];
-
-           // Enhancement: If this is the LAST message (current turn), append the reminder.
-           // This helps the model respect the system prompt (especially format) even with long context history.
-           if (index === geminiMessages.length - 1) {
-             let reminder = "";
-             if (options.responseFormat === 'json' || options.responseFormat?.type === 'json_schema' || options.responseSchema) {
-               reminder = "\n\n[SYSTEM NOTE: The output MUST be valid JSON as per the schema. Do not include markdown formatting or explanations.]";
-             } else {
-               reminder = "\n\n[SYSTEM NOTE: Please ensure your response adheres strictly to the constraints defined in the System Prompt.]";
-             }
-
-             // Append to the existing text part (safest method)
-             const lastPart = parts.find(p => p.text);
-             if (lastPart) {
-               lastPart.text += reminder;
-             } else {
-               // Fallback if message was image-only
-               parts.push({ text: reminder });
-             }
-           }
-           break;
-         case 'assistant': {
-           role = 'model';
-
-           // Find if this is the LAST assistant message in the conversation.
-           // Text messages: only the last one should carry thought_signature (cumulative state).
-           // Tool calls: ALL must carry their signatures (model requirement for function calls).
-           const isLastAssistantMessage = index === geminiMessages.map((m, i) => m.role === 'assistant' ? i : -1).filter(i => i >= 0).pop();
-
-           if (msg.tool_calls) {
-             parts = msg.tool_calls.map(tc => {
-               const part = {
-                 functionCall: { name: tc.function.name, args: tc.function.arguments || tc.function.args }
-               };
-               // IMPORTANT: Always attach signatures for ALL tool calls in history.
-               // The model requires thought_signature on every functionCall part.
-               if (tc.thought_signature) {
-                 console.log(`[GeminiProvider] Sending thought_signature in tool_call (${tc.thought_signature.length} chars)`);
-                 part.thoughtSignature = tc.thought_signature; // camelCase for SDK
-               }
-               return part;
-             });
-           } else {
-             // Handle text content with optional thought signature
-             const part = { text: msg.content || '' };
-             // Only attach signature for the last assistant message (text messages only)
-             if (isLastAssistantMessage && msg.thought_signature) {
-               console.log(`[GeminiProvider] Sending thought_signature in text message (${msg.thought_signature.length} chars)`);
-               part.thoughtSignature = msg.thought_signature;
-             }
-             parts = [part];
-           }
-           break;
-         }
-         case 'tool': {
-           role = 'user';
-           // Look up the assistant turn that issued this tool call. Note: this must
-           // index geminiMessages (the array being mapped), not the original messages
-           // array, whose indices shift once system messages are merged above.
-           const preceding_message = geminiMessages[index - 1];
-           const tool_call = preceding_message?.tool_calls?.find(tc => tc.id === msg.tool_call_id);
-           parts = [{
-             functionResponse: {
-               name: tool_call?.function?.name || 'unknown_tool',
-               response: { content: msg.content },
-             }
-           }];
-
-           // Fix for JSON mode: If JSON is requested, remind the model to output JSON after tool execution.
-           // This is necessary because strict JSON mode is disabled when tools are present.
-           if (options.responseFormat === 'json' || options.responseFormat?.type === 'json_schema' || options.responseSchema) {
-             parts.push({ text: "\n\n[SYSTEM NOTE: The output MUST be valid JSON as per the schema. Do not include markdown formatting or explanations.]" });
-           } else {
-             // Generic reminder to help the model stay on track with system prompt instructions (e.g. formatting)
-             // even if no specific JSON mode is configured.
-             parts.push({ text: "\n\n[SYSTEM NOTE: Please ensure your response adheres strictly to the constraints defined in the System Prompt.]" });
-           }
-           break;
-         }
-         default:
-           return null;
-       }
-       return { role, parts };
-     }).filter(Boolean);
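// Illustration (editor's sketch): one converted turn as it leaves the map above,
// here a tool result re-entering the conversation as a functionResponse part
// (tool name and content are invented):
//   { role: 'user',
//     parts: [
//       { functionResponse: { name: 'get_weather', response: { content: '{"tempC":21}' } } },
//       { text: '\n\n[SYSTEM NOTE: ...]' }
//     ] }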
-
-     while (contents.length > 0 && contents[0].role !== 'user') {
-       contents.shift();
-     }
-
-     if (contents.length === 0) {
-       throw new LLMServiceException('Cannot process a conversation with no user messages.', 400);
-     }
-
-     // Use the new @google/genai API
-     const requestOptions = {
-       model: modelName,
-       contents: contents,
-       config: generationConfig,
-     };
-
-     if (systemPrompt) {
-       requestOptions.config.systemInstruction = { parts: [{ text: systemPrompt }] };
-     }
-
-     if (tools && tools.length > 0) {
-       requestOptions.config.tools = [{ functionDeclarations: tools.map(t => t.function) }];
-       // CRITICAL: Cannot enforce JSON mode (responseMimeType/responseSchema) when tools are present,
-       // because the model needs to be able to return tool calls (which are not JSON text).
-       // We must rely on the system prompt for JSON formatting in this case.
-       if (requestOptions.config.responseMimeType === 'application/json') {
-         console.warn('[GeminiProvider] Disabling strict JSON mode because tools are present. Relying on system prompt.');
-         delete requestOptions.config.responseMimeType;
-         delete requestOptions.config.responseSchema;
-       }
-     }
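// Illustration (editor's sketch): the OpenAI-style tool definition that
// tools.map(t => t.function) above unwraps into Gemini functionDeclarations
// (tool name and parameters are invented):
const exampleTool = {
  type: 'function',
  function: {
    name: 'get_weather',
    description: 'Look up the current weather for a city',
    parameters: { type: 'object', properties: { city: { type: 'string' } }, required: ['city'] },
  },
};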
-
-     // console.log('[GeminiProvider] generateContent request:', JSON.stringify(requestOptions, null, 2));
-
-     let response;
-     try {
-       response = await this.client.models.generateContent(requestOptions);
-     } catch (error) {
-       console.error(`[GeminiProvider] generateContent failed (API Key: ${this._getMaskedApiKey()}):`, error);
-       throw error;
-     }
-
-     // In @google/genai, the response is returned directly (no .response property),
-     // and helper methods like .text() or .functionCalls() might not exist on the raw object,
-     // so we extract manually from candidates.
-     const candidate = response.candidates?.[0];
-     if (!candidate) {
-       throw new LLMServiceException('No candidates returned from model', 500);
-     }
-
-     const parts = candidate.content?.parts || [];
-
-     // Extract text, function calls, and thought signatures
-     let textContent = '';
-     let toolCalls = null;
-     let responseThoughtSignature = null;
-
-     for (const part of parts) {
-       if (part.text) {
-         textContent += part.text;
-         // Capture thought signature attached to text part if present
-         if (part.thought_signature || part.thoughtSignature) {
-           responseThoughtSignature = part.thought_signature || part.thoughtSignature;
-         }
-       }
-       if (part.functionCall) {
-         if (!toolCalls) toolCalls = [];
-         // Preserve thought_signature if present (Gemini 3 requirement).
-         // Check both snake_case (API) and camelCase (SDK convention).
-         const sig = part.thought_signature || part.thoughtSignature;
-         if (sig) {
-           part.functionCall.thought_signature = sig;
-           // Also capture as top-level if not already set (though tool calls might have their own)
-           if (!responseThoughtSignature) responseThoughtSignature = sig;
-         }
-         toolCalls.push(part.functionCall);
-       }
-       // Fallback for standalone thought signature parts if they exist (hypothetical)
-       if (!part.text && !part.functionCall && (part.thought_signature || part.thoughtSignature)) {
-         responseThoughtSignature = part.thought_signature || part.thoughtSignature;
-       }
-     }
-
-     // Validate that we have EITHER content OR tool calls
-     if (!textContent && (!toolCalls || toolCalls.length === 0)) {
-       console.error('[GeminiProvider] Model returned empty response (no text, no tool calls)');
-       console.error('[GeminiProvider] Finish Reason:', candidate.finishReason);
-       console.error('[GeminiProvider] Safety Ratings:', JSON.stringify(candidate.safetyRatings, null, 2));
-       console.error('[GeminiProvider] Full Candidate:', JSON.stringify(candidate, null, 2));
-
-       throw new LLMServiceException(
-         `Model returned empty response. Finish Reason: ${candidate.finishReason}.`,
-         500
-       );
-     }
-
-     // Detailed logging as requested
-     // console.log('[GeminiProvider] generateContent response candidate:', JSON.stringify(candidate, null, 2));
-     // console.log('Gemini returns:', textContent);
-
-     // Return with parsed JSON if applicable.
-     // Normalize the finish reason to a standard value for consistent handling.
-     const normalizedFinishReason = this.normalizeFinishReason(candidate.finishReason);
-
-     return {
-       content: textContent,
-       thought_signature: responseThoughtSignature, // Return signature to caller
-       tool_calls: toolCalls ? (Array.isArray(toolCalls) ? toolCalls : [toolCalls]).map(fc => ({
-         type: 'function',
-         function: fc,
-         thought_signature: fc.thought_signature
-       })) : null,
-       finishReason: normalizedFinishReason, // Standardized: 'completed', 'truncated', etc.
-       _rawFinishReason: candidate.finishReason, // Keep original for debugging
-       _responseFormat: options.responseFormat,
-       ...(options.responseFormat && this._shouldAutoParse(options) ? {
-         parsedContent: this._safeJsonParse(textContent)
-       } : {})
-     };
-   }
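// Illustration (editor's sketch): a typical resolved value in strict JSON mode,
// based on the return statement above (all field values invented):
//   { content: '{"title":"Hello"}', thought_signature: null, tool_calls: null,
//     finishReason: 'completed', _rawFinishReason: 'STOP',
//     _responseFormat: { type: 'json_schema', schema: { ... } },
//     parsedContent: { title: 'Hello' } }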
-
-   _buildGenerationConfig(options, maxTokens, temperature) {
-     const config = {
-       temperature: options.temperature ?? temperature,
-       maxOutputTokens: options.maxTokens ?? maxTokens,
-     };
-
-     // Handle responseFormat as an object with type and schema properties
-     if (options.responseFormat) {
-       const formatType = typeof options.responseFormat === 'string'
-         ? options.responseFormat
-         : options.responseFormat.type;
-
-       const schema = typeof options.responseFormat === 'object'
-         ? options.responseFormat.schema
-         : options.responseSchema || null;
-
-       if (formatType === 'json' || formatType === 'json_schema') {
-         config.responseMimeType = 'application/json';
-
-         // CRITICAL: Must provide schema for "Strict Mode" to avoid markdown wrappers
-         if (schema) {
-           // Use responseSchema for strict structured output.
-           // Must convert to Gemini Schema format (uppercase types).
-           config.responseSchema = this._convertToGeminiSchema(schema);
-           // console.log('[GeminiProvider] Using Strict JSON mode with schema (responseSchema)');
-         } else {
-           console.warn('[GeminiProvider] Using legacy JSON mode without schema - may produce markdown wrappers');
-         }
-       }
-     }
-
-     return config;
-   }
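// Illustration (editor's sketch): the two responseFormat shapes the helper above
// accepts — a bare string (legacy), or an object carrying a JSON schema (strict):
const legacyOptions = { responseFormat: 'json' };
const strictOptions = {
  responseFormat: {
    type: 'json_schema',
    schema: { type: 'object', properties: { title: { type: 'string' } }, required: ['title'] },
  },
};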
329
-
330
- _convertToGeminiSchema(jsonSchema) {
331
- const convertType = (type) => {
332
- switch (type) {
333
- case 'string': return 'STRING';
334
- case 'number': return 'NUMBER';
335
- case 'integer': return 'INTEGER';
336
- case 'boolean': return 'BOOLEAN';
337
- case 'array': return 'ARRAY';
338
- case 'object': return 'OBJECT';
339
- default: return 'STRING';
340
- }
341
- };
342
-
343
- const convert = (schema) => {
344
- const result = {
345
- type: convertType(schema.type),
346
- };
347
-
348
- if (schema.properties) {
349
- result.properties = {};
350
- for (const [key, value] of Object.entries(schema.properties)) {
351
- result.properties[key] = convert(value);
352
- }
353
- }
354
-
355
- if (schema.items) {
356
- result.items = convert(schema.items);
357
- }
358
-
359
- if (schema.required) {
360
- result.required = schema.required;
361
- }
362
-
363
- if (schema.nullable) {
364
- result.nullable = schema.nullable;
365
- }
366
-
367
- if (schema.description) {
368
- result.description = schema.description;
369
- }
370
-
371
- return result;
372
- };
373
-
374
- return convert(jsonSchema);
375
- }
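// Illustration (editor's sketch): what the recursive conversion above produces —
// lowercase JSON Schema types become the uppercase names Gemini expects:
//   in:  { type: 'array', items: { type: 'object', properties: { id: { type: 'integer' } }, required: ['id'] } }
//   out: { type: 'ARRAY', items: { type: 'OBJECT', properties: { id: { type: 'INTEGER' } }, required: ['id'] } }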
-
-   _shouldAutoParse(options) {
-     return options.autoParse !== false; // Default true
-   }
-
-   _safeJsonParse(content) {
-     if (!content) return null;
-
-     // Use the robust JSON extractor that handles:
-     // - Markdown code blocks (```json ... ```)
-     // - Plain JSON objects
-     // - Over-escaped content (\\n instead of \n)
-     // - Brace extraction as fallback
-     const parsed = extractJsonFromResponse(content);
-
-     if (!parsed) {
-       console.error('[GeminiProvider] Failed to extract valid JSON from response');
-       console.error('[GeminiProvider] Content preview:', content.substring(0, 200));
-     }
-
-     return parsed;
-   }
-
-   async executeTools(tool_calls, messages, tenantId, toolImplementations, env) {
-     const toolResults = await Promise.all(
-       tool_calls.map(async (toolCall, index) => {
-         const toolName = toolCall.function.name;
-         const tool = toolImplementations[toolName];
-         const tool_call_id = `gemini-tool-call-${index}`;
-         toolCall.id = tool_call_id;
-
-         // console.log(`[Tool Call] ${toolName} with arguments:`, toolCall.function.args);
-
-         if (!tool) {
-           console.error(`[Tool Error] Tool '${toolName}' not found`);
-           return { tool_call_id, output: JSON.stringify({ error: `Tool '${toolName}' not found.` }) };
-         }
-         try {
-           const output = await tool(toolCall.function.args, { env, tenantId });
-           // console.log(`[Tool Result] ${toolName} returned:`, output.substring(0, 200) + (output.length > 200 ? '...' : ''));
-           return { tool_call_id, output };
-         } catch (error) {
-           console.error(`[Tool Error] ${toolName} failed:`, error.message);
-           return { tool_call_id, output: JSON.stringify({ error: `Error executing tool '${toolName}': ${error.message}` }) };
-         }
-       })
-     );
-     toolResults.forEach(result => messages.push({ role: 'tool', tool_call_id: result.tool_call_id, content: result.output }));
-   }
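// Illustration (editor's sketch): toolImplementations is a name → async function
// map; each tool receives the parsed args plus an { env, tenantId } context and
// returns a string, which executeTools pushes back as a 'tool' message
// (tool name and return value are invented):
const toolImplementations = {
  get_weather: async (args, { env, tenantId }) => JSON.stringify({ city: args.city, tempC: 21 }),
};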
-
-   async imageGeneration(prompt, systemPrompt, options = {}) {
-     // Allow model override via options.model, otherwise use default from config
-     const modelName = options.model || this.models.image || 'gemini-3-pro-image-preview';
-     console.log(`[GeminiProvider] Generating image with model: ${modelName}`);
-
-     const generationConfig = {
-       responseModalities: ["IMAGE"],
-     };
-
-     if (options.aspectRatio) {
-       generationConfig.imageConfig = {
-         aspectRatio: options.aspectRatio
-       };
-     }
-
-     const parts = [{ text: prompt }];
-
-     if (options.images && options.images.length > 0) {
-       options.images.forEach(img => {
-         parts.push({
-           inlineData: {
-             data: img.data,
-             mimeType: img.mimeType
-           }
-         });
-       });
-     }
-
-     // Use the new @google/genai API
-     const requestOptions = {
-       model: modelName,
-       contents: [{
-         role: "user",
-         parts: parts
-       }],
-       config: generationConfig
-     };
-
-     if (systemPrompt) {
-       requestOptions.config.systemInstruction = { parts: [{ text: systemPrompt }] };
-     }
-
-     // console.log('[GeminiProvider] imageGeneration request:', JSON.stringify(requestOptions, null, 2));
-
-     const response = await this.client.models.generateContent(requestOptions);
-
-     const imagePart = response.candidates?.[0]?.content?.parts?.find(
-       part => part.inlineData && part.inlineData.mimeType?.startsWith('image/')
-     );
-
-     if (!imagePart || !imagePart.inlineData) {
-       // Fallback: Check if it returned a URI or other format, or just text
-       const textPart = response.candidates?.[0]?.content?.parts?.find(p => p.text);
-       const candidate = response.candidates?.[0];
-
-       console.error('[GeminiProvider] Image generation failed (no image data)');
-       if (candidate) {
-         console.error('[GeminiProvider] Finish Reason:', candidate.finishReason);
-         console.error('[GeminiProvider] Safety Ratings:', JSON.stringify(candidate.safetyRatings, null, 2));
-         console.error('[GeminiProvider] Full Candidate:', JSON.stringify(candidate, null, 2));
-       }
-
-       if (textPart) {
-         console.warn('[GeminiProvider] Model returned text instead of image:', textPart.text);
-       }
-       throw new Error(`No image data in response. Finish Reason: ${candidate?.finishReason}`);
-     }
-
-     // Check for thought signature in the image part or any other part
-     let thoughtSignature = null;
-     if (imagePart.thought_signature || imagePart.thoughtSignature) {
-       thoughtSignature = imagePart.thought_signature || imagePart.thoughtSignature;
-     } else {
-       // Check other parts for standalone thought signature
-       const signaturePart = response.candidates?.[0]?.content?.parts?.find(p => p.thought_signature || p.thoughtSignature);
-       if (signaturePart) {
-         thoughtSignature = signaturePart.thought_signature || signaturePart.thoughtSignature;
-       }
-     }
-
-     // Safety: If the thought signature is abnormally large (>50KB), replace it with a bypass token
-     // to prevent massive context usage (user reported 1.5MB signatures in some cases).
-     if (thoughtSignature && thoughtSignature.length > 50000) {
-       console.warn(`[GeminiProvider] ⚠️ Thought signature is abnormally large (${thoughtSignature.length} chars). Replacing with bypass token to save context.`);
-       thoughtSignature = "skip_thought_signature_validator";
-     }
-
-     return {
-       imageData: imagePart.inlineData.data,
-       mimeType: imagePart.inlineData.mimeType,
-       thought_signature: thoughtSignature
-     };
-   }
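// Illustration (editor's sketch): calling imageGeneration with an optional
// base64 reference image, matching the inlineData parts built above
// ('provider' and 'base64Png' are assumed to exist; aspect ratio invented):
const { imageData, mimeType } = await provider.imageGeneration(
  'A watercolor fox logo',
  'Flat, minimal style.',
  { aspectRatio: '1:1', images: [{ data: base64Png, mimeType: 'image/png' }] }
);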
-
-   _getModelForTier(tier) {
-     return this.models[tier] || this.models.default;
-   }
-
-   async startVideoGeneration(prompt, images, modelName, systemPrompt, options = {}) {
-     // Use unified client for video generation.
-     // Prepend system prompt to user prompt if provided, as video models often expect instructions in the prompt.
-     const effectivePrompt = systemPrompt ? `${systemPrompt}\n\n${prompt}` : prompt;
-
-     const requestConfig = {
-       model: modelName,
-       prompt: effectivePrompt,
-       config: {
-         durationSeconds: options.durationSeconds || 6,
-         aspectRatio: options.aspectRatio || '16:9',
-         numberOfVideos: 1,
-         // Pass reference images if provided
-         ...(images && images.length > 0 ? { referenceImages: images } : {}),
-       }
-     };
-
-     // Create a loggable copy of the config
-     const logConfig = JSON.parse(JSON.stringify(requestConfig));
-     if (logConfig.config && logConfig.config.referenceImages) {
-       logConfig.config.referenceImages = logConfig.config.referenceImages.map(img => ({
-         ...img,
-         data: `... (${img.data ? img.data.length : 0} bytes)` // Summarize data
-       }));
-     }
-
-     console.log('[GeminiProvider] startVideoGeneration request:', JSON.stringify(logConfig, null, 2));
-
-     try {
-       const operation = await this.client.models.generateVideos(requestConfig);
-
-       // Store operation for later polling
-       this._pendingOperations.set(operation.name, operation);
-
-       return { operationName: operation.name };
-     } catch (error) {
-       console.error(`[GeminiProvider] startVideoGeneration failed (API Key: ${this._getMaskedApiKey()}):`, error);
-       throw error;
-     }
-   }
-
-   async getVideoGenerationStatus(operationName) {
-     console.log(`[GeminiProvider] Checking status for operation: ${operationName}`);
-
-     // Get the operation from cache or fetch it
-     let operation = this._pendingOperations.get(operationName);
-
-     if (!operation) {
-       // If not in cache, we need to fetch it by name
-       operation = await this.client.models.getOperation(operationName);
-     }
-
-     // Refresh status
-     operation = await operation.get();
-
-     // Update cache
-     this._pendingOperations.set(operationName, operation);
-
-     const result = {
-       done: operation.done,
-       progress: operation.metadata?.progressPercent || 0,
-       state: operation.metadata?.state || (operation.done ? 'COMPLETED' : 'PROCESSING'),
-     };
-
-     console.log(`[GeminiProvider] Operation status: ${result.state}, Progress: ${result.progress}%`);
-
-     if (operation.done) {
-       // Clean up from cache
-       this._pendingOperations.delete(operationName);
-
-       if (operation.error) {
-         console.error('[GeminiProvider] Video generation failed:', JSON.stringify(operation.error, null, 2));
-         result.error = operation.error;
-       } else {
-         const videoResult = operation.response;
-         // Extract video URI from response
-         result.videoUri = videoResult.videos?.[0]?.gcsUri ||
-           videoResult.uri ||
-           (videoResult.generatedAssets?.[0]?.uri);
-         result.content = "Video generation completed.";
-       }
-     }
-
-     return result;
-   }
- }
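// Illustration (editor's sketch): the start/poll pair above supports a stateless
// caller — start returns an operation name, which is then polled until done
// (the model name and 10s interval are placeholders):
const { operationName } = await provider.startVideoGeneration(
  'A drone shot over a rocky coastline at sunset', null, 'veo-2.0-generate-001', '', { durationSeconds: 6 }
);
let status;
do {
  await new Promise(resolve => setTimeout(resolve, 10000));
  status = await provider.getVideoGenerationStatus(operationName);
} while (!status.done);
console.log(status.error ? status.error : status.videoUri);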