@contentgrowth/llm-service 0.8.2 → 0.8.4

This diff shows the publicly released contents of these two package versions as they appear in their public registry, and is provided for informational purposes only.
@@ -1,607 +0,0 @@
- import { GoogleGenAI } from '@google/genai';
- import { BaseLLMProvider } from './base-provider.js';
- import { LLMServiceException } from '../../llm-service.js';
- import { extractJsonFromResponse } from '../json-utils.js';
-
- export class GeminiProvider extends BaseLLMProvider {
-   constructor(config) {
-     super(config);
-
-     // Unified client for all operations (text, image, video).
-     // Uses apiKey for Gemini, and automatically handles Vertex AI env vars for Veo.
-     this.client = new GoogleGenAI({
-       apiKey: config.apiKey,
-     });
-
-     this.models = config.models;
-     this.defaultModel = config.models.default;
-
-     // Store pending operations for polling
-     this._pendingOperations = new Map();
-   }
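For orientation, here is a minimal sketch of the config shape this constructor appears to expect, assuming BaseLLMProvider keeps the raw config on `this.config` (which `chat()` below implies). The field values are illustrative placeholders, not package defaults:

```js
// Hypothetical config; only apiKey and models.default are demonstrably required above.
const provider = new GeminiProvider({
  apiKey: process.env.GEMINI_API_KEY,
  maxTokens: 4096,      // read as this.config.maxTokens in chat()
  temperature: 0.7,     // read as this.config.temperature in chat()
  models: {
    default: 'gemini-2.5-flash',           // placeholder model ID
    image: 'gemini-3-pro-image-preview',   // fallback used by imageGeneration()
  },
});
```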
-
-
-   async chat(userMessage, systemPrompt = '', options = {}) {
-     const messages = [{ role: 'user', content: userMessage }];
-     const tier = options.tier || 'default';
-     const effectiveModel = this._getModelForTier(tier);
-     const effectiveMaxTokens = options.maxTokens || this.config.maxTokens;
-     const effectiveTemperature = options.temperature !== undefined ? options.temperature : this.config.temperature;
-
-     const response = await this._chatCompletionWithModel(
-       messages,
-       systemPrompt,
-       null,
-       effectiveModel,
-       effectiveMaxTokens,
-       effectiveTemperature
-     );
-     return { text: response.content };
-   }
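A usage sketch for `chat()`: the `tier` option selects a model via `_getModelForTier()`, falling back to `models.default`. The `'fast'` tier key is an assumption, valid only if the config defines it:

```js
const { text } = await provider.chat(
  'Summarize this changelog entry in one sentence.',
  'You are a concise release-notes editor.',  // systemPrompt
  { tier: 'fast', maxTokens: 256 }            // 'fast' is a hypothetical key in config.models
);
console.log(text);
```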
-
-   async chatCompletion(messages, systemPrompt, tools = null, options = {}) {
-     return this._chatCompletionWithModel(
-       messages,
-       systemPrompt,
-       tools,
-       this.defaultModel,
-       this.config.maxTokens,
-       this.config.temperature,
-       options
-     );
-   }
-
-   async _chatCompletionWithModel(messages, systemPrompt, tools, modelName, maxTokens, temperature, options = {}) {
-     // Build the generation config
-     const generationConfig = {
-       temperature: options.temperature ?? temperature,
-       maxOutputTokens: options.maxTokens ?? maxTokens,
-     };
-
-     // Add JSON mode support
-     if (options.responseFormat) {
-       const formatConfig = this._buildGenerationConfig(options, maxTokens, temperature);
-       Object.assign(generationConfig, formatConfig);
-     }
-
-     // Pre-process messages to handle the 'system' role for Gemini:
-     // buffered system content is folded into the next user turn.
-     const geminiMessages = [];
-     let systemContentBuffer = [];
-
-     for (const msg of messages) {
-       if (msg.role === 'system') {
-         systemContentBuffer.push(msg.content);
-       } else if (msg.role === 'user' && systemContentBuffer.length > 0) {
-         const fullContent = `${systemContentBuffer.join('\n')}\n\n${msg.content}`;
-         geminiMessages.push({ ...msg, content: fullContent });
-         systemContentBuffer = [];
-       } else {
-         geminiMessages.push(msg);
-       }
-     }
-
-     const contents = geminiMessages.map((msg, index) => {
-       let role = '';
-       let parts;
-
-       switch (msg.role) {
-         case 'user': {
-           role = 'user';
-           parts = [{ text: msg.content }];
-
-           // Enhancement: if this is the LAST message (current turn), append a reminder.
-           // This helps the model respect the system prompt (especially format) even with a long context history.
-           if (index === geminiMessages.length - 1) {
-             let reminder;
-             if (options.responseFormat === 'json' || options.responseFormat?.type === 'json_schema' || options.responseSchema) {
-               reminder = "\n\n[SYSTEM NOTE: The output MUST be valid JSON as per the schema. Do not include markdown formatting or explanations.]";
-             } else {
-               reminder = "\n\n[SYSTEM NOTE: Please ensure your response adheres strictly to the constraints defined in the System Prompt.]";
-             }
-
-             // Append to the existing text part (safest method)
-             const lastPart = parts.find(p => p.text);
-             if (lastPart) {
-               lastPart.text += reminder;
-             } else {
-               // Fallback if the message was image-only
-               parts.push({ text: reminder });
-             }
-           }
-           break;
-         }
-         case 'assistant': {
-           role = 'model';
-
-           // Determine whether this is the LAST assistant message in the conversation.
-           // Only the last assistant message should carry the thought_signature, to avoid token bloat.
-           const assistantIndices = geminiMessages.map((m, i) => (m.role === 'assistant' ? i : -1)).filter(i => i >= 0);
-           const isLastAssistantMessage = index === assistantIndices.pop();
-
-           if (msg.tool_calls) {
-             parts = msg.tool_calls.map(tc => {
-               const part = {
-                 functionCall: { name: tc.function.name, args: tc.function.arguments || tc.function.args }
-               };
-               // Only attach the signature for the last assistant message
-               if (isLastAssistantMessage && tc.thought_signature) {
-                 console.log(`[GeminiProvider] Sending thought_signature in tool_call (${tc.thought_signature.length} chars)`);
-                 part.thoughtSignature = tc.thought_signature; // camelCase for the SDK
-               }
-               return part;
-             });
-           } else {
-             // Handle text content with an optional thought signature
-             const part = { text: msg.content || '' };
-             // Only attach the signature for the last assistant message
-             if (isLastAssistantMessage && msg.thought_signature) {
-               console.log(`[GeminiProvider] Sending thought_signature in text message (${msg.thought_signature.length} chars)`);
-               part.thoughtSignature = msg.thought_signature;
-             }
-             parts = [part];
-           }
-           break;
-         }
-         case 'tool': {
-           role = 'user';
-           // Look up the matching tool call on the preceding message. Note this must index into
-           // geminiMessages (not the raw input array), since folding system messages shifts the indices.
-           const preceding_message = geminiMessages[index - 1];
-           const tool_call = preceding_message?.tool_calls?.find(tc => tc.id === msg.tool_call_id);
-           parts = [{
-             functionResponse: {
-               name: tool_call?.function?.name || 'unknown_tool',
-               response: { content: msg.content },
-             }
-           }];
-
-           // Fix for JSON mode: if JSON is requested, remind the model to output JSON after tool execution.
-           // This is necessary because strict JSON mode is disabled when tools are present.
-           if (options.responseFormat === 'json' || options.responseFormat?.type === 'json_schema' || options.responseSchema) {
-             parts.push({ text: "\n\n[SYSTEM NOTE: The output MUST be valid JSON as per the schema. Do not include markdown formatting or explanations.]" });
-           } else {
-             // Generic reminder to keep the model on track with system prompt instructions (e.g. formatting)
-             // even when no specific JSON mode is configured.
-             parts.push({ text: "\n\n[SYSTEM NOTE: Please ensure your response adheres strictly to the constraints defined in the System Prompt.]" });
-           }
-           break;
-         }
-         default:
-           return null;
-       }
-       return { role, parts };
-     }).filter(Boolean);
-
-     // Gemini requires the conversation to start with a user turn
-     while (contents.length > 0 && contents[0].role !== 'user') {
-       contents.shift();
-     }
-
-     if (contents.length === 0) {
-       throw new LLMServiceException('Cannot process a conversation with no user messages.', 400);
-     }
-
-     // Use the new @google/genai API
-     const requestOptions = {
-       model: modelName,
-       contents: contents,
-       config: generationConfig,
-     };
-
-     if (systemPrompt) {
-       requestOptions.config.systemInstruction = { parts: [{ text: systemPrompt }] };
-     }
-
-     if (tools && tools.length > 0) {
-       requestOptions.config.tools = [{ functionDeclarations: tools.map(t => t.function) }];
-       // CRITICAL: we cannot enforce JSON mode (responseMimeType/responseSchema) when tools are present,
-       // because the model needs to be able to return tool calls (which are not JSON text).
-       // We must rely on the system prompt for JSON formatting in this case.
-       if (requestOptions.config.responseMimeType === 'application/json') {
-         console.warn('[GeminiProvider] Disabling strict JSON mode because tools are present. Relying on system prompt.');
-         delete requestOptions.config.responseMimeType;
-         delete requestOptions.config.responseSchema;
-       }
-     }
-
-     // console.log('[GeminiProvider] generateContent request:', JSON.stringify(requestOptions, null, 2));
-
-     let response;
-     try {
-       response = await this.client.models.generateContent(requestOptions);
-     } catch (error) {
-       console.error(`[GeminiProvider] generateContent failed (API Key: ${this._getMaskedApiKey()}):`, error);
-       throw error;
-     }
-
-     // In @google/genai the response is returned directly (there is no .response property),
-     // and helper methods like .text() or .functionCalls() might not exist on the raw object,
-     // so we extract manually from candidates.
-     const candidate = response.candidates?.[0];
-     if (!candidate) {
-       throw new LLMServiceException('No candidates returned from model', 500);
-     }
-
-     const parts = candidate.content?.parts || [];
-
-     // Extract text, function calls, and thought signatures
-     let textContent = '';
-     let toolCalls = null;
-     let responseThoughtSignature = null;
-
-     for (const part of parts) {
-       if (part.text) {
-         textContent += part.text;
-         // Capture a thought signature attached to a text part, if present
-         if (part.thought_signature || part.thoughtSignature) {
-           responseThoughtSignature = part.thought_signature || part.thoughtSignature;
-         }
-       }
-       if (part.functionCall) {
-         if (!toolCalls) toolCalls = [];
-         // Preserve thought_signature if present (Gemini 3 requirement).
-         // Check both snake_case (API) and camelCase (SDK convention).
-         const sig = part.thought_signature || part.thoughtSignature;
-         if (sig) {
-           part.functionCall.thought_signature = sig;
-           // Also capture as top-level if not already set (tool calls may carry their own)
-           if (!responseThoughtSignature) responseThoughtSignature = sig;
-         }
-         toolCalls.push(part.functionCall);
-       }
-       // Fallback for standalone thought-signature parts, should they occur
-       if (!part.text && !part.functionCall && (part.thought_signature || part.thoughtSignature)) {
-         responseThoughtSignature = part.thought_signature || part.thoughtSignature;
-       }
-     }
-
-     // Validate that we have EITHER content OR tool calls
-     if (!textContent && (!toolCalls || toolCalls.length === 0)) {
-       console.error('[GeminiProvider] Model returned empty response (no text, no tool calls)');
-       console.error('[GeminiProvider] Finish Reason:', candidate.finishReason);
-       console.error('[GeminiProvider] Safety Ratings:', JSON.stringify(candidate.safetyRatings, null, 2));
-       console.error('[GeminiProvider] Full Candidate:', JSON.stringify(candidate, null, 2));
-
-       throw new LLMServiceException(
-         `Model returned empty response. Finish Reason: ${candidate.finishReason}.`,
-         500
-       );
-     }
-
-     // Optional detailed logging:
-     // console.log('[GeminiProvider] generateContent response candidate:', JSON.stringify(candidate, null, 2));
-     // console.log('Gemini returns:', textContent);
-
-     // Normalize the finish reason to a standard value for consistent handling,
-     // then return, attaching parsed JSON when applicable.
-     const normalizedFinishReason = this.normalizeFinishReason(candidate.finishReason);
-
-     return {
-       content: textContent,
-       thought_signature: responseThoughtSignature, // Return the signature to the caller
-       tool_calls: toolCalls ? toolCalls.map(fc => ({
-         type: 'function',
-         function: fc,
-         thought_signature: fc.thought_signature
-       })) : null,
-       finishReason: normalizedFinishReason, // Standardized: 'completed', 'truncated', etc.
-       _rawFinishReason: candidate.finishReason, // Keep the original for debugging
-       _responseFormat: options.responseFormat,
-       ...(options.responseFormat && this._shouldAutoParse(options) ? {
-         parsedContent: this._safeJsonParse(textContent)
-       } : {})
-     };
-   }
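To make the system-role folding and last-turn reminder above concrete, a small illustration (not package code) of how an OpenAI-style history maps onto Gemini contents:

```js
const history = [
  { role: 'system', content: 'Answer in French.' },
  { role: 'user', content: 'Hello!' },
];
// After folding, the system text is prepended to the next user turn, and since that
// turn is also the last message, the generic reminder is appended. Roughly:
// [{ role: 'user', parts: [{ text:
//   'Answer in French.\n\nHello!\n\n[SYSTEM NOTE: Please ensure your response ' +
//   'adheres strictly to the constraints defined in the System Prompt.]' }] }]
```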
-
-
-   _buildGenerationConfig(options, maxTokens, temperature) {
-     const config = {
-       temperature: options.temperature ?? temperature,
-       maxOutputTokens: options.maxTokens ?? maxTokens,
-     };
-
-     // Handle responseFormat as an object with type and schema properties
-     if (options.responseFormat) {
-       const formatType = typeof options.responseFormat === 'string'
-         ? options.responseFormat
-         : options.responseFormat.type;
-
-       const schema = typeof options.responseFormat === 'object'
-         ? options.responseFormat.schema
-         : options.responseSchema || null;
-
-       if (formatType === 'json' || formatType === 'json_schema') {
-         config.responseMimeType = 'application/json';
-
-         // CRITICAL: must provide a schema for "Strict Mode" to avoid markdown wrappers
-         if (schema) {
-           // Use responseSchema for strict structured output.
-           // Must convert to the Gemini Schema format (uppercase types).
-           config.responseSchema = this._convertToGeminiSchema(schema);
-           // console.log('[GeminiProvider] Using Strict JSON mode with schema (responseSchema)');
-         } else {
-           console.warn('[GeminiProvider] Using legacy JSON mode without schema - may produce markdown wrappers');
-         }
-       }
-     }
-
-     return config;
-   }
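The method above accepts `responseFormat` either as a bare string or as an object carrying a schema. A sketch of both shapes, assuming `messages` and `systemPrompt` from the surrounding application (the schema content is illustrative):

```js
// String form: JSON mime type without a schema (may produce markdown wrappers, per the warning above)
await provider.chatCompletion(messages, systemPrompt, null, { responseFormat: 'json' });

// Object form: strict structured output; the schema is converted by _convertToGeminiSchema()
await provider.chatCompletion(messages, systemPrompt, null, {
  responseFormat: {
    type: 'json_schema',
    schema: { type: 'object', properties: { title: { type: 'string' } }, required: ['title'] },
  },
});
```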
327
-
328
- _convertToGeminiSchema(jsonSchema) {
329
- const convertType = (type) => {
330
- switch (type) {
331
- case 'string': return 'STRING';
332
- case 'number': return 'NUMBER';
333
- case 'integer': return 'INTEGER';
334
- case 'boolean': return 'BOOLEAN';
335
- case 'array': return 'ARRAY';
336
- case 'object': return 'OBJECT';
337
- default: return 'STRING';
338
- }
339
- };
340
-
341
- const convert = (schema) => {
342
- const result = {
343
- type: convertType(schema.type),
344
- };
345
-
346
- if (schema.properties) {
347
- result.properties = {};
348
- for (const [key, value] of Object.entries(schema.properties)) {
349
- result.properties[key] = convert(value);
350
- }
351
- }
352
-
353
- if (schema.items) {
354
- result.items = convert(schema.items);
355
- }
356
-
357
- if (schema.required) {
358
- result.required = schema.required;
359
- }
360
-
361
- if (schema.nullable) {
362
- result.nullable = schema.nullable;
363
- }
364
-
365
- if (schema.description) {
366
- result.description = schema.description;
367
- }
368
-
369
- return result;
370
- };
371
-
372
- return convert(jsonSchema);
373
- }
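A before/after sketch of the conversion: lowercase JSON Schema types become the uppercase type names Gemini expects, applied recursively through `properties` and `items`:

```js
const input = {
  type: 'object',
  properties: { tags: { type: 'array', items: { type: 'string' } } },
  required: ['tags'],
};
// _convertToGeminiSchema(input) yields:
// { type: 'OBJECT',
//   properties: { tags: { type: 'ARRAY', items: { type: 'STRING' } } },
//   required: ['tags'] }
```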
-
-   _shouldAutoParse(options) {
-     return options.autoParse !== false; // Default true
-   }
-
-   _safeJsonParse(content) {
-     if (!content) return null;
-
-     // Use the robust JSON extractor that handles:
-     //  - Markdown code blocks (```json ... ```)
-     //  - Plain JSON objects
-     //  - Over-escaped content (\\n instead of \n)
-     //  - Brace extraction as a fallback
-     const parsed = extractJsonFromResponse(content);
-
-     if (!parsed) {
-       console.error('[GeminiProvider] Failed to extract valid JSON from response');
-       console.error('[GeminiProvider] Content preview:', content.substring(0, 200));
-     }
-
-     return parsed;
-   }
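An illustration of the behavior the comment above attributes to `extractJsonFromResponse` (its implementation lives in json-utils.js and is not shown in this diff, so the expected results are assumptions based on that comment):

```js
const fence = '`'.repeat(3); // avoids literal backticks in this example
const wrapped = `${fence}json\n{ "ok": true }\n${fence}`;
provider._safeJsonParse(wrapped);            // expected: { ok: true }
provider._safeJsonParse('not json at all');  // expected: null, with the errors logged above
```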
-
-   async executeTools(tool_calls, messages, tenantId, toolImplementations, env) {
-     const toolResults = await Promise.all(
-       tool_calls.map(async (toolCall, index) => {
-         const toolName = toolCall.function.name;
-         const tool = toolImplementations[toolName];
-         const tool_call_id = `gemini-tool-call-${index}`;
-         toolCall.id = tool_call_id;
-
-         // console.log(`[Tool Call] ${toolName} with arguments:`, toolCall.function.args);
-
-         if (!tool) {
-           console.error(`[Tool Error] Tool '${toolName}' not found`);
-           return { tool_call_id, output: JSON.stringify({ error: `Tool '${toolName}' not found.` }) };
-         }
-         try {
-           const output = await tool(toolCall.function.args, { env, tenantId });
-           // console.log(`[Tool Result] ${toolName} returned:`, output.substring(0, 200) + (output.length > 200 ? '...' : ''));
-           return { tool_call_id, output };
-         } catch (error) {
-           console.error(`[Tool Error] ${toolName} failed:`, error.message);
-           return { tool_call_id, output: JSON.stringify({ error: `Error executing tool '${toolName}': ${error.message}` }) };
-         }
-       })
-     );
-     toolResults.forEach(result => messages.push({ role: 'tool', tool_call_id: result.tool_call_id, content: result.output }));
-   }
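A sketch of how tool definitions and implementations plug into `chatCompletion()` and `executeTools()` above. The `getWeather` tool, its return shape, and the surrounding loop are illustrative assumptions (as are the in-scope `messages`, `systemPrompt`, `tenantId`, and `env`), not package API:

```js
// OpenAI-style tool definitions; chatCompletion() maps t.function into functionDeclarations
const tools = [{
  type: 'function',
  function: {
    name: 'getWeather', // hypothetical tool
    description: 'Look up the current weather for a city',
    parameters: { type: 'object', properties: { city: { type: 'string' } }, required: ['city'] },
  },
}];

// Each implementation receives (args, { env, tenantId }) and returns a string
const toolImplementations = {
  getWeather: async (args) => JSON.stringify({ city: args.city, tempC: 21 }),
};

let result = await provider.chatCompletion(messages, systemPrompt, tools);
while (result.tool_calls) {
  // Record the assistant turn, execute the tools (which appends 'tool' messages), then re-ask
  messages.push({ role: 'assistant', tool_calls: result.tool_calls, thought_signature: result.thought_signature });
  await provider.executeTools(result.tool_calls, messages, tenantId, toolImplementations, env);
  result = await provider.chatCompletion(messages, systemPrompt, tools);
}
```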
-
-   async imageGeneration(prompt, systemPrompt, options = {}) {
-     // Allow a model override via options.model, otherwise use the default from config
-     const modelName = options.model || this.models.image || 'gemini-3-pro-image-preview';
-     console.log(`[GeminiProvider] Generating image with model: ${modelName}`);
-
-     const generationConfig = {
-       responseModalities: ["IMAGE"],
-     };
-
-     if (options.aspectRatio) {
-       generationConfig.imageConfig = {
-         aspectRatio: options.aspectRatio
-       };
-     }
-
-     const parts = [{ text: prompt }];
-
-     if (options.images && options.images.length > 0) {
-       options.images.forEach(img => {
-         parts.push({
-           inlineData: {
-             data: img.data,
-             mimeType: img.mimeType
-           }
-         });
-       });
-     }
-
-     // Use the new @google/genai API
-     const requestOptions = {
-       model: modelName,
-       contents: [{
-         role: "user",
-         parts: parts
-       }],
-       config: generationConfig
-     };
-
-     if (systemPrompt) {
-       requestOptions.config.systemInstruction = { parts: [{ text: systemPrompt }] };
-     }
-
-     // console.log('[GeminiProvider] imageGeneration request:', JSON.stringify(requestOptions, null, 2));
-
-     const response = await this.client.models.generateContent(requestOptions);
-
-     const imagePart = response.candidates?.[0]?.content?.parts?.find(
-       part => part.inlineData && part.inlineData.mimeType?.startsWith('image/')
-     );
-
-     if (!imagePart || !imagePart.inlineData) {
-       // Fallback: check whether it returned a URI or another format, or just text
-       const textPart = response.candidates?.[0]?.content?.parts?.find(p => p.text);
-       const candidate = response.candidates?.[0];
-
-       console.error('[GeminiProvider] Image generation failed (no image data)');
-       if (candidate) {
-         console.error('[GeminiProvider] Finish Reason:', candidate.finishReason);
-         console.error('[GeminiProvider] Safety Ratings:', JSON.stringify(candidate.safetyRatings, null, 2));
-         console.error('[GeminiProvider] Full Candidate:', JSON.stringify(candidate, null, 2));
-       }
-
-       if (textPart) {
-         console.warn('[GeminiProvider] Model returned text instead of image:', textPart.text);
-       }
-       throw new Error(`No image data in response. Finish Reason: ${candidate?.finishReason}`);
-     }
-
-     // Check for a thought signature in the image part or any other part
-     let thoughtSignature = null;
-     if (imagePart.thought_signature || imagePart.thoughtSignature) {
-       thoughtSignature = imagePart.thought_signature || imagePart.thoughtSignature;
-     } else {
-       // Check other parts for a standalone thought signature
-       const signaturePart = response.candidates?.[0]?.content?.parts?.find(p => p.thought_signature || p.thoughtSignature);
-       if (signaturePart) {
-         thoughtSignature = signaturePart.thought_signature || signaturePart.thoughtSignature;
-       }
-     }
-
-     // Safety: if the thought signature is abnormally large (>50KB), replace it with a bypass token
-     // to prevent massive context usage (users have reported 1.5MB signatures in some cases).
-     if (thoughtSignature && thoughtSignature.length > 50000) {
-       console.warn(`[GeminiProvider] ⚠️ Thought signature is abnormally large (${thoughtSignature.length} chars). Replacing with bypass token to save context.`);
-       thoughtSignature = "skip_thought_signature_validator";
-     }
-
-     return {
-       imageData: imagePart.inlineData.data,
-       mimeType: imagePart.inlineData.mimeType,
-       thought_signature: thoughtSignature
-     };
-   }
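A usage sketch for `imageGeneration()`. The Node.js file handling is an illustrative assumption; `imageData` is base64-encoded, as inlineData payloads are in the Gemini API:

```js
import { writeFile } from 'node:fs/promises';

const { imageData, mimeType } = await provider.imageGeneration(
  'A watercolor lighthouse at dusk',
  'Use a soft, muted palette.',   // systemPrompt
  { aspectRatio: '16:9' }         // optionally also: model, images: [{ data, mimeType }]
);
await writeFile(mimeType === 'image/png' ? 'out.png' : 'out.jpg', Buffer.from(imageData, 'base64'));
```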
-
-   _getModelForTier(tier) {
-     return this.models[tier] || this.models.default;
-   }
-
-   async startVideoGeneration(prompt, images, modelName, systemPrompt, options = {}) {
-     // Use the unified client for video generation.
-     // Prepend the system prompt to the user prompt if provided, as video models often expect instructions in the prompt.
-     const effectivePrompt = systemPrompt ? `${systemPrompt}\n\n${prompt}` : prompt;
-
-     const requestConfig = {
-       model: modelName,
-       prompt: effectivePrompt,
-       config: {
-         durationSeconds: options.durationSeconds || 6,
-         aspectRatio: options.aspectRatio || '16:9',
-         numberOfVideos: 1,
-         // Pass reference images if provided
-         ...(images && images.length > 0 ? { referenceImages: images } : {}),
-       }
-     };
-
-     // Create a loggable copy of the config
-     const logConfig = JSON.parse(JSON.stringify(requestConfig));
-     if (logConfig.config && logConfig.config.referenceImages) {
-       logConfig.config.referenceImages = logConfig.config.referenceImages.map(img => ({
-         ...img,
-         data: `... (${img.data ? img.data.length : 0} bytes)` // Summarize data
-       }));
-     }
-
-     console.log('[GeminiProvider] startVideoGeneration request:', JSON.stringify(logConfig, null, 2));
-
-     try {
-       const operation = await this.client.models.generateVideos(requestConfig);
-
-       // Store the operation for later polling
-       this._pendingOperations.set(operation.name, operation);
-
-       return { operationName: operation.name };
-     } catch (error) {
-       console.error(`[GeminiProvider] startVideoGeneration failed (API Key: ${this._getMaskedApiKey()}):`, error);
-       throw error;
-     }
-   }
-
-   async getVideoGenerationStatus(operationName) {
-     console.log(`[GeminiProvider] Checking status for operation: ${operationName}`);
-
-     // Get the operation from the cache; if it is not cached (e.g. after a restart),
-     // fall back to a minimal handle carrying only the operation name.
-     let operation = this._pendingOperations.get(operationName) || { name: operationName };
-
-     // Refresh the status via the SDK's operations API (operation objects are not self-refreshing)
-     operation = await this.client.operations.getVideosOperation({ operation });
-
-     // Update the cache
-     this._pendingOperations.set(operationName, operation);
-
-     const result = {
-       done: operation.done,
-       progress: operation.metadata?.progressPercent || 0,
-       state: operation.metadata?.state || (operation.done ? 'COMPLETED' : 'PROCESSING'),
-     };
-
-     console.log(`[GeminiProvider] Operation status: ${result.state}, Progress: ${result.progress}%`);
-
-     if (operation.done) {
-       // Clean up the cache
-       this._pendingOperations.delete(operationName);
-
-       if (operation.error) {
-         console.error('[GeminiProvider] Video generation failed:', JSON.stringify(operation.error, null, 2));
-         result.error = operation.error;
-       } else {
-         const videoResult = operation.response;
-         // Extract the video URI from the response, trying the known shapes in turn
-         result.videoUri = videoResult.videos?.[0]?.gcsUri ||
-           videoResult.uri ||
-           videoResult.generatedAssets?.[0]?.uri;
-         result.content = "Video generation completed.";
-       }
-     }
-
-     return result;
-   }
- }
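Finally, a sketch of how the two video methods compose into a polling loop. The model name and the 10-second interval are illustrative assumptions:

```js
const { operationName } = await provider.startVideoGeneration(
  'A drone shot over a coastline at sunrise',  // prompt
  null,                                        // optional reference images
  'veo-3.0-generate-001',                      // placeholder model name
  'Cinematic, slow camera movement.'           // systemPrompt, folded into the prompt above
);

let status;
do {
  await new Promise(resolve => setTimeout(resolve, 10_000));
  status = await provider.getVideoGenerationStatus(operationName);
} while (!status.done);

if (status.error) throw new Error(`Video generation failed: ${JSON.stringify(status.error)}`);
console.log('Video URI:', status.videoUri);
```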