@contentgrowth/llm-service 0.6.7 → 0.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@contentgrowth/llm-service",
- "version": "0.6.7",
+ "version": "0.6.9",
  "description": "Unified LLM Service for Content Growth",
  "main": "src/index.js",
  "type": "module",
@@ -14,7 +14,7 @@
  "author": "Content Growth",
  "license": "MIT",
  "dependencies": {
- "@google/generative-ai": "^0.24.1",
+ "@google/genai": "^1.31.0",
  "openai": "^6.9.1"
  },
  "devDependencies": {
@@ -46,4 +46,26 @@ export class BaseLLMProvider {
  async imageGeneration(prompt, modelName, systemPrompt, options) {
  throw new Error('Image generation not supported by this provider');
  }
+
+ /**
+ * Start video generation (returns operation name for polling)
+ * @param {string} prompt
+ * @param {Array} images
+ * @param {string} modelName
+ * @param {string} systemPrompt
+ * @param {Object} options
+ * @returns {Promise<{operationName: string}>}
+ */
+ async startVideoGeneration(prompt, images, modelName, systemPrompt, options) {
+ throw new Error('Video generation not supported by this provider');
+ }
+
+ /**
+ * Get video generation status (poll operation)
+ * @param {string} operationName
+ * @returns {Promise<{done: boolean, progress: number, state: string, videoUri?: string, error?: object}>}
+ */
+ async getVideoGenerationStatus(operationName) {
+ throw new Error('Video generation not supported by this provider');
+ }
  }
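The base class now declares a two-step video contract that providers can override: startVideoGeneration kicks off the job and resolves with an operation name, and getVideoGenerationStatus is polled with that name until done is true. A minimal caller-side sketch of that contract follows; the provider instance, model name, and 5-second interval are illustrative, not part of the package:

// Sketch only: exercises the start/poll contract declared above.
async function generateVideoAndWait(provider, prompt, modelName) {
  const { operationName } = await provider.startVideoGeneration(prompt, [], modelName, '', {});

  let status = await provider.getVideoGenerationStatus(operationName);
  while (!status.done) {
    await new Promise(resolve => setTimeout(resolve, 5000)); // illustrative poll interval
    status = await provider.getVideoGenerationStatus(operationName);
  }

  if (status.error) {
    throw new Error(`Video generation failed: ${JSON.stringify(status.error)}`);
  }
  return status.videoUri; // matches the @returns shape documented above
}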
@@ -1,4 +1,4 @@
- import { GoogleGenerativeAI } from '@google/generative-ai';
+ import { GoogleGenAI } from '@google/genai';
  import { BaseLLMProvider } from './base-provider.js';
  import { LLMServiceException } from '../../llm-service.js';
  import { extractJsonFromResponse } from '../json-utils.js';
@@ -6,11 +6,21 @@ import { extractJsonFromResponse } from '../json-utils.js';
  export class GeminiProvider extends BaseLLMProvider {
  constructor(config) {
  super(config);
- this.client = new GoogleGenerativeAI(config.apiKey);
+
+ // Unified client for all operations (text, image, video)
+ // Uses apiKey for Gemini, and automatically handles Vertex AI env vars for Veo
+ this.client = new GoogleGenAI({
+ apiKey: config.apiKey,
+ });
+
  this.models = config.models;
  this.defaultModel = config.models.default;
+
+ // Store pending operations for polling
+ this._pendingOperations = new Map();
  }

+
  async chat(userMessage, systemPrompt = '', options = {}) {
  const messages = [{ role: 'user', content: userMessage }];
  const tier = options.tier || 'default';
@@ -42,25 +52,18 @@ export class GeminiProvider extends BaseLLMProvider {
  }

  async _chatCompletionWithModel(messages, systemPrompt, tools, modelName, maxTokens, temperature, options = {}) {
- const modelConfig = {
- model: modelName,
- systemInstruction: systemPrompt,
- tools: tools ? [{ functionDeclarations: tools.map(t => t.function) }] : undefined,
+ // Build generation config
+ const generationConfig = {
+ temperature: options.temperature ?? temperature,
+ maxOutputTokens: options.maxTokens ?? maxTokens,
  };

- // Add JSON mode support for Gemini (only used when NO tools are present)
+ // Add JSON mode support
  if (options.responseFormat) {
- modelConfig.generationConfig = this._buildGenerationConfig(options, maxTokens, temperature);
- } else if (options.temperature !== undefined || options.maxTokens !== undefined) {
- // Apply temperature/maxTokens overrides even without JSON mode
- modelConfig.generationConfig = {
- temperature: options.temperature ?? temperature,
- maxOutputTokens: options.maxTokens ?? maxTokens,
- };
+ const formatConfig = this._buildGenerationConfig(options, maxTokens, temperature);
+ Object.assign(generationConfig, formatConfig);
  }

- const model = this.client.getGenerativeModel(modelConfig);
-
  // Pre-process messages to handle the 'system' role for Gemini
  const geminiMessages = [];
  let systemContentBuffer = [];
@@ -79,7 +82,7 @@ export class GeminiProvider extends BaseLLMProvider {
  }
  }

- const history = geminiMessages.map((msg, index) => {
+ const contents = geminiMessages.map((msg, index) => {
  let role = '';
  let parts;

@@ -115,32 +118,38 @@ export class GeminiProvider extends BaseLLMProvider {
  return { role, parts };
  }).filter(Boolean);

- while (history.length > 0 && history[0].role !== 'user') {
- history.shift();
+ while (contents.length > 0 && contents[0].role !== 'user') {
+ contents.shift();
  }

- if (history.length === 0) {
+ if (contents.length === 0) {
  throw new LLMServiceException('Cannot process a conversation with no user messages.', 400);
  }

- const lastMessage = history.pop();
- const chat = model.startChat({ history });
+ // Use the new @google/genai API
+ const result = await this.client.models.generateContent({
+ model: modelName,
+ contents: contents,
+ systemInstruction: systemPrompt,
+ generationConfig: generationConfig,
+ tools: tools ? [{ functionDeclarations: tools.map(t => t.function) }] : undefined,
+ });

- const result = await chat.sendMessage(lastMessage.parts);
  const response = result.response;
- const toolCalls = response.functionCalls();
+ const toolCalls = response.functionCalls?.() || response.functionCalls || null;

  let textContent = '';
  try {
- textContent = response.text();
+ textContent = typeof response.text === 'function' ? response.text() : (response.text || '');
  } catch (e) {
  // response.text() throws if there is no text content (e.g. only tool calls)
  // This is expected behavior for tool-only responses
  }
+
  // Validate that we have EITHER content OR tool calls
  if (!textContent && (!toolCalls || toolCalls.length === 0)) {
  console.error('[GeminiProvider] Model returned empty response (no text, no tool calls)');
- console.error('[GeminiProvider] Last message:', JSON.stringify(lastMessage, null, 2));
+ console.error('[GeminiProvider] Contents:', JSON.stringify(contents, null, 2));
  throw new LLMServiceException(
  'Model returned empty response. This usually means the prompt or schema is confusing the model.',
  500
@@ -150,7 +159,7 @@ export class GeminiProvider extends BaseLLMProvider {
  // Return with parsed JSON if applicable
  return {
  content: textContent,
- tool_calls: toolCalls ? toolCalls.map(fc => ({ type: 'function', function: fc })) : null,
+ tool_calls: toolCalls ? (Array.isArray(toolCalls) ? toolCalls : [toolCalls]).map(fc => ({ type: 'function', function: fc })) : null,
  _responseFormat: options.responseFormat,
  ...(options.responseFormat && this._shouldAutoParse(options) ? {
  parsedContent: this._safeJsonParse(textContent)
@@ -158,6 +167,7 @@ export class GeminiProvider extends BaseLLMProvider {
  };
  }

+
  _buildGenerationConfig(options, maxTokens, temperature) {
  const config = {
  temperature: options.temperature ?? temperature,
@@ -299,11 +309,6 @@ export class GeminiProvider extends BaseLLMProvider {
  }

  async imageGeneration(prompt, modelName, systemPrompt, options = {}) {
- const model = this.client.getGenerativeModel({
- model: modelName,
- systemInstruction: systemPrompt,
- });
-
  const generationConfig = {
  responseModalities: ["IMAGE"],
  };
@@ -327,11 +332,14 @@ export class GeminiProvider extends BaseLLMProvider {
  });
  }

- const result = await model.generateContent({
+ // Use the new @google/genai API
+ const result = await this.client.models.generateContent({
+ model: modelName,
  contents: [{
  role: "user",
  parts: parts
  }],
+ systemInstruction: systemPrompt,
  generationConfig
  });

@@ -354,47 +362,63 @@ export class GeminiProvider extends BaseLLMProvider {
  return this.models[tier] || this.models.default;
  }

- async videoGeneration(prompt, images, modelName, systemPrompt, options = {}) {
- const model = this.client.getGenerativeModel({
+ async startVideoGeneration(prompt, images, modelName, systemPrompt, options = {}) {
+ // Use unified client for video generation
+ const operation = await this.client.models.generateVideos({
  model: modelName,
- systemInstruction: systemPrompt,
+ prompt: prompt,
+ config: {
+ durationSeconds: options.durationSeconds || 6,
+ aspectRatio: options.aspectRatio || '16:9',
+ numberOfVideos: 1,
+ // Pass reference images if provided
+ ...(images && images.length > 0 ? { referenceImages: images } : {}),
+ }
  });

- // Prepare image parts
- const imageParts = images.map(img => ({
- inlineData: {
- data: img.data, // Base64 string
- mimeType: img.mimeType
- }
- }));
+ // Store operation for later polling
+ this._pendingOperations.set(operation.name, operation);

- const result = await model.generateContent({
- contents: [{
- role: "user",
- parts: [
- { text: prompt },
- ...imageParts
- ]
- }]
- });
+ return { operationName: operation.name };
+ }

- const response = result.response;
+ async getVideoGenerationStatus(operationName) {
+ // Get the operation from cache or fetch it
+ let operation = this._pendingOperations.get(operationName);

- // Check for video attachment/URI in the response
- // This structure depends on the specific API response for Veo
- // Assuming it might return a file URI or a specific part type
+ if (!operation) {
+ // If not in cache, we need to fetch it by name
+ operation = await this.client.models.getOperation(operationName);
+ }

- // Fallback: Return text if no specific video part is found,
- // but try to find a URI in the text if possible.
- const text = response.text();
+ // Refresh status
+ operation = await operation.get();

- // TODO: Update this once Veo API response structure is fully documented/available
- // For now, we return the text which might contain the URI or status.
+ // Update cache
+ this._pendingOperations.set(operationName, operation);

- return {
- content: text,
- // potential video URI extraction
- videoUri: text.match(/https?:\/\/[^\s]+/) ? text.match(/https?:\/\/[^\s]+/)[0] : null
+ const result = {
+ done: operation.done,
+ progress: operation.metadata?.progressPercent || 0,
+ state: operation.metadata?.state || (operation.done ? 'COMPLETED' : 'PROCESSING'),
  };
+
+ if (operation.done) {
+ // Clean up from cache
+ this._pendingOperations.delete(operationName);
+
+ if (operation.error) {
+ result.error = operation.error;
+ } else {
+ const videoResult = operation.response;
+ // Extract video URI from response
+ result.videoUri = videoResult.videos?.[0]?.gcsUri ||
+ videoResult.uri ||
+ (videoResult.generatedAssets?.[0]?.uri);
+ result.content = "Video generation completed.";
+ }
+ }
+
+ return result;
  }
  }
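For reference, the object that getVideoGenerationStatus resolves with once the Veo operation finishes looks roughly like the following; the field names come from the implementation above, while the values shown here are fabricated examples. On failure, error is set instead of videoUri and content.

// Illustrative completed status object (values are made up).
const exampleStatus = {
  done: true,
  progress: 100,
  state: 'COMPLETED',
  videoUri: 'gs://example-bucket/veo/output-0.mp4', // hypothetical URI
  content: 'Video generation completed.'
};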
@@ -205,11 +205,43 @@ export class LLMService {
  }

  /**
- * Generate a video
+ * Generate a video (async wrapper with polling - backward compatibility)
  */
  async videoGeneration(prompt, images, tenantId, modelName, systemPrompt, options = {}) {
+ const { operationName } = await this.startVideoGeneration(prompt, images, tenantId, modelName, systemPrompt, options);
+
+ let status = await this.getVideoGenerationStatus(operationName, tenantId);
+
+ while (!status.done) {
+ console.log(`Waiting for video generation... Progress: ${status.progress}%`);
+ await new Promise(resolve => setTimeout(resolve, 10000)); // Wait 10 seconds
+ status = await this.getVideoGenerationStatus(operationName, tenantId);
+ }
+
+ if (status.error) {
+ throw new Error(`Video generation failed: ${status.error.message || JSON.stringify(status.error)}`);
+ }
+
+ return {
+ content: status.content || "Video generation completed.",
+ videoUri: status.videoUri
+ };
+ }
+
+ /**
+ * Start video generation (returns operation name for polling)
+ */
+ async startVideoGeneration(prompt, images, tenantId, modelName, systemPrompt, options = {}) {
+ const provider = await this._getProvider(tenantId);
+ return provider.startVideoGeneration(prompt, images, modelName, systemPrompt, options);
+ }
+
+ /**
+ * Get video generation status
+ */
+ async getVideoGenerationStatus(operationName, tenantId) {
  const provider = await this._getProvider(tenantId);
- return provider.videoGeneration(prompt, images, modelName, systemPrompt, options);
+ return provider.getVideoGenerationStatus(operationName);
  }

  /**
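Callers that cannot block for the whole render can use the two-step service API directly instead of videoGeneration: start the job, persist the returned operation name, and poll later. A caller-side sketch; the llmService instance, tenant id, and model name are assumed bindings, and the prompt and options are illustrative:

// Sketch only: non-blocking use of the new LLMService methods.
async function requestVideo(llmService, tenantId, modelName) {
  const { operationName } = await llmService.startVideoGeneration(
    'A timelapse of a city skyline at dusk', // prompt
    [],                                      // optional reference images
    tenantId,
    modelName,
    '',                                      // system prompt
    { aspectRatio: '16:9', durationSeconds: 6 }
  );
  return operationName; // persist this, then poll later
}

async function checkVideo(llmService, tenantId, operationName) {
  const status = await llmService.getVideoGenerationStatus(operationName, tenantId);
  if (status.done && !status.error) {
    console.log('Video ready at', status.videoUri);
  }
  return status;
}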