@contentgrowth/llm-service 0.6.8 → 0.6.91

This diff shows the changes between publicly available versions of the package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@contentgrowth/llm-service",
3
- "version": "0.6.8",
3
+ "version": "0.6.91",
4
4
  "description": "Unified LLM Service for Content Growth",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -14,7 +14,7 @@
14
14
  "author": "Content Growth",
15
15
  "license": "MIT",
16
16
  "dependencies": {
17
- "@google/generative-ai": "^0.24.1",
17
+ "@google/genai": "^1.31.0",
18
18
  "openai": "^6.9.1"
19
19
  },
20
20
  "devDependencies": {
@@ -1,4 +1,4 @@
1
- import { GoogleGenerativeAI } from '@google/generative-ai';
1
+ import { GoogleGenAI } from '@google/genai';
2
2
  import { BaseLLMProvider } from './base-provider.js';
3
3
  import { LLMServiceException } from '../../llm-service.js';
4
4
  import { extractJsonFromResponse } from '../json-utils.js';
@@ -6,11 +6,21 @@ import { extractJsonFromResponse } from '../json-utils.js';
6
6
  export class GeminiProvider extends BaseLLMProvider {
7
7
  constructor(config) {
8
8
  super(config);
9
- this.client = new GoogleGenerativeAI(config.apiKey);
9
+
10
+ // Unified client for all operations (text, image, video)
11
+ // Uses apiKey for Gemini, and automatically handles Vertex AI env vars for Veo
12
+ this.client = new GoogleGenAI({
13
+ apiKey: config.apiKey,
14
+ });
15
+
10
16
  this.models = config.models;
11
17
  this.defaultModel = config.models.default;
18
+
19
+ // Store pending operations for polling
20
+ this._pendingOperations = new Map();
12
21
  }
13
22
 
23
+
14
24
  async chat(userMessage, systemPrompt = '', options = {}) {
15
25
  const messages = [{ role: 'user', content: userMessage }];
16
26
  const tier = options.tier || 'default';
@@ -42,25 +52,18 @@ export class GeminiProvider extends BaseLLMProvider {
42
52
  }
43
53
 
44
54
  async _chatCompletionWithModel(messages, systemPrompt, tools, modelName, maxTokens, temperature, options = {}) {
45
- const modelConfig = {
46
- model: modelName,
47
- systemInstruction: systemPrompt,
48
- tools: tools ? [{ functionDeclarations: tools.map(t => t.function) }] : undefined,
55
+ // Build generation config
56
+ const generationConfig = {
57
+ temperature: options.temperature ?? temperature,
58
+ maxOutputTokens: options.maxTokens ?? maxTokens,
49
59
  };
50
60
 
51
- // Add JSON mode support for Gemini (only used when NO tools are present)
61
+ // Add JSON mode support
52
62
  if (options.responseFormat) {
53
- modelConfig.generationConfig = this._buildGenerationConfig(options, maxTokens, temperature);
54
- } else if (options.temperature !== undefined || options.maxTokens !== undefined) {
55
- // Apply temperature/maxTokens overrides even without JSON mode
56
- modelConfig.generationConfig = {
57
- temperature: options.temperature ?? temperature,
58
- maxOutputTokens: options.maxTokens ?? maxTokens,
59
- };
63
+ const formatConfig = this._buildGenerationConfig(options, maxTokens, temperature);
64
+ Object.assign(generationConfig, formatConfig);
60
65
  }
61
66
 
62
- const model = this.client.getGenerativeModel(modelConfig);
63
-
64
67
  // Pre-process messages to handle the 'system' role for Gemini
65
68
  const geminiMessages = [];
66
69
  let systemContentBuffer = [];
@@ -79,7 +82,7 @@ export class GeminiProvider extends BaseLLMProvider {
79
82
  }
80
83
  }
81
84
 
82
- const history = geminiMessages.map((msg, index) => {
85
+ const contents = geminiMessages.map((msg, index) => {
83
86
  let role = '';
84
87
  let parts;
85
88
 
@@ -115,32 +118,67 @@ export class GeminiProvider extends BaseLLMProvider {
115
118
  return { role, parts };
116
119
  }).filter(Boolean);
117
120
 
118
- while (history.length > 0 && history[0].role !== 'user') {
119
- history.shift();
121
+ while (contents.length > 0 && contents[0].role !== 'user') {
122
+ contents.shift();
120
123
  }
121
124
 
122
- if (history.length === 0) {
125
+ if (contents.length === 0) {
123
126
  throw new LLMServiceException('Cannot process a conversation with no user messages.', 400);
124
127
  }
125
128
 
126
- const lastMessage = history.pop();
127
- const chat = model.startChat({ history });
129
+ // Use the new @google/genai API
130
+ const result = await this.client.models.generateContent({
131
+ model: modelName,
132
+ contents: contents,
133
+ systemInstruction: systemPrompt,
134
+ generationConfig: generationConfig,
135
+ tools: tools ? [{ functionDeclarations: tools.map(t => t.function) }] : undefined,
136
+ });
128
137
 
129
- const result = await chat.sendMessage(lastMessage.parts);
130
- const response = result.response;
131
- const toolCalls = response.functionCalls();
138
+ // New SDK returns result directly, not result.response
139
+ // Debug log to understand structure
140
+ console.log('[GeminiProvider] Result structure:', JSON.stringify(Object.keys(result), null, 2));
141
+
142
+ // Handle both old (result.response) and new (direct result) structures
143
+ const response = result.response || result;
144
+
145
+ // Extract function calls - try multiple possible locations
146
+ let toolCalls = null;
147
+ if (typeof response.functionCalls === 'function') {
148
+ toolCalls = response.functionCalls();
149
+ } else if (response.functionCalls) {
150
+ toolCalls = response.functionCalls;
151
+ } else if (response.candidates?.[0]?.content?.parts) {
152
+ // Check parts for function calls
153
+ const functionCallParts = response.candidates[0].content.parts.filter(p => p.functionCall);
154
+ if (functionCallParts.length > 0) {
155
+ toolCalls = functionCallParts.map(p => p.functionCall);
156
+ }
157
+ }
132
158
 
159
+ // Extract text content - try multiple possible locations
133
160
  let textContent = '';
134
161
  try {
135
- textContent = response.text();
162
+ if (typeof response.text === 'function') {
163
+ textContent = response.text();
164
+ } else if (typeof response.text === 'string') {
165
+ textContent = response.text;
166
+ } else if (response.candidates?.[0]?.content?.parts) {
167
+ // Concatenate text from parts
168
+ textContent = response.candidates[0].content.parts
169
+ .filter(p => p.text)
170
+ .map(p => p.text)
171
+ .join('');
172
+ }
136
173
  } catch (e) {
137
174
  // response.text() throws if there is no text content (e.g. only tool calls)
138
175
  // This is expected behavior for tool-only responses
139
176
  }
177
+
140
178
  // Validate that we have EITHER content OR tool calls
141
179
  if (!textContent && (!toolCalls || toolCalls.length === 0)) {
142
180
  console.error('[GeminiProvider] Model returned empty response (no text, no tool calls)');
143
- console.error('[GeminiProvider] Last message:', JSON.stringify(lastMessage, null, 2));
181
+ console.error('[GeminiProvider] Full result:', JSON.stringify(result, null, 2));
144
182
  throw new LLMServiceException(
145
183
  'Model returned empty response. This usually means the prompt or schema is confusing the model.',
146
184
  500
@@ -150,7 +188,7 @@ export class GeminiProvider extends BaseLLMProvider {
150
188
  // Return with parsed JSON if applicable
151
189
  return {
152
190
  content: textContent,
153
- tool_calls: toolCalls ? toolCalls.map(fc => ({ type: 'function', function: fc })) : null,
191
+ tool_calls: toolCalls ? (Array.isArray(toolCalls) ? toolCalls : [toolCalls]).map(fc => ({ type: 'function', function: fc })) : null,
154
192
  _responseFormat: options.responseFormat,
155
193
  ...(options.responseFormat && this._shouldAutoParse(options) ? {
156
194
  parsedContent: this._safeJsonParse(textContent)
@@ -158,6 +196,7 @@ export class GeminiProvider extends BaseLLMProvider {
158
196
  };
159
197
  }
160
198
 
199
+
161
200
  _buildGenerationConfig(options, maxTokens, temperature) {
162
201
  const config = {
163
202
  temperature: options.temperature ?? temperature,
@@ -299,11 +338,6 @@ export class GeminiProvider extends BaseLLMProvider {
299
338
  }
300
339
 
301
340
  async imageGeneration(prompt, modelName, systemPrompt, options = {}) {
302
- const model = this.client.getGenerativeModel({
303
- model: modelName,
304
- systemInstruction: systemPrompt,
305
- });
306
-
307
341
  const generationConfig = {
308
342
  responseModalities: ["IMAGE"],
309
343
  };
@@ -327,11 +361,14 @@ export class GeminiProvider extends BaseLLMProvider {
327
361
  });
328
362
  }
329
363
 
330
- const result = await model.generateContent({
364
+ // Use the new @google/genai API
365
+ const result = await this.client.models.generateContent({
366
+ model: modelName,
331
367
  contents: [{
332
368
  role: "user",
333
369
  parts: parts
334
370
  }],
371
+ systemInstruction: systemPrompt,
335
372
  generationConfig
336
373
  });
337
374
 
@@ -355,40 +392,58 @@ export class GeminiProvider extends BaseLLMProvider {
355
392
  }
356
393
 
357
394
  async startVideoGeneration(prompt, images, modelName, systemPrompt, options = {}) {
358
- // 1. Initiate the request
395
+ // Use unified client for video generation
359
396
  const operation = await this.client.models.generateVideos({
360
397
  model: modelName,
361
398
  prompt: prompt,
362
399
  config: {
363
- referenceImages: images,
400
+ durationSeconds: options.durationSeconds || 6,
401
+ aspectRatio: options.aspectRatio || '16:9',
402
+ numberOfVideos: 1,
403
+ // Pass reference images if provided
404
+ ...(images && images.length > 0 ? { referenceImages: images } : {}),
364
405
  }
365
406
  });
366
407
 
408
+ // Store operation for later polling
409
+ this._pendingOperations.set(operation.name, operation);
410
+
367
411
  return { operationName: operation.name };
368
412
  }
369
413
 
370
414
  async getVideoGenerationStatus(operationName) {
371
- // 2. Get operation status
372
- // Assuming the SDK supports retrieving operation by name via this.client.models.getOperation
373
- // If not, we might need to adjust based on the specific SDK version.
374
- const operation = await this.client.models.getOperation(operationName);
415
+ // Get the operation from cache or fetch it
416
+ let operation = this._pendingOperations.get(operationName);
417
+
418
+ if (!operation) {
419
+ // If not in cache, we need to fetch it by name
420
+ operation = await this.client.models.getOperation(operationName);
421
+ }
375
422
 
376
423
  // Refresh status
377
- await operation.get();
424
+ operation = await operation.get();
425
+
426
+ // Update cache
427
+ this._pendingOperations.set(operationName, operation);
378
428
 
379
429
  const result = {
380
430
  done: operation.done,
381
- // Extract progress if available in metadata
382
431
  progress: operation.metadata?.progressPercent || 0,
383
432
  state: operation.metadata?.state || (operation.done ? 'COMPLETED' : 'PROCESSING'),
384
433
  };
385
434
 
386
435
  if (operation.done) {
436
+ // Clean up from cache
437
+ this._pendingOperations.delete(operationName);
438
+
387
439
  if (operation.error) {
388
440
  result.error = operation.error;
389
441
  } else {
390
442
  const videoResult = operation.response;
391
- result.videoUri = videoResult.uri || (videoResult.generatedAssets && videoResult.generatedAssets[0] && videoResult.generatedAssets[0].uri);
443
+ // Extract video URI from response
444
+ result.videoUri = videoResult.videos?.[0]?.gcsUri ||
445
+ videoResult.uri ||
446
+ (videoResult.generatedAssets?.[0]?.uri);
392
447
  result.content = "Video generation completed.";
393
448
  }
394
449
  }