@contentgrowth/llm-service 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@contentgrowth/llm-service",
-  "version": "0.5.0",
+  "version": "0.6.0",
   "description": "Unified LLM Service for Content Growth",
   "main": "src/index.js",
   "type": "module",
@@ -0,0 +1,80 @@
+/**
+ * Extracts and parses JSON from a text response (e.g., from an LLM).
+ * Handles JSON in markdown code blocks or plain JSON objects.
+ *
+ * TODO: improve performance
+ *
+ * @param {string} text - The text containing JSON
+ * @returns {object|null} - The parsed JSON object, or null if no valid JSON found
+ */
+export function extractJsonFromResponse(text) {
+  if (!text || typeof text !== 'string') {
+    return null;
+  }
+
+  // Helper function to attempt JSON parsing with escape sequence normalization
+  function tryParseJson(jsonStr) {
+    // First, try to parse as-is
+    try {
+      return JSON.parse(jsonStr);
+    } catch (e) {
+      // If that fails, check if the LLM over-escaped the content
+      // This is a common issue where LLMs return \\\\n instead of \\n
+
+      // Only attempt normalization if we detect the problematic pattern
+      if (jsonStr.includes('\\\\\\\\')) {
+        // Log the first parse attempt failure for debugging
+        console.warn('Initial JSON parse failed, attempting normalization:', e.message);
+
+        try {
+          // Strategy: The LLM sometimes escapes strings that are already escaped
+          // For example: "content": "text\\\\nmore" should be "content": "text\\nmore"
+          // Replace quadruple backslashes with double (handles over-escaping)
+          let normalized = jsonStr.replace(/\\\\\\\\/g, '\\\\');
+
+          return JSON.parse(normalized);
+        } catch (e2) {
+          // Log this failure too
+          console.warn('Normalized JSON parse also failed:', e2.message);
+          throw e; // Throw original error
+        }
+      } else {
+        // No over-escaping pattern detected, throw original error
+        throw e;
+      }
+    }
+  }
+
+  // Regular expression to find a JSON object within markdown code fences.
+  // It's flexible with or without the 'json' language specifier.
+  const jsonRegex = /```(?:json)?\s*({[\s\S]*?})\s*```/;
+  const match = text.match(jsonRegex);
+
+  // If a fenced JSON block is found, try to parse it.
+  if (match && match[1]) {
+    try {
+      return tryParseJson(match[1]);
+    } catch (e) {
+      // If parsing fails, log the error and fall through to the next method.
+      console.warn('Could not parse the content of a matched JSON block.', e.message);
+    }
+  }
+
+  // Fallback for cases where the AI might not use markdown fences correctly.
+  // Find the first opening brace and the last closing brace.
+  const firstBrace = text.indexOf('{');
+  const lastBrace = text.lastIndexOf('}');
+
+  if (firstBrace !== -1 && lastBrace > firstBrace) {
+    const potentialJson = text.substring(firstBrace, lastBrace + 1);
+    try {
+      return tryParseJson(potentialJson);
+    } catch (e) {
+      // This substring is not valid JSON.
+      console.error('Error parsing JSON extracted in { and }', e);
+    }
+  }
+
+  // If no valid JSON could be extracted by any method, return null.
+  return null;
+}
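
A small usage sketch of the new helper (the import path is an assumption; the diff does not name the added file, though the providers below import it as '../json-utils.js'):

```js
import { extractJsonFromResponse } from './json-utils.js'; // assumed location within the package source

// Fenced markdown block: the regex branch extracts and parses the object.
extractJsonFromResponse('```json\n{ "title": "Hello" }\n```');                // -> { title: 'Hello' }

// JSON embedded in prose without fences: the brace-extraction fallback applies.
extractJsonFromResponse('Here is the result: { "count": 3 } as requested.');  // -> { count: 3 }

// Nothing parseable: the helper returns null rather than throwing.
extractJsonFromResponse('No structured data here.');                          // -> null
```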
@@ -1,6 +1,7 @@
 import { GoogleGenerativeAI } from '@google/generative-ai';
 import { BaseLLMProvider } from './base-provider.js';
 import { LLMServiceException } from '../../llm-service.js';
+import { extractJsonFromResponse } from '../json-utils.js';
 
 export class GeminiProvider extends BaseLLMProvider {
   constructor(config) {
@@ -146,15 +147,27 @@ export class GeminiProvider extends BaseLLMProvider {
       maxOutputTokens: options.maxTokens ?? maxTokens,
     };
 
-    switch (options.responseFormat) {
-      case 'json':
-      case 'json_schema':
+    // Handle responseFormat as an object with type and schema properties
+    if (options.responseFormat) {
+      const formatType = typeof options.responseFormat === 'string'
+        ? options.responseFormat
+        : options.responseFormat.type;
+
+      const schema = typeof options.responseFormat === 'object'
+        ? options.responseFormat.schema
+        : null;
+
+      if (formatType === 'json' || formatType === 'json_schema') {
         config.responseMimeType = 'application/json';
 
-        if (options.responseSchema) {
-          config.responseSchema = this._convertToGeminiSchema(options.responseSchema);
+        // CRITICAL: Must provide schema for "Strict Mode" to avoid markdown wrappers
+        if (schema) {
+          config.responseSchema = this._convertToGeminiSchema(schema);
+          console.log('[GeminiProvider] Using Strict JSON mode with schema');
+        } else {
+          console.warn('[GeminiProvider] Using legacy JSON mode without schema - may produce markdown wrappers');
         }
-        break;
+      }
     }
 
     return config;
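
For callers of the Gemini provider, responseFormat can now be an object that carries both the type and the schema, replacing the old string-plus-responseSchema pair. A minimal sketch of the new shape (the schema itself is illustrative, not from the package):

```js
// Options as consumed by the generation-config builder shown above.
const options = {
  maxTokens: 1024,
  responseFormat: {
    type: 'json_schema',   // 'json' is handled the same way; a bare string is still accepted
    schema: {
      type: 'object',
      properties: { title: { type: 'string' }, body: { type: 'string' } },
      required: ['title', 'body']
    }
  }
};
// With a schema present, the builder sets responseMimeType: 'application/json' plus
// responseSchema (via _convertToGeminiSchema), i.e. strict JSON mode; without one it
// only sets the MIME type and warns that markdown wrappers may appear.
```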
@@ -223,12 +236,22 @@ export class GeminiProvider extends BaseLLMProvider {
 
   _safeJsonParse(content) {
     if (!content) return null;
-    try {
-      return JSON.parse(content);
-    } catch (e) {
-      console.warn('[GeminiProvider] Failed to auto-parse JSON response:', e.message);
-      return null;
+
+    // Use the robust JSON extractor that handles:
+    // - Markdown code blocks (```json ... ```)
+    // - Plain JSON objects
+    // - Over-escaped content (\\\\n instead of \\n)
+    // - Brace extraction as fallback
+    const parsed = extractJsonFromResponse(content);
+
+    if (parsed) {
+      console.log('[GeminiProvider] Successfully parsed JSON from response');
+    } else {
+      console.error('[GeminiProvider] Failed to extract valid JSON from response');
+      console.error('[GeminiProvider] Content preview:', content.substring(0, 200));
     }
+
+    return parsed;
   }
 
   async executeTools(tool_calls, messages, tenantId, toolImplementations, env) {
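
The provider's _safeJsonParse keeps its null-on-failure contract, but it can now recover JSON that the previous bare JSON.parse rejected. A brief before/after sketch (the reply string is invented; the import path is assumed as in the earlier example):

```js
import { extractJsonFromResponse } from './json-utils.js'; // assumed location of the new helper

// A fenced reply of the kind Gemini's schema-less JSON mode can produce:
const reply = '```json\n{ "status": "ok" }\n```';

// 0.5.0 behavior inside _safeJsonParse: JSON.parse throws on the fence, so the method returned null.
let legacy = null;
try { legacy = JSON.parse(reply); } catch { /* swallowed, as before */ }

// 0.6.0 behavior: the shared extractor strips the fence before parsing.
const parsed = extractJsonFromResponse(reply);

console.log(legacy);         // null
console.log(parsed?.status); // 'ok' — callers should still check for null, since extraction can fail
```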
@@ -1,5 +1,6 @@
 import OpenAI from 'openai';
 import { BaseLLMProvider } from './base-provider.js';
+import { extractJsonFromResponse } from '../json-utils.js';
 
 export class OpenAIProvider extends BaseLLMProvider {
   constructor(config) {
@@ -71,20 +72,48 @@ export class OpenAIProvider extends BaseLLMProvider {
   }
 
   _buildResponseFormat(options) {
-    switch (options.responseFormat) {
+    if (!options.responseFormat) {
+      return undefined;
+    }
+
+    // Handle responseFormat as either string or object { type, schema }
+    const formatType = typeof options.responseFormat === 'string'
+      ? options.responseFormat
+      : options.responseFormat.type;
+
+    const schema = typeof options.responseFormat === 'object'
+      ? options.responseFormat.schema
+      : null;
+
+    switch (formatType) {
       case 'json':
-        return { type: 'json_object' };
+        // If schema is provided, use strict mode; otherwise use legacy json_object
+        if (schema) {
+          console.log('[OpenAIProvider] Using Strict JSON mode with schema');
+          return {
+            type: 'json_schema',
+            json_schema: {
+              name: options.schemaName || 'response_schema',
+              strict: options.strictSchema ?? true,
+              schema: schema
+            }
+          };
+        } else {
+          console.warn('[OpenAIProvider] Using legacy json_object mode without schema - may produce markdown wrappers');
+          return { type: 'json_object' };
+        }
 
       case 'json_schema':
-        if (!options.responseSchema) {
-          throw new Error('responseSchema required when using json_schema format');
+        if (!schema) {
+          throw new Error('schema required when using json_schema format');
         }
+        console.log('[OpenAIProvider] Using Strict JSON mode with schema');
         return {
           type: 'json_schema',
           json_schema: {
             name: options.schemaName || 'response_schema',
             strict: options.strictSchema ?? true,
-            schema: options.responseSchema
+            schema: schema
           }
         };
 
@@ -99,12 +128,22 @@ export class OpenAIProvider extends BaseLLMProvider {
 
   _safeJsonParse(content) {
     if (!content) return null;
-    try {
-      return JSON.parse(content);
-    } catch (e) {
-      console.warn('[OpenAIProvider] Failed to auto-parse JSON response:', e.message);
-      return null;
+
+    // Use the robust JSON extractor that handles:
+    // - Markdown code blocks (```json ... ```)
+    // - Plain JSON objects
+    // - Over-escaped content (\\\\n instead of \\n)
+    // - Brace extraction as fallback
+    const parsed = extractJsonFromResponse(content);
+
+    if (parsed) {
+      console.log('[OpenAIProvider] Successfully parsed JSON from response');
+    } else {
+      console.error('[OpenAIProvider] Failed to extract valid JSON from response');
+      console.error('[OpenAIProvider] Content preview:', content.substring(0, 200));
     }
+
+    return parsed;
   }
 
   async executeTools(tool_calls, messages, tenantId, toolImplementations, env) {
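
Finally, for the _buildResponseFormat change above: when a schema accompanies type 'json', the provider now emits OpenAI's json_schema response_format instead of plain json_object. A sketch of the two objects the method constructs (the example schema is illustrative):

```js
// responseFormat: { type: 'json', schema }  ->  strict structured output
const strictFormat = {
  type: 'json_schema',
  json_schema: {
    name: 'response_schema',  // options.schemaName, defaulting to 'response_schema'
    strict: true,             // options.strictSchema, defaulting to true
    schema: { type: 'object', properties: { title: { type: 'string' } }, required: ['title'] }
  }
};

// responseFormat: 'json' or { type: 'json' } without a schema  ->  legacy JSON mode
const legacyFormat = { type: 'json_object' };
```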