@contentgrowth/llm-service 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
/**
 * Extracts and parses a JSON object from a text response (e.g., from an LLM).
 *
 * Strategies are tried in order and the first one that parses wins:
 *   1. The whole (trimmed) text is itself a JSON object.
 *   2. A JSON object wrapped in a markdown code fence (with or without the
 *      `json` language specifier).
 *   3. The span from the first `{` to the last `}` in the text.
 *   4. The first brace-balanced `{...}` span that parses, which copes with
 *      stray braces in the prose surrounding the JSON.
 *
 * Only JSON objects are extracted; top-level arrays and primitives are not.
 *
 * @param {string} text - The text containing JSON
 * @returns {object|null} - The parsed JSON object, or null if no valid JSON found
 */
export function extractJsonFromResponse(text) {
  if (!text || typeof text !== 'string') {
    return null;
  }

  // Upper bound on how many `{` positions the balanced-span scan will try,
  // so brace-heavy non-JSON input cannot cause quadratic work.
  const MAX_BALANCED_ATTEMPTS = 32;

  // Parses `jsonStr`; on failure retries once with over-escaped backslashes
  // collapsed. Throws the ORIGINAL parse error if nothing works.
  function tryParseJson(jsonStr) {
    try {
      return JSON.parse(jsonStr);
    } catch (e) {
      // Only attempt normalization when the over-escaping pattern
      // (four consecutive backslashes) is actually present.
      if (!jsonStr.includes('\\\\\\\\')) {
        throw e;
      }

      console.warn('Initial JSON parse failed, attempting normalization:', e.message);

      try {
        // Collapse each run of four backslashes into two.
        const normalized = jsonStr.replace(/\\\\\\\\/g, '\\\\');
        return JSON.parse(normalized);
      } catch (e2) {
        console.warn('Normalized JSON parse also failed:', e2.message);
        throw e; // Throw original error
      }
    }
  }

  // Returns the substring starting at `start` (which must point at `{`) up to
  // and including its matching `}`, ignoring braces inside double-quoted
  // strings. Returns null when the brace is never closed.
  function sliceBalancedObject(source, start) {
    let depth = 0;
    let inString = false;
    let escaped = false;

    for (let i = start; i < source.length; i += 1) {
      const ch = source[i];

      if (inString) {
        if (escaped) {
          escaped = false;
        } else if (ch === '\\') {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }

      if (ch === '"') {
        inString = true;
      } else if (ch === '{') {
        depth += 1;
      } else if (ch === '}') {
        depth -= 1;
        if (depth === 0) {
          return source.slice(start, i + 1);
        }
      }
    }

    return null;
  }

  // Fast path: the response is already a bare JSON object. Checking this first
  // avoids the regex work and prevents a code fence that merely appears inside
  // a string value from being mistaken for the payload.
  const trimmed = text.trim();
  if (trimmed.startsWith('{')) {
    try {
      return JSON.parse(trimmed);
    } catch (directError) {
      // Not plain JSON; fall through to the extraction strategies below.
    }
  }

  // Regular expression to find a JSON object within markdown code fences.
  // It's flexible with or without the 'json' language specifier.
  const jsonRegex = /```(?:json)?\s*({[\s\S]*?})\s*```/;
  const match = text.match(jsonRegex);

  if (match && match[1]) {
    try {
      return tryParseJson(match[1]);
    } catch (e) {
      // If parsing fails, log the error and fall through to the next method.
      console.warn('Could not parse the content of a matched JSON block.', e.message);
    }
  }

  // Fallback for cases where the AI might not use markdown fences correctly.
  const firstBrace = text.indexOf('{');
  const lastBrace = text.lastIndexOf('}');

  if (firstBrace !== -1 && lastBrace > firstBrace) {
    const potentialJson = text.substring(firstBrace, lastBrace + 1);
    let outerError;

    try {
      return tryParseJson(potentialJson);
    } catch (e) {
      // The widest span is not valid JSON; remember why and try narrower spans.
      outerError = e;
    }

    // Walk each `{` in turn and try the brace-balanced object that starts there.
    let attempts = 0;
    let start = firstBrace;
    while (start !== -1 && start < lastBrace && attempts < MAX_BALANCED_ATTEMPTS) {
      const candidate = sliceBalancedObject(text, start);
      // Skip the span that was already tried above.
      if (candidate !== null && candidate !== potentialJson) {
        try {
          return tryParseJson(candidate);
        } catch (e) {
          // This candidate is not valid JSON; move on to the next brace.
        }
      }
      attempts += 1;
      start = text.indexOf('{', start + 1);
    }

    console.error('Error parsing JSON extracted in { and }', outerError);
  }

  // If no valid JSON could be extracted by any method, return null.
  return null;
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { GoogleGenerativeAI } from '@google/generative-ai';
|
|
2
2
|
import { BaseLLMProvider } from './base-provider.js';
|
|
3
3
|
import { LLMServiceException } from '../../llm-service.js';
|
|
4
|
+
import { extractJsonFromResponse } from '../json-utils.js';
|
|
4
5
|
|
|
5
6
|
export class GeminiProvider extends BaseLLMProvider {
|
|
6
7
|
constructor(config) {
|
|
@@ -146,15 +147,27 @@ export class GeminiProvider extends BaseLLMProvider {
|
|
|
146
147
|
maxOutputTokens: options.maxTokens ?? maxTokens,
|
|
147
148
|
};
|
|
148
149
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
150
|
+
// Handle responseFormat as an object with type and schema properties
|
|
151
|
+
if (options.responseFormat) {
|
|
152
|
+
const formatType = typeof options.responseFormat === 'string'
|
|
153
|
+
? options.responseFormat
|
|
154
|
+
: options.responseFormat.type;
|
|
155
|
+
|
|
156
|
+
const schema = typeof options.responseFormat === 'object'
|
|
157
|
+
? options.responseFormat.schema
|
|
158
|
+
: null;
|
|
159
|
+
|
|
160
|
+
if (formatType === 'json' || formatType === 'json_schema') {
|
|
152
161
|
config.responseMimeType = 'application/json';
|
|
153
162
|
|
|
154
|
-
|
|
155
|
-
|
|
163
|
+
// CRITICAL: Must provide schema for "Strict Mode" to avoid markdown wrappers
|
|
164
|
+
if (schema) {
|
|
165
|
+
config.responseSchema = this._convertToGeminiSchema(schema);
|
|
166
|
+
console.log('[GeminiProvider] Using Strict JSON mode with schema');
|
|
167
|
+
} else {
|
|
168
|
+
console.warn('[GeminiProvider] Using legacy JSON mode without schema - may produce markdown wrappers');
|
|
156
169
|
}
|
|
157
|
-
|
|
170
|
+
}
|
|
158
171
|
}
|
|
159
172
|
|
|
160
173
|
return config;
|
|
@@ -223,12 +236,22 @@ export class GeminiProvider extends BaseLLMProvider {
|
|
|
223
236
|
|
|
224
237
|
_safeJsonParse(content) {
|
|
225
238
|
if (!content) return null;
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
239
|
+
|
|
240
|
+
// Use the robust JSON extractor that handles:
|
|
241
|
+
// - Markdown code blocks (```json ... ```)
|
|
242
|
+
// - Plain JSON objects
|
|
243
|
+
// - Over-escaped content (\\\\n instead of \\n)
|
|
244
|
+
// - Brace extraction as fallback
|
|
245
|
+
const parsed = extractJsonFromResponse(content);
|
|
246
|
+
|
|
247
|
+
if (parsed) {
|
|
248
|
+
console.log('[GeminiProvider] Successfully parsed JSON from response');
|
|
249
|
+
} else {
|
|
250
|
+
console.error('[GeminiProvider] Failed to extract valid JSON from response');
|
|
251
|
+
console.error('[GeminiProvider] Content preview:', content.substring(0, 200));
|
|
231
252
|
}
|
|
253
|
+
|
|
254
|
+
return parsed;
|
|
232
255
|
}
|
|
233
256
|
|
|
234
257
|
async executeTools(tool_calls, messages, tenantId, toolImplementations, env) {
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import OpenAI from 'openai';
|
|
2
2
|
import { BaseLLMProvider } from './base-provider.js';
|
|
3
|
+
import { extractJsonFromResponse } from '../json-utils.js';
|
|
3
4
|
|
|
4
5
|
export class OpenAIProvider extends BaseLLMProvider {
|
|
5
6
|
constructor(config) {
|
|
@@ -71,20 +72,48 @@ export class OpenAIProvider extends BaseLLMProvider {
|
|
|
71
72
|
}
|
|
72
73
|
|
|
73
74
|
_buildResponseFormat(options) {
|
|
74
|
-
|
|
75
|
+
if (!options.responseFormat) {
|
|
76
|
+
return undefined;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// Handle responseFormat as either string or object { type, schema }
|
|
80
|
+
const formatType = typeof options.responseFormat === 'string'
|
|
81
|
+
? options.responseFormat
|
|
82
|
+
: options.responseFormat.type;
|
|
83
|
+
|
|
84
|
+
const schema = typeof options.responseFormat === 'object'
|
|
85
|
+
? options.responseFormat.schema
|
|
86
|
+
: null;
|
|
87
|
+
|
|
88
|
+
switch (formatType) {
|
|
75
89
|
case 'json':
|
|
76
|
-
|
|
90
|
+
// If schema is provided, use strict mode; otherwise use legacy json_object
|
|
91
|
+
if (schema) {
|
|
92
|
+
console.log('[OpenAIProvider] Using Strict JSON mode with schema');
|
|
93
|
+
return {
|
|
94
|
+
type: 'json_schema',
|
|
95
|
+
json_schema: {
|
|
96
|
+
name: options.schemaName || 'response_schema',
|
|
97
|
+
strict: options.strictSchema ?? true,
|
|
98
|
+
schema: schema
|
|
99
|
+
}
|
|
100
|
+
};
|
|
101
|
+
} else {
|
|
102
|
+
console.warn('[OpenAIProvider] Using legacy json_object mode without schema - may produce markdown wrappers');
|
|
103
|
+
return { type: 'json_object' };
|
|
104
|
+
}
|
|
77
105
|
|
|
78
106
|
case 'json_schema':
|
|
79
|
-
if (!
|
|
80
|
-
throw new Error('
|
|
107
|
+
if (!schema) {
|
|
108
|
+
throw new Error('schema required when using json_schema format');
|
|
81
109
|
}
|
|
110
|
+
console.log('[OpenAIProvider] Using Strict JSON mode with schema');
|
|
82
111
|
return {
|
|
83
112
|
type: 'json_schema',
|
|
84
113
|
json_schema: {
|
|
85
114
|
name: options.schemaName || 'response_schema',
|
|
86
115
|
strict: options.strictSchema ?? true,
|
|
87
|
-
schema:
|
|
116
|
+
schema: schema
|
|
88
117
|
}
|
|
89
118
|
};
|
|
90
119
|
|
|
@@ -99,12 +128,22 @@ export class OpenAIProvider extends BaseLLMProvider {
|
|
|
99
128
|
|
|
100
129
|
_safeJsonParse(content) {
|
|
101
130
|
if (!content) return null;
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
131
|
+
|
|
132
|
+
// Use the robust JSON extractor that handles:
|
|
133
|
+
// - Markdown code blocks (```json ... ```)
|
|
134
|
+
// - Plain JSON objects
|
|
135
|
+
// - Over-escaped content (\\\\n instead of \\n)
|
|
136
|
+
// - Brace extraction as fallback
|
|
137
|
+
const parsed = extractJsonFromResponse(content);
|
|
138
|
+
|
|
139
|
+
if (parsed) {
|
|
140
|
+
console.log('[OpenAIProvider] Successfully parsed JSON from response');
|
|
141
|
+
} else {
|
|
142
|
+
console.error('[OpenAIProvider] Failed to extract valid JSON from response');
|
|
143
|
+
console.error('[OpenAIProvider] Content preview:', content.substring(0, 200));
|
|
107
144
|
}
|
|
145
|
+
|
|
146
|
+
return parsed;
|
|
108
147
|
}
|
|
109
148
|
|
|
110
149
|
async executeTools(tool_calls, messages, tenantId, toolImplementations, env) {
|