@djangocfg/llm 2.1.164
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +181 -0
- package/dist/index.cjs +1164 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +164 -0
- package/dist/index.d.ts +164 -0
- package/dist/index.mjs +1128 -0
- package/dist/index.mjs.map +1 -0
- package/dist/providers/index.cjs +317 -0
- package/dist/providers/index.cjs.map +1 -0
- package/dist/providers/index.d.cts +30 -0
- package/dist/providers/index.d.ts +30 -0
- package/dist/providers/index.mjs +304 -0
- package/dist/providers/index.mjs.map +1 -0
- package/dist/sdkrouter-D8GMBmTi.d.ts +171 -0
- package/dist/sdkrouter-hlQlVd0v.d.cts +171 -0
- package/dist/text-utils-DoYqMIr6.d.ts +289 -0
- package/dist/text-utils-VXWN-8Oq.d.cts +289 -0
- package/dist/translator/index.cjs +794 -0
- package/dist/translator/index.cjs.map +1 -0
- package/dist/translator/index.d.cts +24 -0
- package/dist/translator/index.d.ts +24 -0
- package/dist/translator/index.mjs +769 -0
- package/dist/translator/index.mjs.map +1 -0
- package/dist/types-D6lazgm1.d.cts +59 -0
- package/dist/types-D6lazgm1.d.ts +59 -0
- package/package.json +82 -0
- package/src/client.ts +119 -0
- package/src/index.ts +70 -0
- package/src/providers/anthropic.ts +98 -0
- package/src/providers/base.ts +90 -0
- package/src/providers/index.ts +15 -0
- package/src/providers/openai.ts +73 -0
- package/src/providers/sdkrouter.ts +279 -0
- package/src/translator/cache.ts +237 -0
- package/src/translator/index.ts +55 -0
- package/src/translator/json-translator.ts +408 -0
- package/src/translator/prompts.ts +90 -0
- package/src/translator/text-utils.ts +148 -0
- package/src/translator/types.ts +112 -0
- package/src/translator/validator.ts +181 -0
- package/src/types.ts +85 -0
- package/src/utils/env.ts +67 -0
- package/src/utils/index.ts +2 -0
- package/src/utils/json.ts +44 -0
- package/src/utils/schema.ts +153 -0
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Translator types
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { LLMRequestOptions } from '../types';
|
|
6
|
+
|
|
7
|
+
/**
 * Language identifier accepted by the translator.
 *
 * The literals below are the codes this package names explicitly. The trailing
 * `string` member means any other string is accepted as well, so the literals
 * act as documentation rather than as a closed set.
 */
export type LanguageCode =
  | 'en' | 'ru' | 'ko' | 'ja' | 'zh'
  | 'de' | 'fr' | 'es' | 'it' | 'pt'
  | 'pt-BR' | 'ar' | 'nl' | 'tr' | 'pl'
  | 'sv' | 'no' | 'da' | 'uk' | 'hi'
  | string;
|
|
32
|
+
|
|
33
|
+
/**
 * English display names keyed by language code.
 *
 * Typed as an open `Record<string, string>`, so looking up a code that is not
 * listed here yields `undefined` at runtime.
 */
export const LANGUAGE_NAMES: Record<string, string> = {
  'en': 'English',
  'ru': 'Russian',
  'ko': 'Korean',
  'ja': 'Japanese',
  'zh': 'Chinese',
  'de': 'German',
  'fr': 'French',
  'es': 'Spanish',
  'it': 'Italian',
  'pt': 'Portuguese',
  'pt-BR': 'Brazilian Portuguese',
  'ar': 'Arabic',
  'nl': 'Dutch',
  'tr': 'Turkish',
  'pl': 'Polish',
  'sv': 'Swedish',
  'no': 'Norwegian',
  'da': 'Danish',
  'uk': 'Ukrainian',
  'hi': 'Hindi',
};
|
|
58
|
+
|
|
59
|
+
/**
 * Per-call settings for a translation request.
 *
 * Inherits every field of `LLMRequestOptions` and adds the translator-specific
 * knobs below. All fields are optional.
 */
export interface TranslateOptions extends LLMRequestOptions {
  /** Language of the input; documented default is 'en'. */
  sourceLanguage?: LanguageCode;

  /** Upper bound on retries after a failed validation. */
  maxRetries?: number;

  /** Whether placeholders such as {name} or {{var}} must be kept intact. */
  preservePlaceholders?: boolean;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Outcome of translating a value of type `T`.
 */
export interface TranslateResult<T> {
  /** The translated value. */
  data: T;

  /** True when the translation passed validation (keys preserved). */
  valid: boolean;

  /** Validation error messages, if any were produced. */
  errors: string[];

  /** How many retries were used. */
  retries: number;

  /** Language the input was written in. */
  sourceLanguage: string;

  /** Language the output was translated into. */
  targetLanguage: string;
}
|
|
88
|
+
|
|
89
|
+
/**
 * One entry of a batch translation request.
 */
export interface BatchTranslateItem<T> {
  /** The value to translate. */
  data: T;

  /** Language to translate `data` into. */
  targetLanguage: LanguageCode;

  /** Optional per-item options override. */
  options?: TranslateOptions;
}
|
|
100
|
+
|
|
101
|
+
/**
 * Snapshot of translation cache counters.
 */
export interface CacheStats {
  /** Size of the in-memory cache. */
  memorySize: number;

  /** Hit counter. */
  hits: number;

  /** Miss counter. */
  misses: number;

  /** Per language pair: the pair label and its translation count. */
  languagePairs: {
    pair: string;
    translations: number;
  }[];
}
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Translation validation
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { extractPlaceholders } from './text-utils';
|
|
6
|
+
|
|
7
|
+
/**
 * Result returned by the validators in this module.
 */
export interface ValidationResult {
  /** True when no errors were collected. */
  valid: boolean;

  /** Human-readable description of each problem found. */
  errors: string[];
}
|
|
11
|
+
|
|
12
|
+
/**
 * Validate that a translated JSON value kept the structure of the original,
 * i.e. that object keys were not translated, dropped or invented.
 *
 * The check is recursive:
 * - values whose `typeof` differs are reported and not descended into;
 * - `null` must match `null`;
 * - arrays must have the same length (a mismatch is reported, and the common
 *   prefix is still compared element by element);
 * - objects must have exactly the same own enumerable keys; keys present on
 *   both sides are compared recursively;
 * - primitives are accepted as-is, since their content is what gets translated.
 *
 * Errors are listed in a stable order per object: missing keys, then unexpected
 * keys, then errors from nested values.
 *
 * @param original - Source value before translation.
 * @param translated - Value returned by the translation.
 * @param path - Location prefix used in error messages; empty means the root.
 * @returns `valid` is true when no error was collected.
 */
export function validateJsonKeys(
  original: unknown,
  translated: unknown,
  path: string = ''
): ValidationResult {
  const errors: string[] = [];
  const location = path || 'root';

  // Type check
  if (typeof original !== typeof translated) {
    errors.push(
      `Type mismatch at ${location}: expected ${typeof original}, got ${typeof translated}`
    );
    return { valid: false, errors };
  }

  // Null check (typeof null is 'object', so it has to be handled explicitly)
  if (original === null || translated === null) {
    if (original !== translated) {
      errors.push(`Null mismatch at ${location}`);
    }
    return { valid: errors.length === 0, errors };
  }

  // Array
  if (Array.isArray(original)) {
    if (!Array.isArray(translated)) {
      errors.push(`Expected array at ${location}`);
      return { valid: false, errors };
    }

    if (original.length !== translated.length) {
      errors.push(
        `Array length mismatch at ${location}: expected ${original.length}, got ${translated.length}`
      );
    }

    // Still compare the shared prefix so nested key problems are reported too.
    const minLen = Math.min(original.length, translated.length);
    for (let i = 0; i < minLen; i++) {
      const result = validateJsonKeys(original[i], translated[i], `${path}[${i}]`);
      errors.push(...result.errors);
    }

    return { valid: errors.length === 0, errors };
  }

  // Object
  if (typeof original === 'object') {
    if (typeof translated !== 'object' || Array.isArray(translated)) {
      errors.push(`Expected object at ${location}`);
      return { valid: false, errors };
    }

    const origRecord = original as Record<string, unknown>;
    const transRecord = translated as Record<string, unknown>;
    const origKeys = Object.keys(origRecord);
    const transKeys = Object.keys(transRecord);

    // Sets give constant-time membership tests; Array.includes inside the
    // loops below would make each object quadratic in its number of keys.
    const origKeySet = new Set(origKeys);
    const transKeySet = new Set(transKeys);
    const childPath = (key: string): string => (path ? `${path}.${key}` : key);

    // Check for missing keys
    for (const key of origKeys) {
      if (!transKeySet.has(key)) {
        errors.push(`Missing key: ${childPath(key)}`);
      }
    }

    // Check for extra keys (keys that were translated!)
    for (const key of transKeys) {
      if (!origKeySet.has(key)) {
        errors.push(`Unexpected key: ${childPath(key)} (key was translated?)`);
      }
    }

    // Recurse into matching keys
    for (const key of origKeys) {
      if (transKeySet.has(key)) {
        const result = validateJsonKeys(origRecord[key], transRecord[key], childPath(key));
        errors.push(...result.errors);
      }
    }

    return { valid: errors.length === 0, errors };
  }

  // Primitives (string, number, boolean) - no validation needed for structure
  return { valid: true, errors: [] };
}
|
|
106
|
+
|
|
107
|
+
/**
 * Check that placeholders found in the original strings also appear in the
 * translated strings, and that the translation introduces no new ones.
 *
 * - Two strings are compared via `extractPlaceholders`.
 * - Two arrays are walked index by index up to the shorter length.
 * - Two non-null objects are walked over the original's keys that are also
 *   present in the translation.
 * - Any other combination of values produces no errors here.
 *
 * @param original - Source value before translation.
 * @param translated - Value returned by the translation.
 * @param path - Location prefix used in error messages; empty means the root.
 * @returns `valid` is true when no error was collected.
 */
export function validatePlaceholders(
  original: unknown,
  translated: unknown,
  path: string = ''
): ValidationResult {
  const problems: string[] = [];
  const finish = (): ValidationResult => ({
    valid: problems.length === 0,
    errors: problems,
  });

  // Leaf: compare the placeholder sets of the two strings in both directions.
  if (typeof original === 'string' && typeof translated === 'string') {
    const expected = extractPlaceholders(original);
    const found = extractPlaceholders(translated);

    for (const token of expected) {
      if (found.includes(token)) continue;
      problems.push(`Missing placeholder "${token}" at ${path || 'root'}`);
    }

    for (const token of found) {
      if (expected.includes(token)) continue;
      problems.push(`Extra placeholder "${token}" at ${path || 'root'}`);
    }

    return finish();
  }

  // Arrays: only the shared prefix is compared.
  if (Array.isArray(original) && Array.isArray(translated)) {
    const shared = Math.min(original.length, translated.length);
    for (let index = 0; index < shared; index += 1) {
      const nested = validatePlaceholders(
        original[index],
        translated[index],
        `${path}[${index}]`
      );
      problems.push(...nested.errors);
    }
    return finish();
  }

  // Anything that is not a pair of non-null objects has nothing to compare.
  if (
    typeof original !== 'object' ||
    original === null ||
    typeof translated !== 'object' ||
    translated === null
  ) {
    return finish();
  }

  const source = original as Record<string, unknown>;
  const target = translated as Record<string, unknown>;
  for (const key of Object.keys(source)) {
    if (!(key in target)) continue;
    const nested = validatePlaceholders(
      source[key],
      target[key],
      path ? `${path}.${key}` : key
    );
    problems.push(...nested.errors);
  }

  return finish();
}
|
|
166
|
+
|
|
167
|
+
/**
 * Run every translation check and merge the outcomes.
 *
 * Runs `validateJsonKeys` first and `validatePlaceholders` second; the returned
 * errors keep that order, and the result is valid only when both checks pass.
 *
 * @param original - Source value before translation.
 * @param translated - Value returned by the translation.
 */
export function validateTranslation(
  original: unknown,
  translated: unknown
): ValidationResult {
  const checks: ValidationResult[] = [
    validateJsonKeys(original, translated),
    validatePlaceholders(original, translated),
  ];

  return {
    valid: checks.every((check) => check.valid),
    errors: checks.flatMap((check) => check.errors),
  };
}
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM types
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/** Identifier of a supported LLM backend. */
export type LLMProvider =
  | 'openai'
  | 'anthropic'
  | 'sdkrouter';
|
|
6
|
+
|
|
7
|
+
/** OpenAI model names this package lists explicitly. */
export type OpenAIModel =
  | 'gpt-4o' | 'gpt-4o-mini'
  | 'gpt-4-turbo' | 'gpt-4'
  | 'gpt-3.5-turbo';
|
|
13
|
+
|
|
14
|
+
/** Anthropic model aliases this package lists explicitly. */
export type AnthropicModel =
  | 'claude-3-5-sonnet-latest' | 'claude-3-5-haiku-latest'
  | 'claude-3-opus-latest';
|
|
18
|
+
|
|
19
|
+
/**
 * Any model name: one of the listed OpenAI or Anthropic names, or an arbitrary
 * string (the `string` member makes the union open-ended).
 */
export type LLMModel =
  | OpenAIModel
  | AnthropicModel
  | string;
|
|
20
|
+
|
|
21
|
+
/** A single chat message. */
export interface LLMMessage {
  /** Who authored the message. */
  role:
    | 'system'
    | 'user'
    | 'assistant';

  /** Text of the message. */
  content: string;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Client-level configuration. Every field is optional.
 */
export interface LLMConfig {
  /** Backend to use; auto-detected from the environment when omitted. */
  provider?: LLMProvider;

  /** API key; read from the environment when omitted. */
  apiKey?: string;

  /** Model used when a request does not override it. */
  model?: LLMModel;

  /** Temperature (0-1) used when a request does not override it. */
  temperature?: number;

  /** Max tokens used when a request does not override it. */
  maxTokens?: number;

  /** Overrides the base URL. */
  baseUrl?: string;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Per-request overrides. Every field is optional.
 */
export interface LLMRequestOptions {
  /** Model to use for this request. */
  model?: LLMModel;

  /** Temperature (0-1) to use for this request. */
  temperature?: number;

  /** Max tokens to use for this request. */
  maxTokens?: number;

  /** System prompt for this request. */
  system?: string;
}
|
|
51
|
+
|
|
52
|
+
/** Normalized reply from an LLM call. */
export interface LLMResponse {
  /** Text content of the reply. */
  content: string;

  /** Name of the model, as a plain string. */
  model: string;

  /** Token accounting, when available. */
  usage?: {
    promptTokens: number;
    completionTokens: number;
    totalTokens: number;
  };

  /** Finish reason, when available. */
  finishReason?: string;
}
|
|
62
|
+
|
|
63
|
+
/**
 * Contract implemented by every provider client.
 */
export interface LLMClient {
  /** Name of the provider behind this client. */
  provider: LLMProvider;

  /** Send a single prompt. */
  chat(prompt: string, options?: LLMRequestOptions): Promise<LLMResponse>;

  /** Send a list of chat messages. */
  chatMessages(messages: LLMMessage[], options?: LLMRequestOptions): Promise<LLMResponse>;

  /** Send a prompt and get the reply as JSON typed as `T`. */
  json<T = unknown>(prompt: string, options?: LLMRequestOptions): Promise<T>;

  /** Same as `json`, with an additional schema hint passed as a string. */
  jsonSchema<T = unknown>(
    prompt: string,
    schema: string,
    options?: LLMRequestOptions
  ): Promise<T>;
}
|
package/src/utils/env.ts
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Environment utilities
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { LLMProvider } from '../types';
|
|
6
|
+
|
|
7
|
+
/**
 * Look up an API key in the process environment.
 *
 * With an explicit provider, only that provider's variable is read
 * (`SDKROUTER_API_KEY`, `OPENAI_API_KEY` or `ANTHROPIC_API_KEY`). Without one,
 * the first non-empty variable wins, in the order sdkrouter, openai, anthropic.
 *
 * @param provider - Provider whose key is wanted; omit to auto-detect.
 * @returns The key, or `undefined` when the relevant variable is not set.
 */
export function getApiKey(provider?: LLMProvider): string | undefined {
  const { SDKROUTER_API_KEY, OPENAI_API_KEY, ANTHROPIC_API_KEY } = process.env;

  switch (provider) {
    case 'sdkrouter':
      return SDKROUTER_API_KEY;
    case 'openai':
      return OPENAI_API_KEY;
    case 'anthropic':
      return ANTHROPIC_API_KEY;
    default:
      // Auto-detect: sdkrouter is preferred, empty values are skipped.
      return SDKROUTER_API_KEY || OPENAI_API_KEY || ANTHROPIC_API_KEY;
  }
}
|
|
28
|
+
|
|
29
|
+
/**
 * Work out which provider to use from the process environment.
 *
 * `LLM_PROVIDER` (case-insensitive) wins when it names a known provider.
 * Otherwise the first provider whose API key variable is non-empty is chosen,
 * in the order sdkrouter, openai, anthropic.
 *
 * @returns The detected provider, or `undefined` when nothing is configured.
 */
export function detectProvider(): LLMProvider | undefined {
  // Explicit choice
  const requested = process.env.LLM_PROVIDER?.toLowerCase();
  switch (requested) {
    case 'sdkrouter':
    case 'openai':
    case 'anthropic':
      return requested;
  }

  // Fall back to whichever API key is present (sdkrouter preferred).
  const candidates: Array<[string | undefined, LLMProvider]> = [
    [process.env.SDKROUTER_API_KEY, 'sdkrouter'],
    [process.env.OPENAI_API_KEY, 'openai'],
    [process.env.ANTHROPIC_API_KEY, 'anthropic'],
  ];
  for (const [apiKey, provider] of candidates) {
    if (apiKey) {
      return provider;
    }
  }

  return undefined;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Pick the default model name for a provider.
 *
 * Each provider has an environment override (`SDKROUTER_MODEL`, `OPENAI_MODEL`,
 * `ANTHROPIC_MODEL`); when it is unset or empty a built-in name is used.
 *
 * @param provider - Provider to choose a model for.
 * @returns The model name; '@balanced' for any value not handled explicitly.
 */
export function getDefaultModel(provider: LLMProvider): string {
  switch (provider) {
    case 'sdkrouter':
      return process.env.SDKROUTER_MODEL || '@balanced';
    case 'openai':
      return process.env.OPENAI_MODEL || 'gpt-4o-mini';
    case 'anthropic':
      return process.env.ANTHROPIC_MODEL || 'claude-3-5-haiku-latest';
    default:
      return '@balanced';
  }
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON parsing utilities
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Extract and parse JSON from an LLM response.
 *
 * Handles replies wrapped in a markdown code fence and replies that surround
 * the JSON with prose. Two candidates are tried in order:
 *
 * 1. the span from the first `{` or `[` to the last closing delimiter of the
 *    SAME kind (`}` for `{`, `]` for `[`), so trailing text containing the
 *    other bracket type does not corrupt the span;
 * 2. the whole (de-fenced, trimmed) text, which covers bare JSON primitives
 *    and JSON strings that merely contain brackets.
 *
 * The parsed value is returned typed as `T` without runtime validation;
 * callers that need guarantees should validate it themselves.
 *
 * @param text - Raw model output.
 * @returns The parsed JSON value.
 * @throws Error when no candidate parses; the message carries the parser error
 *   of the first candidate and the full original response.
 */
export function extractJson<T = unknown>(text: string): T {
  let jsonStr = text.trim();

  // Remove markdown code blocks
  if (jsonStr.startsWith('```json')) {
    jsonStr = jsonStr.slice(7);
  } else if (jsonStr.startsWith('```')) {
    jsonStr = jsonStr.slice(3);
  }

  if (jsonStr.endsWith('```')) {
    jsonStr = jsonStr.slice(0, -3);
  }

  jsonStr = jsonStr.trim();

  const candidates: string[] = [];

  // Try to find JSON object or array
  const jsonStart = jsonStr.search(/[\[{]/);
  if (jsonStart !== -1) {
    // Pair the opener with its own closer: taking whichever of ']' / '}'
    // comes last breaks inputs such as `{"a": 1} see [note]`.
    const closer = jsonStr.charAt(jsonStart) === '{' ? '}' : ']';
    const jsonEnd = jsonStr.lastIndexOf(closer);
    if (jsonEnd > jsonStart) {
      candidates.push(jsonStr.slice(jsonStart, jsonEnd + 1));
    }
  }

  // Fallback: the text as a whole (primitives, quoted strings, no brackets).
  if (candidates[0] !== jsonStr) {
    candidates.push(jsonStr);
  }

  let firstError: unknown;
  let failed = false;
  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate) as T;
    } catch (error) {
      // Report the most specific failure: the one from the first candidate.
      if (!failed) {
        firstError = error;
        failed = true;
      }
    }
  }

  throw new Error(
    `Failed to parse JSON from LLM response: ${firstError instanceof Error ? firstError.message : 'Unknown error'}\n\nResponse:\n${text}`
  );
}
|
|
42
|
+
|
|
43
|
+
// Re-export from translator validator for backwards compatibility
|
|
44
|
+
export { validateJsonKeys } from '../translator/validator';
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Schema utilities for structured LLM outputs
|
|
3
|
+
*
|
|
4
|
+
* Provides helpers for working with Zod schemas and LLM structured outputs.
|
|
5
|
+
* Zod is optional - only used if installed.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { LLMClient, LLMRequestOptions, LLMResponse } from '../types';
|
|
9
|
+
import { extractJson } from './json';
|
|
10
|
+
|
|
11
|
+
/**
 * The subset of a Zod schema this module relies on.
 *
 * Any object exposing these two methods can be used, so Zod itself does not
 * have to be installed.
 */
export interface ZodLikeSchema<T = unknown> {
  /** Validate `data` and return it typed as `T`. */
  parse(data: unknown): T;

  /** Validate `data` and report the outcome as a tagged result object. */
  safeParse(
    data: unknown
  ):
    | { success: true; data: T }
    | { success: false; error: unknown };
}
|
|
19
|
+
|
|
20
|
+
/**
 * Minimal JSON Schema node used when describing an expected structure in an
 * LLM prompt.
 */
export interface JsonSchemaDefinition {
  /** Schema type name. */
  type: string;

  /** Child schemas keyed by property name. */
  properties?: Record<string, JsonSchemaDefinition>;

  /** Schema of the items. */
  items?: JsonSchemaDefinition;

  /** Names of required properties. */
  required?: string[];

  /** Free-form description. */
  description?: string;

  /** List of enum values. */
  enum?: unknown[];
}
|
|
31
|
+
|
|
32
|
+
/**
 * Derive a simple JSON schema from a sample value.
 *
 * - `null` becomes `{ type: 'null' }`.
 * - An array becomes `{ type: 'array', items }`, where `items` is derived from
 *   the first element only; an empty array falls back to string items.
 * - An object becomes `{ type: 'object', properties, required }`, with every
 *   key of the sample marked as required.
 * - Anything else uses its `typeof` as the type name.
 *
 * @example
 * ```ts
 * const schema = generateSchemaFromExample({ name: '', age: 0, active: true })
 * // { type: "object", properties: { name: { type: "string" }, ... } }
 * ```
 *
 * @param example - Sample value whose shape should be described.
 * @returns The derived schema.
 */
export function generateSchemaFromExample(example: unknown): JsonSchemaDefinition {
  if (example === null) {
    return { type: 'null' };
  }

  if (Array.isArray(example)) {
    // Only the first element is inspected; the array is assumed homogeneous.
    const hasSample = example.length > 0;
    return {
      type: 'array',
      items: hasSample ? generateSchemaFromExample(example[0]) : { type: 'string' },
    };
  }

  if (typeof example !== 'object') {
    return { type: typeof example };
  }

  const entries = Object.entries(example);
  const properties: Record<string, JsonSchemaDefinition> = {};
  entries.forEach(([name, sample]) => {
    properties[name] = generateSchemaFromExample(sample);
  });

  return {
    type: 'object',
    properties,
    required: entries.map(([name]) => name),
  };
}
|
|
66
|
+
|
|
67
|
+
/**
 * Render a JSON schema as pretty-printed JSON text (two-space indent) for
 * embedding in a prompt.
 *
 * @param schema - Schema to serialize.
 * @returns The serialized schema.
 */
export function schemaToPromptString(schema: JsonSchemaDefinition): string {
  const indent = 2;
  const rendered = JSON.stringify(schema, undefined, indent);
  return rendered;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Ask the LLM for JSON and validate the reply against a schema.
 *
 * The request always uses `temperature: 0`, overriding any temperature passed
 * in `options`. When `options.system` is not provided, a system prompt asking
 * for JSON only is used. Failures of JSON extraction or schema validation are
 * returned as an `error` object; errors thrown by `client.chat` itself are not
 * caught here.
 *
 * @example
 * ```ts
 * import { z } from 'zod'
 *
 * const UserSchema = z.object({
 *   name: z.string(),
 *   age: z.number(),
 * })
 *
 * const user = await getStructuredOutput(llm, 'Extract user info from: John is 25', UserSchema)
 * // { name: 'John', age: 25 }
 * ```
 *
 * @param client - Client used to send the prompt.
 * @param prompt - Prompt text.
 * @param schema - Zod-compatible schema the parsed reply must satisfy.
 * @param options - Request options; `temperature` is ignored (see above).
 * @returns `{ data, response }` on success. On schema failure, `{ error, raw }`
 *   with `raw` set to the parsed JSON. On extraction failure (or if the schema
 *   throws), `{ error, raw }` with `raw` set to the raw reply text.
 */
export async function getStructuredOutput<T>(
  client: LLMClient,
  prompt: string,
  schema: ZodLikeSchema<T>,
  options?: LLMRequestOptions
): Promise<{ data: T; response: LLMResponse } | { error: string; raw: unknown }> {
  const systemPrompt =
    options?.system ?? 'Return ONLY valid JSON matching the requested structure.';
  const requestOptions = {
    ...options,
    temperature: 0,
    system: systemPrompt,
  };

  const response = await client.chat(prompt, requestOptions);

  try {
    const candidate = extractJson(response.content);
    const outcome = schema.safeParse(candidate);

    if (!outcome.success) {
      return {
        error: 'Schema validation failed',
        raw: candidate,
      };
    }

    return { data: outcome.data, response };
  } catch (failure) {
    const message = failure instanceof Error ? failure.message : 'Failed to parse JSON';
    return {
      error: message,
      raw: response.content,
    };
  }
}
|
|
121
|
+
|
|
122
|
+
/**
 * Get structured output, re-asking the LLM when parsing or validation fails.
 *
 * The same prompt is sent up to `maxRetries + 1` times (one initial attempt
 * plus retries). A warning is logged after each failed attempt that will be
 * followed by another one.
 *
 * @param client - Client used to send the prompt.
 * @param prompt - Prompt text.
 * @param schema - Zod-compatible schema the parsed reply must satisfy.
 * @param options - Request options plus `maxRetries` (default 2). `maxRetries`
 *   is consumed here and not forwarded to the client. Negative values are
 *   treated as 0 and NaN as the default, so at least one attempt is made.
 * @returns On success `{ data, retries }`; on failure `{ error, raw, retries }`
 *   carrying the last error and raw value. In both cases `retries` is the
 *   number of retries performed, i.e. attempts minus one.
 */
export async function getStructuredOutputWithRetry<T>(
  client: LLMClient,
  prompt: string,
  schema: ZodLikeSchema<T>,
  options?: LLMRequestOptions & { maxRetries?: number }
): Promise<{ data: T; retries: number } | { error: string; raw: unknown; retries: number }> {
  // Split the retry setting from the options that belong to the LLM request.
  const resolved: LLMRequestOptions & { maxRetries?: number } = options ?? {};
  const { maxRetries: requestedRetries, ...requestOptions } = resolved;

  const requested = requestedRetries ?? 2;
  const maxRetries = Number.isNaN(requested) ? 2 : Math.max(0, Math.floor(requested));

  let attempts = 0;
  let lastError = '';
  let lastRaw: unknown;

  while (attempts <= maxRetries) {
    const result = await getStructuredOutput(client, prompt, schema, requestOptions);

    if ('data' in result) {
      // `attempts` failed tries happened before this one, i.e. that many retries.
      return { data: result.data, retries: attempts };
    }

    lastError = result.error;
    lastRaw = result.raw;
    attempts++;

    if (attempts <= maxRetries) {
      console.warn(`Structured output validation failed (attempt ${attempts}/${maxRetries + 1})`);
    }
  }

  // All attempts failed: the first one was not a retry.
  return { error: lastError, raw: lastRaw, retries: attempts - 1 };
}
|