llm-fns 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/package.json +27 -0
- package/readme.md +299 -0
- package/scripts/release.sh +32 -0
- package/src/createLlmClient.spec.ts +42 -0
- package/src/createLlmClient.ts +389 -0
- package/src/createLlmRetryClient.ts +244 -0
- package/src/createZodLlmClient.spec.ts +76 -0
- package/src/createZodLlmClient.ts +378 -0
- package/src/index.ts +5 -0
- package/src/llmFactory.ts +26 -0
- package/src/retryUtils.ts +91 -0
- package/tests/basic.test.ts +47 -0
- package/tests/env.ts +16 -0
- package/tests/setup.ts +24 -0
- package/tests/zod.test.ts +178 -0
- package/tsconfig.json +22 -0
package/src/createLlmClient.ts
@@ -0,0 +1,389 @@
import crypto from 'crypto';
import OpenAI from "openai";
import type { Cache } from 'cache-manager'; // Using Cache from cache-manager
import type PQueue from 'p-queue';
import { executeWithRetry } from './retryUtils.js';

export function countChars(message: OpenAI.Chat.Completions.ChatCompletionMessageParam): number {
  if (!message.content) return 0;
  if (typeof message.content === 'string') {
    return message.content.length;
  }
  if (Array.isArray(message.content)) {
    return message.content.reduce((sum, part) => {
      if (part.type === 'text') {
        return sum + part.text.length;
      }
      if (part.type === 'image_url') {
        return sum + 2500;
      }
      return sum;
    }, 0);
  }
  return 0;
}

export function truncateSingleMessage(message: OpenAI.Chat.Completions.ChatCompletionMessageParam, charLimit: number): OpenAI.Chat.Completions.ChatCompletionMessageParam {
  const TRUNCATION_SUFFIX = '...[truncated]';
  const messageCopy = JSON.parse(JSON.stringify(message));

  if (charLimit <= 0) {
    messageCopy.content = null;
    return messageCopy;
  }

  if (!messageCopy.content || countChars(messageCopy) <= charLimit) {
    return messageCopy;
  }

  if (typeof messageCopy.content === 'string') {
    let newContent = messageCopy.content;
    if (newContent.length > charLimit) {
      if (charLimit > TRUNCATION_SUFFIX.length) {
        newContent = newContent.substring(0, charLimit - TRUNCATION_SUFFIX.length) + TRUNCATION_SUFFIX;
      } else {
        newContent = newContent.substring(0, charLimit);
      }
    }
    messageCopy.content = newContent;
    return messageCopy;
  }

  if (Array.isArray(messageCopy.content)) {
    // Complex case: multipart message.
    // Strategy: consolidate text, remove images if needed, then truncate text.
    const textParts = messageCopy.content.filter((p: any) => p.type === 'text');
    const imageParts = messageCopy.content.filter((p: any) => p.type === 'image_url');
    let combinedText = textParts.map((p: any) => p.text).join('\n');
    let keptImages = [...imageParts];

    while (combinedText.length + (keptImages.length * 2500) > charLimit && keptImages.length > 0) {
      keptImages.pop(); // remove images from the end
    }

    const imageChars = keptImages.length * 2500;
    const textCharLimit = charLimit - imageChars;

    if (combinedText.length > textCharLimit) {
      if (textCharLimit > TRUNCATION_SUFFIX.length) {
        combinedText = combinedText.substring(0, textCharLimit - TRUNCATION_SUFFIX.length) + TRUNCATION_SUFFIX;
      } else if (textCharLimit >= 0) {
        combinedText = combinedText.substring(0, textCharLimit);
      } else {
        combinedText = "";
      }
    }

    const newContent: OpenAI.Chat.Completions.ChatCompletionContentPart[] = [];
    if (combinedText) {
      newContent.push({ type: 'text', text: combinedText });
    }
    newContent.push(...keptImages);
    messageCopy.content = newContent;
  }

  return messageCopy;
}


export function truncateMessages(messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[], limit: number): OpenAI.Chat.Completions.ChatCompletionMessageParam[] {
  const systemMessage = messages.find(m => m.role === 'system');
  const otherMessages = messages.filter(m => m.role !== 'system');

  let totalChars = otherMessages.reduce((sum: number, msg) => sum + countChars(msg), 0);

  if (totalChars <= limit) {
    return messages;
  }

  const mutableOtherMessages: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = JSON.parse(JSON.stringify(otherMessages));
  let excessChars = totalChars - limit;

  // Truncate messages starting from the second one.
  for (let i = 1; i < mutableOtherMessages.length; i++) {
    if (excessChars <= 0) break;

    const message = mutableOtherMessages[i];
    const messageChars = countChars(message);
    const charsToCut = Math.min(excessChars, messageChars);

    const newCharCount = messageChars - charsToCut;
    mutableOtherMessages[i] = truncateSingleMessage(message, newCharCount);

    excessChars -= charsToCut;
  }

  // If still over limit, truncate the first message.
  if (excessChars > 0) {
    const firstMessage = mutableOtherMessages[0];
    const firstMessageChars = countChars(firstMessage);
    const charsToCut = Math.min(excessChars, firstMessageChars);
    const newCharCount = firstMessageChars - charsToCut;
    mutableOtherMessages[0] = truncateSingleMessage(firstMessage, newCharCount);
  }

  // Filter out empty messages (char count is 0)
  const finalMessages = mutableOtherMessages.filter(msg => countChars(msg) > 0);

  return systemMessage ? [systemMessage, ...finalMessages] : finalMessages;
}

function concatMessageText(messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[]): string {
  const textParts: string[] = [];
  for (const message of messages) {
    if (message.content) {
      if (typeof message.content === 'string') {
        textParts.push(message.content);
      } else if (Array.isArray(message.content)) {
        for (const part of message.content) {
          if (part.type === 'text') {
            textParts.push(part.text);
          } else if (part.type === 'image_url') {
            textParts.push('[IMAGE]');
          }
        }
      }
    }
  }
  return textParts.join(' ');
}

function getPromptSummary(messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[]): string {
  const fullText = concatMessageText(messages);
  // Replace multiple whitespace chars with a single space and trim.
  const cleanedText = fullText.replace(/\s+/g, ' ').trim();
  // Truncate to a reasonable length.
  const maxLength = 150;
  if (cleanedText.length > maxLength) {
    return cleanedText.substring(0, maxLength) + '...';
  }
  return cleanedText;
}

/**
 * The response format for OpenAI and OpenRouter.
 * OpenRouter extends this with 'json_schema'.
 */
export type ModelConfig = string | ({ model?: string } & Record<string, any>);

export type OpenRouterResponseFormat =
  | { type: 'text' | 'json_object' }
  | {
      type: 'json_schema';
      json_schema: {
        name: string;
        strict?: boolean;
        schema: object;
      };
    };


/**
 * Options for the individual "prompt" function calls.
 * These can override defaults or add call-specific parameters.
 * 'messages' is a required property, inherited from OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming.
 */
export interface LlmPromptOptions extends Omit<OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming, 'model' | 'response_format' | 'modalities' | 'messages'> {
  messages: string | OpenAI.Chat.Completions.ChatCompletionMessageParam[];
  model?: ModelConfig; // Allow overriding the default model for a specific call
  ttl?: number; // Cache TTL in *MILLISECONDS* for this specific call, used if cache is enabled
  retries?: number; // Number of retries for the API call.
  /** @deprecated Use `reasoning` object instead. */
  response_format?: OpenRouterResponseFormat;
  modalities?: string[];
  image_config?: {
    aspect_ratio?: string;
  };
}

/**
 * Options required to create an instance of the LlmClient.
 * These are the core dependencies.
 */
export interface CreateLlmClientParams {
  openai: OpenAI;
  cache?: Cache; // Cache instance is now optional. Expect a cache-manager compatible instance if provided.
  defaultModel: ModelConfig; // The default OpenAI model to use if not overridden in LlmPromptOptions
  maxConversationChars?: number;
  queue?: PQueue;
}

export function normalizeOptions(arg1: string | LlmPromptOptions, arg2?: Omit<LlmPromptOptions, 'messages'>): LlmPromptOptions {
  if (typeof arg1 === 'string') {
    return {
      messages: [{ role: 'user', content: arg1 }],
      ...arg2
    };
  }
  const options = arg1;
  if (typeof options.messages === 'string') {
    return {
      ...options,
      messages: [{ role: 'user', content: options.messages }]
    };
  }
  return options;
}

/**
 * Factory function that creates a GPT "prompt" function, with optional caching.
 * @param params - The core dependencies (API key, base URL, default model, and optional cache instance).
 * @returns An async function `prompt` ready to make OpenAI calls, with caching if configured.
 */
export function createLlmClient(params: CreateLlmClientParams) {
  const { openai, cache: cacheInstance, defaultModel: factoryDefaultModel, maxConversationChars, queue } = params;

  const getCompletionParamsAndCacheKey = (options: LlmPromptOptions) => {
    const { ttl, model: callSpecificModel, messages, reasoning_effort, retries, ...restApiOptions } = options;

    // Ensure messages is an array (it should be if normalized, but for safety/types)
    const messagesArray = typeof messages === 'string'
      ? [{ role: 'user', content: messages }] as OpenAI.Chat.Completions.ChatCompletionMessageParam[]
      : messages;

    const finalMessages = maxConversationChars ? truncateMessages(messagesArray, maxConversationChars) : messagesArray;

    const baseConfig = typeof factoryDefaultModel === 'object' && factoryDefaultModel !== null
      ? factoryDefaultModel
      : (typeof factoryDefaultModel === 'string' ? { model: factoryDefaultModel } : {});

    const overrideConfig = typeof callSpecificModel === 'object' && callSpecificModel !== null
      ? callSpecificModel
      : (typeof callSpecificModel === 'string' ? { model: callSpecificModel } : {});

    const modelConfig = { ...baseConfig, ...overrideConfig };

    const { model: modelToUse, ...modelParams } = modelConfig;

    if (typeof modelToUse !== 'string' || !modelToUse) {
      throw new Error('A model must be specified either in the default configuration or in the prompt options.');
    }

    const completionParams = {
      ...modelParams,
      model: modelToUse,
      messages: finalMessages,
      ...restApiOptions,
    };

    let cacheKey: string | undefined;
    if (cacheInstance) {
      const cacheKeyString = JSON.stringify(completionParams);
      cacheKey = `gptask:${crypto.createHash('md5').update(cacheKeyString).digest('hex')}`;
    }

    return { completionParams, cacheKey, ttl, modelToUse, finalMessages, retries };
  };

  async function prompt(content: string, options?: Omit<LlmPromptOptions, 'messages'>): Promise<OpenAI.Chat.Completions.ChatCompletion>;
  async function prompt(options: LlmPromptOptions): Promise<OpenAI.Chat.Completions.ChatCompletion>;
  async function prompt(arg1: string | LlmPromptOptions, arg2?: Omit<LlmPromptOptions, 'messages'>): Promise<OpenAI.Chat.Completions.ChatCompletion> {
    const options = normalizeOptions(arg1, arg2);
    const { completionParams, cacheKey, ttl, modelToUse, finalMessages, retries } = getCompletionParamsAndCacheKey(options);

    if (cacheInstance && cacheKey) {
      try {
        const cachedResponse = await cacheInstance.get<string>(cacheKey);
        if (cachedResponse !== undefined && cachedResponse !== null) {
          return JSON.parse(cachedResponse);
        }
      } catch (error) {
        console.warn("Cache get error:", error);
      }
    }

    const promptSummary = getPromptSummary(finalMessages);

    const apiCallAndCache = async (): Promise<OpenAI.Chat.Completions.ChatCompletion> => {
      const task = () => executeWithRetry<OpenAI.Chat.Completions.ChatCompletion, OpenAI.Chat.Completions.ChatCompletion>(
        async () => {
          return openai.chat.completions.create(completionParams as any);
        },
        async (completion) => {
          return { isValid: true, data: completion };
        },
        retries ?? 3,
        undefined,
        (error: any) => {
          // Do not retry if the API key is invalid (401) or if the error code explicitly states it.
          if (error?.status === 401 || error?.code === 'invalid_api_key') {
            return false;
          }
          return true;
        }
      );

      const response = (await (queue ? queue.add(task, { id: promptSummary } as any) : task())) as OpenAI.Chat.Completions.ChatCompletion;

      if (cacheInstance && response && cacheKey) {
        try {
          await cacheInstance.set(cacheKey, JSON.stringify(response), ttl);
        } catch (error) {
          console.warn("Cache set error:", error);
        }
      }
      return response;
    };

    return apiCallAndCache();
  }

  async function isPromptCached(content: string, options?: Omit<LlmPromptOptions, 'messages'>): Promise<boolean>;
  async function isPromptCached(options: LlmPromptOptions): Promise<boolean>;
  async function isPromptCached(arg1: string | LlmPromptOptions, arg2?: Omit<LlmPromptOptions, 'messages'>): Promise<boolean> {
    const options = normalizeOptions(arg1, arg2);
    const { cacheKey } = getCompletionParamsAndCacheKey(options);
    if (!cacheInstance || !cacheKey) {
      return false;
    }
    try {
      const cachedResponse = await cacheInstance.get<string>(cacheKey);
      return cachedResponse !== undefined && cachedResponse !== null;
    } catch (error) {
      console.warn("Cache get error:", error);
      return false;
    }
  }

  async function promptText(content: string, options?: Omit<LlmPromptOptions, 'messages'>): Promise<string>;
  async function promptText(options: LlmPromptOptions): Promise<string>;
  async function promptText(arg1: string | LlmPromptOptions, arg2?: Omit<LlmPromptOptions, 'messages'>): Promise<string> {
    const options = normalizeOptions(arg1, arg2);
    const response = await prompt(options);
    const content = response.choices[0]?.message?.content;
    if (content === null || content === undefined) {
      throw new Error("LLM returned no text content.");
    }
    return content;
  }

  async function promptImage(content: string, options?: Omit<LlmPromptOptions, 'messages'>): Promise<Buffer>;
  async function promptImage(options: LlmPromptOptions): Promise<Buffer>;
  async function promptImage(arg1: string | LlmPromptOptions, arg2?: Omit<LlmPromptOptions, 'messages'>): Promise<Buffer> {
    const options = normalizeOptions(arg1, arg2);
    const response = await prompt(options);
    const message = response.choices[0]?.message as any;

    if (message.images && Array.isArray(message.images) && message.images.length > 0) {
      const imageUrl = message.images[0].image_url.url;
      if (typeof imageUrl === 'string') {
        if (imageUrl.startsWith('http')) {
          const imgRes = await fetch(imageUrl);
          const arrayBuffer = await imgRes.arrayBuffer();
          return Buffer.from(arrayBuffer);
        } else {
          const base64Data = imageUrl.replace(/^data:image\/\w+;base64,/, "");
          return Buffer.from(base64Data, 'base64');
        }
      }
    }
    throw new Error("LLM returned no image content.");
  }

  return { prompt, isPromptCached, promptText, promptImage };
}

export type PromptFunction = ReturnType<typeof createLlmClient>['prompt'];
export type IsPromptCachedFunction = ReturnType<typeof createLlmClient>['isPromptCached'];
export type PromptTextFunction = ReturnType<typeof createLlmClient>['promptText'];
export type PromptImageFunction = ReturnType<typeof createLlmClient>['promptImage'];
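The sketch below is not part of the package diff. It is a minimal usage example inferred from the signatures in createLlmClient.ts above, assuming the package entry point re-exports createLlmClient; the model id, queue settings, and character limit are illustrative assumptions.

import OpenAI from 'openai';
import PQueue from 'p-queue';
import { createLlmClient } from 'llm-fns'; // assumption: index.ts re-exports createLlmClient

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

const llm = createLlmClient({
  openai,
  defaultModel: 'gpt-4o-mini',           // illustrative model id; any ModelConfig works
  queue: new PQueue({ concurrency: 2 }), // optional: routes API calls through p-queue
  maxConversationChars: 20000,           // optional: enables truncateMessages()
  // cache: <a cache-manager Cache>,     // optional: enables the md5-keyed response cache
});

// String overload; ttl is in milliseconds and only matters when a cache is configured.
const text = await llm.promptText('Summarize this diff in one sentence.', {
  ttl: 60000,
  retries: 2,
});
console.log(text);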
package/src/createLlmRetryClient.ts
@@ -0,0 +1,244 @@
import OpenAI from 'openai';
import { PromptFunction, LlmPromptOptions, normalizeOptions } from "./createLlmClient.js";

// Custom error for the querier to handle, allowing retries with structured feedback.
export class LlmRetryError extends Error {
  constructor(
    public readonly message: string,
    public readonly type: 'JSON_PARSE_ERROR' | 'CUSTOM_ERROR',
    public readonly details?: any,
    public readonly rawResponse?: string | null,
  ) {
    super(message);
    this.name = 'LlmRetryError';
  }
}

export class LlmRetryExhaustedError extends Error {
  constructor(
    public readonly message: string,
    options?: ErrorOptions
  ) {
    super(message, options);
    this.name = 'LlmRetryExhaustedError';
  }
}

// This error is thrown by LlmRetryClient for each failed attempt.
// It wraps the underlying error (from API call or validation) and adds context.
export class LlmRetryAttemptError extends Error {
  constructor(
    public readonly message: string,
    public readonly mode: 'main' | 'fallback',
    public readonly conversation: OpenAI.Chat.Completions.ChatCompletionMessageParam[],
    public readonly attemptNumber: number,
    options?: ErrorOptions
  ) {
    super(message, options);
    this.name = 'LlmRetryAttemptError';
  }
}

export interface LlmRetryResponseInfo {
  mode: 'main' | 'fallback';
  conversation: OpenAI.Chat.Completions.ChatCompletionMessageParam[];
  attemptNumber: number;
}

export type LlmRetryOptions<T = any> = LlmPromptOptions & {
  maxRetries?: number;
  validate?: (response: any, info: LlmRetryResponseInfo) => Promise<T>;
};

export interface CreateLlmRetryClientParams {
  prompt: PromptFunction;
  fallbackPrompt?: PromptFunction;
}

function constructLlmMessages(
  initialMessages: OpenAI.Chat.Completions.ChatCompletionMessageParam[],
  attemptNumber: number,
  previousError?: LlmRetryAttemptError
): OpenAI.Chat.Completions.ChatCompletionMessageParam[] {
  if (attemptNumber === 0) {
    // First attempt
    return initialMessages;
  }

  if (!previousError) {
    // Should not happen for attempt > 0, but as a safeguard...
    throw new Error("Invariant violation: previousError is missing for a retry attempt.");
  }
  const cause = previousError.cause;

  if (!(cause instanceof LlmRetryError)) {
    throw Error('cause must be an instanceof LlmRetryError')
  }

  const messages: OpenAI.Chat.Completions.ChatCompletionMessageParam[] = [...previousError.conversation];

  messages.push({ role: "user", content: cause.message });

  return messages;
}

export function createLlmRetryClient(params: CreateLlmRetryClientParams) {
  const { prompt, fallbackPrompt } = params;

  async function runPromptLoop<T>(
    options: LlmRetryOptions<T>,
    responseType: 'raw' | 'text' | 'image'
  ): Promise<T> {
    const { maxRetries = 3, validate, messages, ...restOptions } = options;

    // Ensure messages is an array (normalizeOptions ensures this but types might be loose)
    const initialMessages = messages as OpenAI.Chat.Completions.ChatCompletionMessageParam[];

    let lastError: LlmRetryAttemptError | undefined;

    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      const useFallback = !!fallbackPrompt && attempt > 0;
      const currentPrompt = useFallback ? fallbackPrompt! : prompt;
      const mode = useFallback ? 'fallback' : 'main';

      const currentMessages = constructLlmMessages(
        initialMessages,
        attempt,
        lastError
      );

      try {
        const completion = await currentPrompt({
          messages: currentMessages,
          ...restOptions,
        });

        const assistantMessage = completion.choices[0]?.message;
        let dataToProcess: any = completion;

        if (responseType === 'text') {
          const content = assistantMessage?.content;
          if (content === null || content === undefined) {
            throw new LlmRetryError("LLM returned no text content.", 'CUSTOM_ERROR', undefined, JSON.stringify(completion));
          }
          dataToProcess = content;
        } else if (responseType === 'image') {
          const messageAny = assistantMessage as any;
          if (messageAny.images && Array.isArray(messageAny.images) && messageAny.images.length > 0) {
            const imageUrl = messageAny.images[0].image_url.url;
            if (typeof imageUrl === 'string') {
              if (imageUrl.startsWith('http')) {
                const imgRes = await fetch(imageUrl);
                const arrayBuffer = await imgRes.arrayBuffer();
                dataToProcess = Buffer.from(arrayBuffer);
              } else {
                const base64Data = imageUrl.replace(/^data:image\/\w+;base64,/, "");
                dataToProcess = Buffer.from(base64Data, 'base64');
              }
            } else {
              throw new LlmRetryError("LLM returned invalid image URL.", 'CUSTOM_ERROR', undefined, JSON.stringify(completion));
            }
          } else {
            throw new LlmRetryError("LLM returned no image.", 'CUSTOM_ERROR', undefined, JSON.stringify(completion));
          }
        }

        // Construct conversation history for success or potential error reporting
        const finalConversation = [...currentMessages];
        if (assistantMessage) {
          finalConversation.push(assistantMessage);
        }

        const info: LlmRetryResponseInfo = {
          mode,
          conversation: finalConversation,
          attemptNumber: attempt,
        };

        if (validate) {
          const result = await validate(dataToProcess, info);
          return result;
        }

        return dataToProcess as T;

      } catch (error: any) {
        if (error instanceof LlmRetryError) {
          // This is a recoverable error, so we'll create a detailed attempt error and continue the loop.
          const conversationForError = [...currentMessages];

          // If the error contains the raw response (e.g. the invalid text), add it to history
          // so the LLM knows what it generated previously.
          if (error.rawResponse) {
            conversationForError.push({ role: 'assistant', content: error.rawResponse });
          } else if (responseType === 'raw' && error.details) {
            // For raw mode, if we have details, maybe we can infer something, but usually rawResponse is key.
          }

          lastError = new LlmRetryAttemptError(
            `Attempt ${attempt + 1} failed.`,
            mode,
            conversationForError,
            attempt,
            { cause: error }
          );
        } else {
          // This is a non-recoverable error (e.g., network, API key), so we re-throw it immediately.
          throw error;
        }
      }
    }

    throw new LlmRetryExhaustedError(
      `Operation failed after ${maxRetries + 1} attempts.`,
      { cause: lastError }
    );
  }

  async function promptRetry<T = OpenAI.Chat.Completions.ChatCompletion>(
    content: string,
    options?: Omit<LlmRetryOptions<T>, 'messages'>
  ): Promise<T>;
  async function promptRetry<T = OpenAI.Chat.Completions.ChatCompletion>(
    options: LlmRetryOptions<T>
  ): Promise<T>;
  async function promptRetry<T = OpenAI.Chat.Completions.ChatCompletion>(
    arg1: string | LlmRetryOptions<T>,
    arg2?: Omit<LlmRetryOptions<T>, 'messages'>
  ): Promise<T> {
    const options = normalizeOptions(arg1, arg2) as LlmRetryOptions<T>;
    return runPromptLoop(options, 'raw');
  }

  async function promptTextRetry<T = string>(
    content: string,
    options?: Omit<LlmRetryOptions<T>, 'messages'>
  ): Promise<T>;
  async function promptTextRetry<T = string>(
    options: LlmRetryOptions<T>
  ): Promise<T>;
  async function promptTextRetry<T = string>(
    arg1: string | LlmRetryOptions<T>,
    arg2?: Omit<LlmRetryOptions<T>, 'messages'>
  ): Promise<T> {
    const options = normalizeOptions(arg1, arg2) as LlmRetryOptions<T>;
    return runPromptLoop(options, 'text');
  }

  async function promptImageRetry<T = Buffer>(
    content: string,
    options?: Omit<LlmRetryOptions<T>, 'messages'>
  ): Promise<T>;
  async function promptImageRetry<T = Buffer>(
    options: LlmRetryOptions<T>
  ): Promise<T>;
  async function promptImageRetry<T = Buffer>(
    arg1: string | LlmRetryOptions<T>,
    arg2?: Omit<LlmRetryOptions<T>, 'messages'>
  ): Promise<T> {
    const options = normalizeOptions(arg1, arg2) as LlmRetryOptions<T>;
    return runPromptLoop(options, 'image');
  }

  return { promptRetry, promptTextRetry, promptImageRetry };
}
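Again not part of the diff: a hedged sketch of how createLlmRetryClient.ts above might be used, assuming the entry point re-exports createLlmRetryClient and LlmRetryError, and that `llm` is the client from the previous sketch. Throwing LlmRetryError from validate() pushes the raw response and the error message back into the conversation before the next attempt.

import { createLlmRetryClient, LlmRetryError } from 'llm-fns'; // assumed re-exports

const retry = createLlmRetryClient({ prompt: llm.prompt }); // optionally pass fallbackPrompt too

// promptTextRetry() hands the assistant's text to validate(); a thrown LlmRetryError
// appends the raw response as an assistant turn and its message as a user turn, then retries.
const parsed = await retry.promptTextRetry('Reply with a JSON object like {"ok": true}.', {
  maxRetries: 2,
  validate: async (text, info) => {
    try {
      return JSON.parse(text);
    } catch (err) {
      throw new LlmRetryError(
        `Attempt ${info.attemptNumber + 1} was not valid JSON. Return only a JSON object.`,
        'JSON_PARSE_ERROR',
        err,
        text,
      );
    }
  },
});
console.log(parsed);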