@proofhound/llm-client 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/LICENSE +190 -0
  2. package/dist/cost.d.ts +10 -0
  3. package/dist/cost.d.ts.map +1 -0
  4. package/dist/cost.js +31 -0
  5. package/dist/cost.js.map +1 -0
  6. package/dist/image-preprocess.d.ts +6 -0
  7. package/dist/image-preprocess.d.ts.map +1 -0
  8. package/dist/image-preprocess.js +286 -0
  9. package/dist/image-preprocess.js.map +1 -0
  10. package/dist/index.d.ts +8 -0
  11. package/dist/index.d.ts.map +1 -0
  12. package/dist/index.js +26 -0
  13. package/dist/index.js.map +1 -0
  14. package/dist/invoke-streaming.d.ts +2 -0
  15. package/dist/invoke-streaming.d.ts.map +1 -0
  16. package/dist/invoke-streaming.js +3 -0
  17. package/dist/invoke-streaming.js.map +1 -0
  18. package/dist/invoke.d.ts +13 -0
  19. package/dist/invoke.d.ts.map +1 -0
  20. package/dist/invoke.js +706 -0
  21. package/dist/invoke.js.map +1 -0
  22. package/dist/json-parse.d.ts +2 -0
  23. package/dist/json-parse.d.ts.map +1 -0
  24. package/dist/json-parse.js +35 -0
  25. package/dist/json-parse.js.map +1 -0
  26. package/dist/payload-cap.d.ts +15 -0
  27. package/dist/payload-cap.d.ts.map +1 -0
  28. package/dist/payload-cap.js +27 -0
  29. package/dist/payload-cap.js.map +1 -0
  30. package/dist/providers/anthropic.adapter.d.ts +5 -0
  31. package/dist/providers/anthropic.adapter.d.ts.map +1 -0
  32. package/dist/providers/anthropic.adapter.js +145 -0
  33. package/dist/providers/anthropic.adapter.js.map +1 -0
  34. package/dist/providers/azure-openai.adapter.d.ts +4 -0
  35. package/dist/providers/azure-openai.adapter.d.ts.map +1 -0
  36. package/dist/providers/azure-openai.adapter.js +39 -0
  37. package/dist/providers/azure-openai.adapter.js.map +1 -0
  38. package/dist/providers/openai.adapter.d.ts +11 -0
  39. package/dist/providers/openai.adapter.d.ts.map +1 -0
  40. package/dist/providers/openai.adapter.js +126 -0
  41. package/dist/providers/openai.adapter.js.map +1 -0
  42. package/dist/token-estimate.d.ts +15 -0
  43. package/dist/token-estimate.d.ts.map +1 -0
  44. package/dist/token-estimate.js +25 -0
  45. package/dist/token-estimate.js.map +1 -0
  46. package/dist/types.d.ts +245 -0
  47. package/dist/types.d.ts.map +1 -0
  48. package/dist/types.js +3 -0
  49. package/dist/types.js.map +1 -0
  50. package/package.json +46 -0
package/dist/invoke.js ADDED
@@ -0,0 +1,706 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.invokeLLM = invokeLLM;
4
+ exports.testModelConnectivity = testModelConnectivity;
5
+ exports.defaultLLMAdapters = defaultLLMAdapters;
6
+ exports.resolveLLMAdapter = resolveLLMAdapter;
7
+ exports.normalizeLLMError = normalizeLLMError;
8
+ const node_crypto_1 = require("node:crypto");
9
+ const node_fs_1 = require("node:fs");
10
+ const node_path_1 = require("node:path");
11
+ const limiter_1 = require("@proofhound/limiter");
12
+ const cost_1 = require("./cost");
13
+ const image_preprocess_1 = require("./image-preprocess");
14
+ const payload_cap_1 = require("./payload-cap");
15
+ const anthropic_adapter_1 = require("./providers/anthropic.adapter");
16
+ const azure_openai_adapter_1 = require("./providers/azure-openai.adapter");
17
+ const openai_adapter_1 = require("./providers/openai.adapter");
18
+ const token_estimate_1 = require("./token-estimate");
19
// Remote sample image referenced by the connectivity probe when the image is sent by URL.
const IMAGE_PROBE_URL = 'https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg';
// File name of the on-disk probe image that findImageProbeAssetPath searches for.
const IMAGE_PROBE_FILE_NAME = 'qwen-vl-demo.jpeg';
// Media type declared for the probe image in base64 payloads and image refs.
const IMAGE_PROBE_MEDIA_TYPE = 'image/jpeg';
// Text part sent alongside the probe image.
const IMAGE_PROBE_TEXT = 'Reply with "pong" if you can process this image input.';
// Memoized { base64, sha256 } of the probe image; populated lazily by loadImageProbeAsset.
let cachedImageProbeAsset;
// Timeout used by invokeLLM when args.timeoutMs is not supplied (5 minutes).
const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000;
25
/**
 * Unified invokeLLM entrypoint.
 *
 * Order: image pre-processing -> limiter.acquire -> provider.invoke -> application log
 * -> [success: run_results] -> limiter.release.
 *
 * On failure the error is only logged and rethrown; run_results is NOT written here. After BullMQ
 * retries are exhausted, the consumer writes the final error row in OnWorkerEvent('failed'). This
 * avoids a first failed error row blocking INSERT...WHERE NOT EXISTS, which would prevent a later
 * successful retry from being persisted.
 *
 * @param {object} args - Invocation request (model, messages or prompt, params, limiterKey,
 *   timeoutMs, maxRetries, parseResponse, evaluateJudgment, runResult, context).
 * @param {object} deps - Collaborators: limiter and logger, plus optional adapters,
 *   runResultWriter and now.
 * @returns {Promise<object>} Content, raw response, parsed output, judgment fields, finish reason,
 *   token usage, cost estimate and duration of the call.
 * @throws Rethrows whatever pre-processing, the limiter, the provider or the response parser threw.
 */
async function invokeLLM(args, deps) {
    assertInvocationShape(args);
    const readClock = () => deps.now?.() ?? Date.now();
    const startedAt = readClock();
    const abortController = new AbortController();
    const timeoutHandle = setTimeout(() => abortController.abort(), args.timeoutMs ?? DEFAULT_TIMEOUT_MS);
    let slotHeld = false;
    let invocationArgs = args;
    try {
        invocationArgs = await (0, image_preprocess_1.preprocessLLMImageInputs)(args);
        const params = normalizeInferenceParamsForProvider(invocationArgs.model, invocationArgs.params ?? {});
        invocationArgs = { ...invocationArgs, params };
        const estimated = (0, token_estimate_1.estimateLLMTokens)({
            messages: invocationArgs.messages,
            prompt: invocationArgs.prompt,
            tools: params.tools,
            responseFormat: params.responseFormat,
            maxTokens: params.maxTokens,
        });
        const provider = resolveLLMAdapter(invocationArgs.model.providerType, deps.adapters);
        const limits = {
            rpmLimit: invocationArgs.model.rpmLimit,
            tpmLimit: invocationArgs.model.tpmLimit,
            concurrencyLimit: invocationArgs.model.concurrencyLimit,
        };
        const acquireResult = await deps.limiter.acquire({
            key: invocationArgs.limiterKey,
            estimatedTokens: estimated.totalTokens,
            limits,
            autoConcurrency: invocationArgs.model.autoConcurrency,
        });
        // From here on the finally block must hand the limiter slot back.
        slotHeld = true;
        if (invocationArgs.model.autoConcurrency && acquireResult) {
            const snapshot = {
                modelId: invocationArgs.model.id,
                effectiveConcurrency: acquireResult.effectiveConcurrency,
                ceiling: invocationArgs.model.concurrencyLimit,
                backoffFactor: acquireResult.backoffFactor,
                latencyEwmaMs: acquireResult.latencyEwmaMs,
            };
            deps.logger.debug?.(snapshot, 'limiter_auto_concurrency');
        }
        const providerInvokeArgs = {
            model: invocationArgs.model,
            messages: invocationArgs.messages,
            prompt: invocationArgs.prompt,
            params,
            signal: abortController.signal,
        };
        logLLMRequest(deps.logger, invocationArgs, provider, providerInvokeArgs, estimated.totalTokens, args.maxRetries ?? 0);
        const retryControl = {
            maxRetries: args.maxRetries ?? 0,
            signal: abortController.signal,
            logger: deps.logger,
            context: invocationArgs.context,
            modelId: invocationArgs.model.id,
            providerModelId: invocationArgs.model.providerModelId,
        };
        const providerResult = await invokeProviderWithRetry(provider, providerInvokeArgs, retryControl);
        const durationMs = readClock() - startedAt;
        let parsed;
        if (args.parseResponse) {
            parsed = args.parseResponse(providerResult.content);
        }
        // Fall back to the local estimate when the provider did not report token counts.
        const usage = {
            inputTokens: providerResult.usage.inputTokens ?? estimated.inputTokens,
            outputTokens: providerResult.usage.outputTokens ?? estimated.outputTokens,
        };
        const costEstimate = (0, cost_1.estimateCostFromTokenUsage)(usage, invocationArgs.model);
        logLLMSuccess(deps.logger, invocationArgs, providerResult, parsed, usage, costEstimate, durationMs);
        if (invocationArgs.model.autoConcurrency) {
            try {
                await deps.limiter.reportOutcome?.({
                    key: invocationArgs.limiterKey,
                    kind: 'success',
                    latencyMs: durationMs,
                    tokens: usage.inputTokens + usage.outputTokens,
                });
            }
            catch {
                // auto-concurrency feedback is best-effort; never fail the call because of it
            }
        }
        let judgmentOutcome = null;
        if (args.evaluateJudgment) {
            judgmentOutcome = safeEvaluateJudgment(args.evaluateJudgment, parsed, providerResult.content);
        }
        const verdict = {
            decisionOutput: judgmentOutcome?.decisionOutput ?? null,
            isCorrect: judgmentOutcome?.isCorrect ?? null,
            judgmentStatus: judgmentOutcome?.judgmentStatus ?? null,
        };
        if (invocationArgs.runResult && deps.runResultWriter) {
            await deps.runResultWriter.writeRunResult({
                ...invocationArgs.runResult,
                roundIndex: invocationArgs.runResult.roundIndex ?? null,
                rawResponse: providerResult.content,
                parsedOutput: parsed,
                ...verdict,
                status: 'success',
                errorClass: null,
                errorMessage: null,
                latencyMs: durationMs,
                inputTokens: usage.inputTokens,
                outputTokens: usage.outputTokens,
                costEstimate,
            });
        }
        return {
            runResultId: invocationArgs.runResult?.id,
            content: providerResult.content,
            rawResponse: providerResult.rawResponse,
            parsed,
            ...verdict,
            finishReason: providerResult.finishReason,
            usage,
            costEstimate,
            durationMs,
        };
    }
    catch (error) {
        // Rate-limit is a "transient" signal, not a business failure: no run_result is written (to avoid
        // polluting metrics); the caller (worker) defers the requeue by retryAfterMs.
        if (error instanceof limiter_1.RateLimitExceededError) {
            throw error;
        }
        // Upstream provider throttle (HTTP 429) feeds the auto-concurrency backoff so effective concurrency
        // converges to what the provider actually sustains. Best-effort; the original error is still rethrown.
        const throttledUpstream = invocationArgs.model.autoConcurrency
            && error instanceof openai_adapter_1.LLMAdapterHttpError
            && error.httpStatus === 429;
        if (throttledUpstream) {
            try {
                await deps.limiter.reportOutcome?.({ key: invocationArgs.limiterKey, kind: 'upstream_throttle' });
                deps.logger.debug?.({ modelId: invocationArgs.model.id }, 'limiter_backoff_applied');
            }
            catch {
                // best-effort
            }
        }
        const durationMs = readClock() - startedAt;
        logLLMFailure(deps.logger, invocationArgs, normalizeError(error), durationMs);
        // Intentionally no run_result write: the job may still succeed during BullMQ retries; only after
        // attempts are exhausted does the consumer write the final error row.
        throw error;
    }
    finally {
        clearTimeout(timeoutHandle);
        if (slotHeld) {
            await deps.limiter.release({ key: invocationArgs.limiterKey });
        }
    }
}
169
/**
 * Sends one small probe request (text, or image when the model declares an image capability)
 * to check whether a model endpoint is reachable.
 *
 * Errors raised inside the limiter/provider section are caught, logged and reported as
 * `{ ok: false, ... }`. Errors raised while building the probe or resolving the adapter happen
 * before that section and propagate to the caller.
 *
 * @param {object} args - model, limiterKey, optional requestId and timeoutMs (default 30 000 ms).
 * @param {object} deps - limiter and logger, plus optional adapters and now.
 * @returns {Promise<object>} `{ ok, modelId, providerType, providerModelId, endpoint, durationMs,
 *   checkedAt }` plus `responsePreview` on success or the normalized error fields on failure.
 */
async function testModelConnectivity(args, deps) {
    const probe = buildConnectivityProbe(args.model);
    const params = normalizeInferenceParamsForProvider(args.model, {
        maxTokens: 8,
        imageRefs: probe.imageRefs,
    });
    const { messages } = probe;
    const estimated = (0, token_estimate_1.estimateLLMTokens)({ messages, maxTokens: params.maxTokens });
    const provider = resolveLLMAdapter(args.model.providerType, deps.adapters);
    const readClock = () => deps.now?.() ?? Date.now();
    const startedAt = readClock();
    const abortController = new AbortController();
    const timeoutHandle = setTimeout(() => abortController.abort(), args.timeoutMs ?? 30_000);
    const endpoint = safeEndpoint(args.model.endpoint);
    // Fresh objects on every call so no log record shares state with another or with the return value.
    const describeModel = () => ({
        id: args.model.id,
        providerModelId: args.model.providerModelId,
        providerType: args.model.providerType,
        endpoint,
    });
    const describeOutcome = (ok, durationMs) => ({
        ok,
        modelId: args.model.id,
        providerType: args.model.providerType,
        providerModelId: args.model.providerModelId,
        endpoint,
        durationMs,
        checkedAt: new Date().toISOString(),
    });
    let slotHeld = false;
    let probeRequestLogged = false;
    try {
        // Connectivity probe goes through the limiter but never reports outcomes — a single health check
        // must not pollute the model's auto-concurrency EWMA / backoff state.
        await deps.limiter.acquire({
            key: args.limiterKey,
            estimatedTokens: estimated.totalTokens,
            limits: {
                rpmLimit: args.model.rpmLimit,
                tpmLimit: args.model.tpmLimit,
                concurrencyLimit: args.model.concurrencyLimit,
            },
            autoConcurrency: args.model.autoConcurrency,
        });
        slotHeld = true;
        const providerInvokeArgs = {
            model: args.model,
            messages,
            params,
            signal: abortController.signal,
        };
        logProbeRequest(deps.logger, args, provider, providerInvokeArgs, probe.type, estimated.totalTokens, probe.imageRefs);
        probeRequestLogged = true;
        const result = await provider.invoke(providerInvokeArgs);
        const durationMs = readClock() - startedAt;
        logProbeResponse(deps.logger, args, result, probe.type, durationMs, probe.imageRefs);
        deps.logger.info({
            requestId: args.requestId,
            model: describeModel(),
            durationMs,
            probeType: probe.type,
            image_refs: probe.imageRefs,
        }, 'model_connectivity_probe_completed');
        return {
            ...describeOutcome(true, durationMs),
            responsePreview: result.content.slice(0, 200),
        };
    }
    catch (error) {
        const durationMs = readClock() - startedAt;
        const normalized = normalizeError(error);
        // Only emit a "response" record when a matching "request sent" record exists.
        if (probeRequestLogged) {
            logProbeFailureResponse(deps.logger, args, normalized, probe.type, durationMs, probe.imageRefs);
        }
        deps.logger.error({
            requestId: args.requestId,
            model: describeModel(),
            durationMs,
            probeType: probe.type,
            image_refs: probe.imageRefs,
            ...normalized,
        }, 'model_connectivity_probe_failed');
        return {
            ...describeOutcome(false, durationMs),
            ...normalized,
        };
    }
    finally {
        clearTimeout(timeoutHandle);
        if (slotHeld) {
            await deps.limiter.release({ key: args.limiterKey });
        }
    }
}
269
// Provider types that are served by the OpenAI adapter under their own providerType.
const OPENAI_COMPATIBLE_PROVIDER_TYPES = ['openai', 'deepseek', 'kimi', 'minimax', 'qwen', 'ernie'];
/**
 * Builds the built-in adapter list: one OpenAI adapter copy per OpenAI-compatible provider type,
 * the Azure OpenAI adapter (as-is and again under providerType 'azure'), and the Anthropic adapter.
 *
 * @returns {object[]} A new array on every call.
 */
function defaultLLMAdapters() {
    const adapters = [];
    for (const providerType of OPENAI_COMPATIBLE_PROVIDER_TYPES) {
        adapters.push({ ...openai_adapter_1.openAIAdapter, providerType });
    }
    adapters.push(azure_openai_adapter_1.azureOpenAIAdapter);
    adapters.push({ ...azure_openai_adapter_1.azureOpenAIAdapter, providerType: 'azure' });
    adapters.push(anthropic_adapter_1.anthropicAdapter);
    return adapters;
}
278
/**
 * Finds the adapter whose providerType equals the normalized form of `providerType`.
 *
 * @param {string} providerType - Provider type as configured on the model.
 * @param {object[]} [adapters] - Candidate adapters; defaults to defaultLLMAdapters().
 * @returns {object} The first matching adapter.
 * @throws {Error} When no adapter matches.
 */
function resolveLLMAdapter(providerType, adapters = defaultLLMAdapters()) {
    const wanted = normalizeProviderType(providerType);
    const match = adapters.find(({ providerType: candidateType }) => candidateType === wanted);
    if (match) {
        return match;
    }
    throw new Error(`unsupported llm provider type: ${providerType}`);
}
286
/**
 * Chooses the connectivity probe for a model based on `model.capabilities.image`:
 * 'none' (or missing) -> plain text "ping"; 'base64' or 'both' -> inline base64 image;
 * anything else -> image by URL.
 *
 * @param {object} model - Model config; reads capabilities.image and providerType.
 * @returns {object} `{ type, messages }` for the text probe, `{ type, messages, imageRefs }` otherwise.
 */
function buildConnectivityProbe(model) {
    const imageCapability = model.capabilities?.image ?? 'none';
    if (imageCapability === 'none') {
        return {
            type: 'text',
            messages: [{ role: 'user', content: 'ping' }],
        };
    }
    let type;
    let base64Asset;
    let imageRefs;
    if (imageCapability === 'base64' || imageCapability === 'both') {
        type = 'image_base64';
        base64Asset = loadImageProbeAsset();
        imageRefs = [{ kind: 'base64', mediaType: IMAGE_PROBE_MEDIA_TYPE, sha256: base64Asset?.sha256 }];
    }
    else {
        type = 'image_url';
        imageRefs = [{ kind: 'url', url: IMAGE_PROBE_URL }];
    }
    // Anthropic uses its own content-part shape; every other provider gets the OpenAI-style parts.
    const buildMessages = normalizeProviderType(model.providerType) === 'anthropic'
        ? buildAnthropicImageProbeMessages
        : buildOpenAICompatibleImageProbeMessages;
    return {
        type,
        messages: buildMessages(base64Asset?.base64),
        imageRefs,
    };
}
308
/**
 * Builds the OpenAI-style image probe: one user message with a text part followed by an
 * image_url part.
 *
 * @param {string} [base64] - Base64 image data; when truthy it is sent as a data: URL,
 *   otherwise the remote probe URL is used.
 * @returns {object[]} Single-element messages array.
 */
function buildOpenAICompatibleImageProbeMessages(base64) {
    let url = IMAGE_PROBE_URL;
    if (base64) {
        url = `data:${IMAGE_PROBE_MEDIA_TYPE};base64,${base64}`;
    }
    const textPart = { type: 'text', text: IMAGE_PROBE_TEXT };
    const imagePart = { type: 'image_url', image_url: { url } };
    return [{ role: 'user', content: [textPart, imagePart] }];
}
320
/**
 * Builds the Anthropic-style image probe: one user message with an image part followed by a
 * text part.
 *
 * @param {string} [base64] - Base64 image data; when truthy a base64 source is used,
 *   otherwise a url source pointing at the remote probe image.
 * @returns {object[]} Single-element messages array.
 */
function buildAnthropicImageProbeMessages(base64) {
    let source;
    if (base64) {
        source = {
            type: 'base64',
            media_type: IMAGE_PROBE_MEDIA_TYPE,
            data: base64,
        };
    }
    else {
        source = {
            type: 'url',
            url: IMAGE_PROBE_URL,
        };
    }
    const imagePart = { type: 'image', source };
    const textPart = { type: 'text', text: IMAGE_PROBE_TEXT };
    return [{ role: 'user', content: [imagePart, textPart] }];
}
346
/**
 * Reads the probe image from disk once and memoizes its base64 encoding and SHA-256 hex digest
 * in the module-level cache.
 *
 * @returns {{ base64: string, sha256: string }} The cached asset.
 * @throws When the asset cannot be located or read.
 */
function loadImageProbeAsset() {
    if (!cachedImageProbeAsset) {
        const bytes = (0, node_fs_1.readFileSync)(findImageProbeAssetPath());
        const base64 = bytes.toString('base64');
        const sha256 = (0, node_crypto_1.createHash)('sha256').update(bytes).digest('hex');
        cachedImageProbeAsset = { base64, sha256 };
    }
    return cachedImageProbeAsset;
}
357
/**
 * Locates the probe image on disk.
 *
 * The working-directory-relative locations are tried first, in their original order. They only
 * match when the process is started from a directory with that layout, so module-relative
 * locations (next to this file, or in a sibling src/assets directory) are tried afterwards as a
 * fallback that does not depend on the working directory.
 *
 * @returns {string} Absolute path of the first candidate that exists.
 * @throws {Error} When none of the candidates exists.
 */
function findImageProbeAssetPath() {
    const cwd = process.cwd();
    const searchDirs = [
        (0, node_path_1.resolve)(cwd, 'packages/llm-client/src/assets'),
        (0, node_path_1.resolve)(cwd, 'src/assets'),
        (0, node_path_1.resolve)(cwd, 'dist/packages/llm-client/src/assets'),
        (0, node_path_1.resolve)(cwd, '../../packages/llm-client/src/assets'),
        (0, node_path_1.resolve)(cwd, '../packages/llm-client/src/assets'),
    ];
    // __dirname exists in CommonJS modules only; the guard keeps the function loadable elsewhere.
    if (typeof __dirname === 'string') {
        searchDirs.push((0, node_path_1.resolve)(__dirname, 'assets'), (0, node_path_1.resolve)(__dirname, '../src/assets'), (0, node_path_1.resolve)(__dirname, '../assets'));
    }
    for (const dir of searchDirs) {
        const candidate = (0, node_path_1.resolve)(dir, IMAGE_PROBE_FILE_NAME);
        if ((0, node_fs_1.existsSync)(candidate)) {
            return candidate;
        }
    }
    throw new Error(`model connectivity image probe asset not found: ${IMAGE_PROBE_FILE_NAME}`);
}
371
/**
 * Canonicalizes a provider type: trims surrounding whitespace, lower-cases it and turns every
 * underscore into a hyphen.
 *
 * @param {string} providerType
 * @returns {string}
 */
function normalizeProviderType(providerType) {
    const lowered = providerType.trim().toLowerCase();
    return lowered.replaceAll('_', '-');
}
374
/**
 * Applies provider-specific normalization to inference params. Only Anthropic models are
 * rewritten (via normalizeAnthropicInferenceParams); every other provider gets `params` back
 * unchanged.
 *
 * @param {object} model - Reads providerType and providerModelId.
 * @param {object} params - Inference params.
 * @returns {object}
 */
function normalizeInferenceParamsForProvider(model, params) {
    const isAnthropic = normalizeProviderType(model.providerType) === 'anthropic';
    if (!isAnthropic) {
        return params;
    }
    return (0, anthropic_adapter_1.normalizeAnthropicInferenceParams)(model.providerModelId, params);
}
380
/**
 * Runs the caller-supplied judgment evaluator and converts any exception it throws into a
 * 'judge_error' outcome, so a faulty evaluator cannot fail an otherwise successful call.
 *
 * @param {Function} evaluator - Called with `{ parsed, rawResponse }`.
 * @param {*} parsed - Parsed provider output.
 * @param {string} rawResponse - Raw provider content.
 * @returns {object} The evaluator's result, or the 'judge_error' placeholder.
 */
function safeEvaluateJudgment(evaluator, parsed, rawResponse) {
    let outcome;
    try {
        outcome = evaluator({ parsed, rawResponse });
    }
    catch {
        outcome = { decisionOutput: null, isCorrect: null, judgmentStatus: 'judge_error' };
    }
    return outcome;
}
388
/**
 * Logs 'llm_call_request_sent' at info level with the size-capped request payload.
 *
 * @param {object} logger
 * @param {object} args - Invocation args used for the base log payload.
 * @param {object} provider - Adapter asked to describe the outgoing request.
 * @param {object} providerArgs - Arguments that will be passed to the provider.
 * @param {number} estimatedTokens
 * @param {number} maxRetries
 */
function logLLMRequest(logger, args, provider, providerArgs, estimatedTokens, maxRetries) {
    const record = {
        ...buildBaseLogPayload(args),
        request: buildProviderRequestLog(provider, providerArgs),
        estimatedTokens,
        maxRetries,
    };
    const capped = (0, payload_cap_1.capLLMLogPayload)(record);
    logger.info(toLogObject(capped), 'llm_call_request_sent');
}
397
/**
 * Logs 'llm_call_completed' at info level with the size-capped response, parsed output,
 * token usage and cost estimate.
 *
 * @param {object} logger
 * @param {object} args - Invocation args used for the base log payload.
 * @param {object} result - Provider result (content, rawResponse, finishReason).
 * @param {*} parsed - Parsed output, if any.
 * @param {{ inputTokens: number, outputTokens: number }} usage
 * @param {*} costEstimate
 * @param {number} durationMs
 */
function logLLMSuccess(logger, args, result, parsed, usage, costEstimate, durationMs) {
    const response = {
        content: result.content,
        raw: result.rawResponse,
        finish_reason: result.finishReason,
        usage: {
            input_tokens: usage.inputTokens,
            output_tokens: usage.outputTokens,
        },
    };
    const record = {
        ...buildBaseLogPayload(args, durationMs),
        response,
        parsed,
        costEstimate,
    };
    const capped = (0, payload_cap_1.capLLMLogPayload)(record);
    logger.info(toLogObject(capped), 'llm_call_completed');
}
415
/**
 * Logs 'llm_call_failed' at error level. The normalized error fields appear both inside
 * `response` (together with the parsed provider error body) and at the top level of the record.
 *
 * @param {object} logger
 * @param {object} args - Invocation args used for the base log payload.
 * @param {object} error - Normalized error (errorClass, errorMessage, optional httpStatus and
 *   providerErrorBody).
 * @param {number} durationMs
 */
function logLLMFailure(logger, args, error, durationMs) {
    const response = {
        outcome: 'failure',
        ...error,
        provider_error: parseProviderErrorBody(error.providerErrorBody),
    };
    const record = {
        ...buildBaseLogPayload(args, durationMs),
        response,
        ...error,
    };
    const capped = (0, payload_cap_1.capLLMLogPayload)(record);
    logger.error(toLogObject(capped), 'llm_call_failed');
}
427
/**
 * Logs 'model_connectivity_probe_request_sent' at info level with the size-capped probe request.
 *
 * @param {object} logger
 * @param {object} args - Probe args (requestId, model).
 * @param {object} provider - Adapter asked to describe the outgoing request.
 * @param {object} providerArgs - Arguments that will be passed to the provider.
 * @param {string} probeType
 * @param {number} estimatedTokens
 * @param {object[]} [imageRefs]
 */
function logProbeRequest(logger, args, provider, providerArgs, probeType, estimatedTokens, imageRefs) {
    const { model } = args;
    const record = {
        requestId: args.requestId,
        model: {
            id: model.id,
            providerModelId: model.providerModelId,
            providerType: model.providerType,
            endpoint: safeEndpoint(model.endpoint),
            temperature: providerArgs.params.temperature,
            max_tokens: providerArgs.params.maxTokens,
        },
        messages: providerArgs.messages,
        request: buildProviderRequestLog(provider, providerArgs),
        image_refs: imageRefs,
        probeType,
        estimatedTokens,
    };
    const capped = (0, payload_cap_1.capLLMLogPayload)(record);
    logger.info(toLogObject(capped), 'model_connectivity_probe_request_sent');
}
446
/**
 * Logs 'model_connectivity_probe_response_received' at info level for a successful probe.
 *
 * @param {object} logger
 * @param {object} args - Probe args (requestId, model).
 * @param {object} result - Provider result (content, rawResponse, finishReason, usage).
 * @param {string} probeType
 * @param {number} durationMs
 * @param {object[]} [imageRefs]
 */
function logProbeResponse(logger, args, result, probeType, durationMs, imageRefs) {
    const { model } = args;
    const modelInfo = {
        id: model.id,
        providerModelId: model.providerModelId,
        providerType: model.providerType,
        endpoint: safeEndpoint(model.endpoint),
    };
    const response = {
        content: result.content,
        raw: result.rawResponse,
        finish_reason: result.finishReason,
        usage: {
            input_tokens: result.usage.inputTokens,
            output_tokens: result.usage.outputTokens,
        },
    };
    const capped = (0, payload_cap_1.capLLMLogPayload)({
        requestId: args.requestId,
        model: modelInfo,
        durationMs,
        probeType,
        image_refs: imageRefs,
        outcome: 'success',
        response,
    });
    logger.info(toLogObject(capped), 'model_connectivity_probe_response_received');
}
471
/**
 * Logs 'model_connectivity_probe_response_received' with outcome 'failure'. This record is
 * written at info level; the error-level record is emitted separately by the caller.
 *
 * @param {object} logger
 * @param {object} args - Probe args (requestId, model).
 * @param {object} error - Normalized error fields.
 * @param {string} probeType
 * @param {number} durationMs
 * @param {object[]} [imageRefs]
 */
function logProbeFailureResponse(logger, args, error, probeType, durationMs, imageRefs) {
    const { model } = args;
    const modelInfo = {
        id: model.id,
        providerModelId: model.providerModelId,
        providerType: model.providerType,
        endpoint: safeEndpoint(model.endpoint),
    };
    const response = {
        ...error,
        provider_error: parseProviderErrorBody(error.providerErrorBody),
    };
    const capped = (0, payload_cap_1.capLLMLogPayload)({
        requestId: args.requestId,
        model: modelInfo,
        durationMs,
        probeType,
        image_refs: imageRefs,
        outcome: 'failure',
        response,
    });
    logger.info(toLogObject(capped), 'model_connectivity_probe_response_received');
}
491
/**
 * Asks the adapter for a loggable description of the outgoing request and falls back to the
 * generic description when the adapter has no buildRequestLog, returns null/undefined, or throws.
 *
 * @param {object} provider - Adapter; buildRequestLog is optional.
 * @param {object} providerArgs
 * @returns {object}
 */
function buildProviderRequestLog(provider, providerArgs) {
    try {
        const described = provider.buildRequestLog?.(providerArgs);
        if (described !== null && described !== undefined) {
            return described;
        }
        return buildFallbackRequestLog(providerArgs);
    }
    catch {
        // Logging must not break the call: describe the request generically instead.
        return buildFallbackRequestLog(providerArgs);
    }
}
499
/**
 * Generic description of a provider request for logging: a POST to the sanitized endpoint with
 * a JSON body built from the model's extraBody, the messages (or the prompt wrapped as a single
 * user message) and the inference params.
 *
 * @param {object} providerArgs - model, messages or prompt, params.
 * @returns {{ method: string, url: *, body: object, headers: object }}
 */
function buildFallbackRequestLog(providerArgs) {
    const { model, params } = providerArgs;
    const messages = providerArgs.messages ?? [{ role: 'user', content: providerArgs.prompt ?? '' }];
    // extraBody is spread first so the explicit fields below take precedence over it.
    const body = {
        ...(model.extraBody ?? {}),
        model: model.providerModelId,
        messages,
        temperature: params.temperature,
        max_tokens: params.maxTokens,
        top_p: params.topP,
        tools: params.tools,
        response_format: params.responseFormat,
    };
    return {
        method: 'POST',
        url: safeEndpoint(model.endpoint),
        body,
        headers: { 'Content-Type': 'application/json' },
    };
}
517
/**
 * Builds the fields shared by every invokeLLM log record: model summary, prompt inputs,
 * selected params and correlation ids. For promptVersionId, source, attempt and runResultId the
 * runResult value wins over the context value.
 *
 * @param {object} args - Invocation args (model, params, messages, prompt, context, runResult).
 * @param {number} [durationMs] - Added to the payload only when provided.
 * @returns {object}
 */
function buildBaseLogPayload(args, durationMs) {
    const { model, context, runResult } = args;
    const params = args.params ?? {};
    const payload = {
        model: {
            id: model.id,
            providerModelId: model.providerModelId,
            endpoint: safeEndpoint(model.endpoint),
            temperature: params.temperature,
            max_tokens: params.maxTokens,
            top_p: params.topP,
        },
        messages: args.messages,
        prompt: args.prompt,
        tools: params.tools,
        response_format: params.responseFormat,
        image_refs: params.imageRefs,
        requestId: context?.requestId,
        dbosWorkflowId: context?.dbosWorkflowId,
        bullmqJobId: context?.bullmqJobId,
        bullmqQueue: context?.bullmqQueue,
        stepName: context?.stepName,
        runResultId: runResult?.id ?? context?.runResultId,
        promptId: context?.promptId,
        promptVersionId: runResult?.promptVersionId ?? context?.promptVersionId,
        source: runResult?.source ?? context?.source,
        attempt: runResult?.attempt ?? context?.attempt,
    };
    if (durationMs !== undefined) {
        payload.durationMs = durationMs;
    }
    return payload;
}
546
/**
 * Turns a capped payload into the object handed to the logger. A non-overflowing object payload
 * is passed through as-is; anything else is wrapped and flagged with `payload_overflow: true`.
 *
 * @param {{ overflow: *, payload: * }} capped
 * @returns {object}
 */
function toLogObject(capped) {
    const { overflow, payload } = capped;
    const isPlainPassThrough = !overflow && typeof payload === 'object' && payload !== null;
    if (isPlainPassThrough) {
        return payload;
    }
    return { payload_overflow: true, payload };
}
555
/**
 * Converts any thrown value into a flat `{ errorClass, errorMessage, ... }` record.
 * Adapter HTTP errors additionally carry httpStatus and providerErrorBody, and prefer a message
 * extracted from the provider error body over the error's own message.
 *
 * @param {*} error - Thrown value.
 * @returns {{ errorClass: string, errorMessage: string, httpStatus?: number, providerErrorBody?: string }}
 */
function normalizeLLMError(error) {
    if (error instanceof openai_adapter_1.LLMAdapterHttpError) {
        const providerMessage = extractProviderErrorMessage(error.providerErrorBody);
        return {
            errorClass: error.name,
            errorMessage: providerMessage ?? error.message,
            httpStatus: error.httpStatus,
            providerErrorBody: error.providerErrorBody,
        };
    }
    if (!(error instanceof Error)) {
        return { errorClass: 'UnknownError', errorMessage: String(error) };
    }
    return { errorClass: error.name, errorMessage: error.message };
}
// Internal alias used by the functions in this module.
const normalizeError = normalizeLLMError;
576
/**
 * Parses a provider error body as JSON and searches it for a human-readable message.
 *
 * @param {string} [providerErrorBody]
 * @returns {string|undefined} The message, or undefined when the body is not JSON or holds none.
 */
function extractProviderErrorMessage(providerErrorBody) {
    try {
        return findErrorMessage(JSON.parse(providerErrorBody));
    }
    catch {
        // Missing or non-JSON body: the caller falls back to the error's own message.
        return undefined;
    }
}
585
/**
 * Parses a provider error body as JSON for structured logging.
 *
 * @param {string} [providerErrorBody]
 * @returns {*} The parsed value, or undefined when the body is empty/missing or not valid JSON.
 */
function parseProviderErrorBody(providerErrorBody) {
    let decoded;
    if (providerErrorBody) {
        try {
            decoded = JSON.parse(providerErrorBody);
        }
        catch {
            decoded = undefined;
        }
    }
    return decoded;
}
595
/**
 * Searches a parsed provider error payload for a message.
 *
 * - string: returned trimmed, or undefined when blank;
 * - array: first element that yields a message;
 * - object: a non-blank string under `message` or `errorMessage` (returned untrimmed),
 *   otherwise the search continues under `error`;
 * - anything else: undefined.
 *
 * @param {*} value
 * @returns {string|undefined}
 */
function findErrorMessage(value) {
    if (typeof value === 'string') {
        const trimmed = value.trim();
        return trimmed === '' ? undefined : trimmed;
    }
    if (Array.isArray(value)) {
        for (const element of value) {
            const nested = findErrorMessage(element);
            if (nested) {
                return nested;
            }
        }
        return undefined;
    }
    if (value === null || typeof value !== 'object') {
        return undefined;
    }
    const direct = [value['message'], value['errorMessage']]
        .find((candidate) => typeof candidate === 'string' && candidate.trim() !== '');
    if (direct !== undefined) {
        return direct;
    }
    return findErrorMessage(value['error']);
}
616
/**
 * Reduces an endpoint to a form that is safe to log or return: scheme, host and path only.
 *
 * A parseable URL is rebuilt from protocol, host and pathname, which drops the query string,
 * the fragment and any userinfo. An endpoint that cannot be parsed (for example one without a
 * scheme) is not returned verbatim, because it may still carry an API key in its query string
 * or credentials before the host; the query, fragment and userinfo are stripped textually.
 *
 * @param {*} endpoint - Configured endpoint; non-string values are returned unchanged.
 * @returns {*} The sanitized endpoint string, or the input itself when it is not a string.
 */
function safeEndpoint(endpoint) {
    try {
        const url = new URL(endpoint);
        return `${url.protocol}//${url.host}${url.pathname}`;
    }
    catch {
        if (typeof endpoint !== 'string') {
            return endpoint;
        }
        // Everything from the first '?' or '#' onwards may contain secrets.
        const withoutQuery = endpoint.split(/[?#]/u, 1)[0];
        // Drop "user:password@" that appears before the first path separator, keeping any scheme.
        return withoutQuery.replace(/^([a-z][a-z0-9+.-]*:\/\/)?[^/@]*@/iu, '$1');
    }
}
625
/**
 * Guards invokeLLM against requests that carry neither messages nor a prompt.
 *
 * @param {object} args - Invocation args.
 * @throws {Error} When both `messages` and `prompt` are falsy.
 */
function assertInvocationShape(args) {
    const hasInput = Boolean(args.messages) || Boolean(args.prompt);
    if (hasInput) {
        return;
    }
    throw new Error('invokeLLM requires messages or prompt');
}
630
// HTTP statuses for which a provider call is attempted again.
const RETRYABLE_HTTP_STATUSES = new Set([408, 429, 500, 502, 503, 504]);
// Base delay of the exponential backoff; also the upper bound of the random jitter.
const RETRY_BASE_BACKOFF_MS = 500;
/**
 * LLM call internal retry layer: retries only on retryable HTTP statuses and network errors,
 * with exponential backoff plus jitter. The retries reuse the limiter slot already held by the
 * caller (one acquire/release around all attempts). RateLimitExceededError, 4xx business errors
 * and AbortError are passed through, not swallowed.
 *
 * @param {object} provider - Adapter whose invoke() is called.
 * @param {object} args - Arguments forwarded to provider.invoke on every attempt.
 * @param {object} control - maxRetries, signal, logger, context, modelId, providerModelId.
 * @returns {Promise<object>} The first successful provider result.
 * @throws The last provider error, or the abort reason when the signal fires during backoff.
 */
async function invokeProviderWithRetry(provider, args, control) {
    for (let attempt = 0;; attempt += 1) {
        try {
            return await provider.invoke(args);
        }
        catch (error) {
            const giveUp = control.signal.aborted
                || attempt >= control.maxRetries
                || !isRetryableProviderError(error);
            if (giveUp) {
                throw error;
            }
            const backoffMs = computeRetryBackoff(attempt);
            const { errorClass, errorMessage, httpStatus } = normalizeError(error);
            const { context } = control;
            control.logger.info({
                requestId: context?.requestId,
                dbosWorkflowId: context?.dbosWorkflowId,
                bullmqJobId: context?.bullmqJobId,
                model: { id: control.modelId, providerModelId: control.providerModelId },
                attempt: attempt + 1,
                maxRetries: control.maxRetries,
                nextBackoffMs: backoffMs,
                errorClass,
                errorMessage,
                httpStatus,
            }, 'llm_call_retrying');
            await sleepMs(backoffMs, control.signal);
        }
    }
}
666
/**
 * Decides whether a provider error is worth retrying.
 *
 * Limiter rate-limit errors and AbortError are never retried. Adapter HTTP errors are retried
 * only for the statuses in RETRYABLE_HTTP_STATUSES. Other Error instances are retried when they
 * carry a known network error code or their message contains "fetch failed".
 *
 * @param {*} error - Thrown value.
 * @returns {boolean}
 */
function isRetryableProviderError(error) {
    if (error instanceof limiter_1.RateLimitExceededError) {
        return false;
    }
    if (error instanceof openai_adapter_1.LLMAdapterHttpError) {
        return RETRYABLE_HTTP_STATUSES.has(error.httpStatus);
    }
    if (!(error instanceof Error) || error.name === 'AbortError') {
        return false;
    }
    const transientCodes = ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'EAI_AGAIN'];
    if (transientCodes.includes(error.code)) {
        return true;
    }
    return /fetch failed/iu.test(error.message);
}
684
/**
 * Delay before the next retry: RETRY_BASE_BACKOFF_MS doubled once per previous attempt, plus a
 * random jitter in [0, RETRY_BASE_BACKOFF_MS).
 *
 * @param {number} attempt - Zero-based index of the attempt that just failed.
 * @returns {number} Delay in milliseconds.
 */
function computeRetryBackoff(attempt) {
    const exponential = RETRY_BASE_BACKOFF_MS * 2 ** attempt;
    return exponential + Math.random() * RETRY_BASE_BACKOFF_MS;
}
689
/**
 * Abortable sleep. Resolves after `ms` milliseconds, or rejects as soon as `signal` aborts
 * (immediately when it is already aborted). The rejection value is `signal.reason` when that is
 * an Error, otherwise a new Error('aborted').
 *
 * @param {number} ms - Delay in milliseconds.
 * @param {AbortSignal} signal
 * @returns {Promise<void>}
 */
function sleepMs(ms, signal) {
    const abortError = () => (signal.reason instanceof Error ? signal.reason : new Error('aborted'));
    return new Promise((resolve, reject) => {
        if (signal.aborted) {
            reject(abortError());
            return;
        }
        let timer;
        function onAbort() {
            clearTimeout(timer);
            reject(abortError());
        }
        timer = setTimeout(() => {
            // Detach the listener so a later abort cannot touch the settled promise.
            signal.removeEventListener('abort', onAbort);
            resolve();
        }, ms);
        signal.addEventListener('abort', onAbort, { once: true });
    });
}
706
+ //# sourceMappingURL=invoke.js.map