@diabolicallabs/llm-client 0.2.0 → 0.4.0

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
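The 0.4.0 code in this diff adds per-attempt timeouts, stream stall detection, caller cancellation via AbortSignal, a `kind` discriminator on LlmError, and a strict structured-output path for Zod 4 schemas. As a rough orientation before reading the diff, here is a minimal consumer-side sketch of that surface. It is inferred from the bundled dist code below, not from the package's documentation; the exported names (createOpenAIProvider, LlmError), the config fields, and the model string are assumptions.

// Sketch only: export names and config fields are assumed from the bundled code
// in this diff, not verified against the package's type declarations.
import { createOpenAIProvider, LlmError } from "@diabolicallabs/llm-client";
import { z } from "zod";

const provider = createOpenAIProvider({
  apiKey: process.env.OPENAI_API_KEY ?? "",
  model: "gpt-4o-mini",            // placeholder model name
  timeoutMs: 15_000,               // per-attempt timeout (new in 0.4.0)
  streamStallTimeoutMs: 10_000     // abort a stream when no chunk arrives in time
});

async function main(): Promise<void> {
  const controller = new AbortController();
  const Person = z.object({ name: z.string(), age: z.number() }); // Zod 4 schema

  try {
    // With a Zod 4 schema, structured() takes the strict path (OpenAI
    // response_format json_schema); pass providerOptions.structuredMode = "prompt"
    // to keep the older prompt-only behaviour.
    const result = await provider.structured(
      [{ role: "user", content: "Extract: Ada Lovelace, 36" }],
      Person,
      { signal: controller.signal }
    );
    console.log(result.data, result.model, result.usage, result.latencyMs);
  } catch (err) {
    if (err instanceof LlmError) {
      // New in 0.4.0: err.kind is one of "timeout" | "stream_stall" | "cancelled"
      // | "network" | "http" | "unknown"; "cancelled" errors are never retried.
      console.error(err.kind, err.retryable, err.statusCode ?? "");
      return;
    }
    throw err;
  }
}

void main();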
package/dist/index.js CHANGED
@@ -7,6 +7,13 @@ var LlmError = class extends Error {
7
7
  provider;
8
8
  statusCode;
9
9
  retryable;
10
+ /**
11
+ * Optional error kind discriminator. Present on errors produced by the abort/timeout/stall
12
+ * machinery (v0.3.0+). May be undefined on errors from providers that pre-date the kind field
13
+ * or on errors that fall through to the generic normalization path.
14
+ * Typed as LlmErrorKind | undefined to satisfy exactOptionalPropertyTypes.
15
+ */
16
+ kind;
10
17
  // `cause` is declared on Error in lib.es2022.error.d.ts as `cause?: unknown`
11
18
  // We override it here to make it always present (not optional) after construction.
12
19
  cause;
@@ -15,10 +22,249 @@ var LlmError = class extends Error {
15
22
  this.provider = opts.provider;
16
23
  this.statusCode = opts.statusCode;
17
24
  this.retryable = opts.retryable;
25
+ this.kind = opts.kind;
18
26
  this.cause = opts.cause;
19
27
  }
20
28
  };
21
29
 
30
+ // src/abort.ts
31
+ function createAttemptController(callerSignal, timeoutMs) {
32
+ const internal = new AbortController();
33
+ let reason;
34
+ const timer = setTimeout(() => {
35
+ reason ??= "timeout";
36
+ internal.abort(new Error("llm-client: timeout"));
37
+ }, timeoutMs);
38
+ timer.unref?.();
39
+ const onCallerAbort = () => {
40
+ reason ??= "caller";
41
+ if (callerSignal !== void 0) internal.abort(callerSignal.reason);
42
+ };
43
+ if (callerSignal !== void 0) {
44
+ if (callerSignal.aborted) {
45
+ onCallerAbort();
46
+ } else {
47
+ callerSignal.addEventListener("abort", onCallerAbort, { once: true });
48
+ }
49
+ }
50
+ return {
51
+ signal: internal.signal,
52
+ abortReason: () => reason,
53
+ abort: (r) => {
54
+ reason ??= r;
55
+ internal.abort();
56
+ },
57
+ dispose: () => {
58
+ clearTimeout(timer);
59
+ callerSignal?.removeEventListener("abort", onCallerAbort);
60
+ }
61
+ };
62
+ }
63
+ function cancellableSleep(ms, signal) {
64
+ return new Promise((resolve) => {
65
+ if (signal?.aborted) {
66
+ resolve();
67
+ return;
68
+ }
69
+ const timer = setTimeout(() => {
70
+ signal?.removeEventListener("abort", onAbort);
71
+ resolve();
72
+ }, ms);
73
+ timer.unref?.();
74
+ const onAbort = () => {
75
+ clearTimeout(timer);
76
+ resolve();
77
+ };
78
+ signal?.addEventListener("abort", onAbort, { once: true });
79
+ });
80
+ }
81
+ async function* withStallTimeout(source, stallMs, ctl, provider) {
82
+ const it = source[Symbol.asyncIterator]();
83
+ while (true) {
84
+ let stallTimer;
85
+ const stallPromise = new Promise((_, reject) => {
86
+ stallTimer = setTimeout(() => {
87
+ ctl.abort("stall");
88
+ reject(
89
+ new LlmError({
90
+ provider,
91
+ kind: "stream_stall",
92
+ retryable: true,
93
+ message: `llm-client: no chunk for ${stallMs}ms`
94
+ })
95
+ );
96
+ }, stallMs);
97
+ });
98
+ try {
99
+ const next = await Promise.race([it.next(), stallPromise]);
100
+ clearTimeout(stallTimer);
101
+ if (next.done) return;
102
+ yield next.value;
103
+ } catch (err) {
104
+ clearTimeout(stallTimer);
105
+ throw err;
106
+ }
107
+ }
108
+ }
109
+ function classifyAbort(err, abortReason, provider) {
110
+ const controllerFired = abortReason !== void 0;
111
+ if (!controllerFired && !isAbortError(err)) return err;
112
+ switch (abortReason) {
113
+ case "timeout":
114
+ return new LlmError({
115
+ message: "llm-client: request timed out",
116
+ provider,
117
+ kind: "timeout",
118
+ retryable: true,
119
+ cause: err
120
+ });
121
+ case "stall":
122
+ return new LlmError({
123
+ message: "llm-client: stream stalled",
124
+ provider,
125
+ kind: "stream_stall",
126
+ retryable: true,
127
+ cause: err
128
+ });
129
+ default:
130
+ return new LlmError({
131
+ message: "llm-client: cancelled by caller",
132
+ provider,
133
+ kind: "cancelled",
134
+ retryable: false,
135
+ cause: err
136
+ });
137
+ }
138
+ }
139
+ function isAbortError(err) {
140
+ if (err instanceof Error && err.name === "AbortError") return true;
141
+ if (typeof DOMException !== "undefined" && err instanceof DOMException && err.name === "AbortError")
142
+ return true;
143
+ return false;
144
+ }
145
+
146
+ // src/json-schema.ts
147
+ import { z } from "zod";
148
+ function isZodSchema(s) {
149
+ if (typeof s !== "object" || s === null) return false;
150
+ const hasZod4Marker = "_zod" in s && typeof s._zod === "object";
151
+ const hasZod3Marker = "_def" in s;
152
+ if (hasZod3Marker && !hasZod4Marker) {
153
+ throw new LlmError({
154
+ message: 'llm-client: detected a Zod 3 schema. Upgrade to Zod 4 to use strict structured-output mode, or pass providerOptions.structuredMode = "prompt" to keep the v0.3.0 prompt-only path.',
155
+ provider: "llm-client",
156
+ retryable: false,
157
+ kind: "unknown"
158
+ });
159
+ }
160
+ if (!hasZod4Marker) return false;
161
+ return typeof s.parse === "function";
162
+ }
163
+ function toProviderSchema(schema, profile) {
164
+ const target = profile === "gemini" ? "openapi-3.0" : "draft-2020-12";
165
+ let json;
166
+ try {
167
+ json = z.toJSONSchema(schema, {
168
+ target,
169
+ unrepresentable: "throw",
170
+ cycles: "throw"
171
+ });
172
+ } catch (e) {
173
+ throw new LlmError({
174
+ message: `llm-client: schema is not representable for ${profile} strict mode \u2014 ${e.message}. Pass providerOptions.structuredMode = 'prompt' to fall back to prompt-only mode.`,
175
+ provider: profile,
176
+ retryable: false,
177
+ kind: "unknown",
178
+ cause: e
179
+ });
180
+ }
181
+ if (profile === "openai") return openAIStrictPostprocess(json);
182
+ if (profile === "gemini") return geminiPostprocess(json);
183
+ return anthropicPostprocess(json);
184
+ }
185
+ function openAIStrictPostprocess(node) {
186
+ if (typeof node !== "object" || node === null) {
187
+ return node;
188
+ }
189
+ if (Array.isArray(node)) {
190
+ return node.map(openAIStrictPostprocess);
191
+ }
192
+ const src = node;
193
+ const obj = { ...src };
194
+ delete obj.$schema;
195
+ delete obj.format;
196
+ delete obj.pattern;
197
+ delete obj.default;
198
+ delete obj.examples;
199
+ if (obj.type === "object" && obj.properties !== void 0) {
200
+ const props = obj.properties;
201
+ const allKeys = Object.keys(props);
202
+ obj.required = allKeys;
203
+ obj.additionalProperties = false;
204
+ const processedProps = {};
205
+ for (const key of allKeys) {
206
+ processedProps[key] = openAIStrictPostprocess(props[key]);
207
+ }
208
+ obj.properties = processedProps;
209
+ }
210
+ if (obj.items !== void 0) {
211
+ obj.items = openAIStrictPostprocess(obj.items);
212
+ }
213
+ if (Array.isArray(obj.anyOf)) {
214
+ obj.anyOf = obj.anyOf.map(openAIStrictPostprocess);
215
+ }
216
+ if (Array.isArray(obj.oneOf)) {
217
+ obj.oneOf = obj.oneOf.map(openAIStrictPostprocess);
218
+ }
219
+ if (Array.isArray(obj.allOf)) {
220
+ obj.allOf = obj.allOf.map(openAIStrictPostprocess);
221
+ }
222
+ if (Array.isArray(obj.prefixItems)) {
223
+ obj.prefixItems = obj.prefixItems.map(openAIStrictPostprocess);
224
+ }
225
+ return obj;
226
+ }
227
+ function anthropicPostprocess(node) {
228
+ const obj = { ...node };
229
+ delete obj.$schema;
230
+ return obj;
231
+ }
232
+ function geminiPostprocess(node) {
233
+ if (typeof node !== "object" || node === null) {
234
+ return node;
235
+ }
236
+ if (Array.isArray(node)) {
237
+ return node.map(geminiPostprocess);
238
+ }
239
+ const src = node;
240
+ const obj = { ...src };
241
+ delete obj.$schema;
242
+ delete obj.additionalProperties;
243
+ delete obj.default;
244
+ delete obj.examples;
245
+ if (obj.properties !== void 0) {
246
+ const props = obj.properties;
247
+ const processedProps = {};
248
+ for (const key of Object.keys(props)) {
249
+ processedProps[key] = geminiPostprocess(props[key]);
250
+ }
251
+ obj.properties = processedProps;
252
+ }
253
+ if (obj.items !== void 0) {
254
+ obj.items = geminiPostprocess(obj.items);
255
+ }
256
+ if (Array.isArray(obj.anyOf)) {
257
+ obj.anyOf = obj.anyOf.map(geminiPostprocess);
258
+ }
259
+ if (Array.isArray(obj.oneOf)) {
260
+ obj.oneOf = obj.oneOf.map(geminiPostprocess);
261
+ }
262
+ if (Array.isArray(obj.allOf)) {
263
+ obj.allOf = obj.allOf.map(geminiPostprocess);
264
+ }
265
+ return obj;
266
+ }
267
+
22
268
  // src/retry.ts
23
269
  var RETRYABLE_HTTP_STATUSES = /* @__PURE__ */ new Set([429, 502, 503, 504]);
24
270
  var RETRYABLE_ERROR_CODES = /* @__PURE__ */ new Set(["ECONNRESET", "ETIMEDOUT", "ECONNABORTED"]);
@@ -35,19 +281,32 @@ function computeBackoffMs(attempt, baseDelayMs) {
35
281
  const ceiling = baseDelayMs * 2 ** attempt;
36
282
  return Math.random() * ceiling;
37
283
  }
284
+ function mergeRetryOptsWithSignal(base, signal) {
285
+ return signal !== void 0 ? { ...base, signal } : { ...base };
286
+ }
38
287
  async function withRetry(fn, opts) {
39
288
  let lastError;
40
289
  for (let attempt = 0; attempt <= opts.maxRetries; attempt++) {
290
+ if (opts.signal?.aborted === true) {
291
+ throw new LlmError({
292
+ message: "llm-client: cancelled by caller",
293
+ provider: opts.provider,
294
+ kind: "cancelled",
295
+ retryable: false,
296
+ cause: opts.signal.reason
297
+ });
298
+ }
41
299
  try {
42
300
  return await fn(attempt);
43
301
  } catch (err) {
44
302
  const llmErr = normalizeThrownError(err, opts.provider);
303
+ if (llmErr.kind === "cancelled") throw llmErr;
45
304
  if (!llmErr.retryable || attempt === opts.maxRetries) {
46
305
  throw llmErr;
47
306
  }
48
307
  lastError = llmErr;
49
308
  const delayMs = computeBackoffMs(attempt, opts.baseDelayMs);
50
- await sleep(delayMs);
309
+ await cancellableSleep(delayMs, opts.signal);
51
310
  }
52
311
  }
53
312
  throw lastError ?? new LlmError({
@@ -59,6 +318,15 @@ async function withRetry(fn, opts) {
59
318
  function normalizeThrownError(err, provider) {
60
319
  if (err instanceof LlmError) return err;
61
320
  if (err instanceof Error) {
321
+ if (err.name === "AbortError" || typeof DOMException !== "undefined" && err instanceof DOMException && err.name === "AbortError") {
322
+ return new LlmError({
323
+ message: err.message || "llm-client: cancelled by caller",
324
+ provider,
325
+ kind: "cancelled",
326
+ retryable: false,
327
+ cause: err
328
+ });
329
+ }
62
330
  const errWithCode = err;
63
331
  const statusCode = errWithCode.status ?? errWithCode.statusCode;
64
332
  if (errWithCode.code !== void 0 && isRetryableErrorCode(errWithCode.code)) {
@@ -67,24 +335,34 @@ function normalizeThrownError(err, provider) {
67
335
  message: err.message,
68
336
  provider,
69
337
  statusCode,
338
+ kind: "network",
70
339
  retryable: true,
71
340
  cause: err
72
341
  });
73
342
  }
74
- return new LlmError({ message: err.message, provider, retryable: true, cause: err });
343
+ return new LlmError({
344
+ message: err.message,
345
+ provider,
346
+ kind: "network",
347
+ retryable: true,
348
+ cause: err
349
+ });
75
350
  }
76
351
  if (statusCode !== void 0) {
352
+ const retryable = isRetryableStatus(statusCode);
77
353
  return new LlmError({
78
354
  message: err.message,
79
355
  provider,
80
356
  statusCode,
81
- retryable: isRetryableStatus(statusCode),
357
+ kind: "http",
358
+ retryable,
82
359
  cause: err
83
360
  });
84
361
  }
85
362
  return new LlmError({
86
363
  message: err.message,
87
364
  provider,
365
+ kind: "unknown",
88
366
  retryable: false,
89
367
  cause: err
90
368
  });
@@ -92,13 +370,11 @@ function normalizeThrownError(err, provider) {
92
370
  return new LlmError({
93
371
  message: String(err),
94
372
  provider,
373
+ kind: "unknown",
95
374
  retryable: false,
96
375
  cause: err
97
376
  });
98
377
  }
99
- function sleep(ms) {
100
- return new Promise((resolve) => setTimeout(resolve, ms));
101
- }
102
378
 
103
379
  // src/providers/anthropic.ts
104
380
  var PROVIDER = "anthropic";
@@ -166,35 +442,44 @@ function createAnthropicProvider(config) {
166
442
  async function complete(messages, options) {
167
443
  const model = options?.model ?? config.model;
168
444
  const { system, messages: anthropicMessages } = buildAnthropicMessages(messages);
445
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
169
446
  const start = Date.now();
170
- return withRetry(async () => {
171
- try {
172
- const params = {
173
- model,
174
- messages: anthropicMessages,
175
- max_tokens: options?.maxTokens ?? config.maxTokens ?? 1024
176
- };
177
- if (system !== void 0) params.system = system;
178
- const temperature = options?.temperature ?? config.temperature;
179
- if (temperature !== void 0) {
180
- params.temperature = temperature;
447
+ return withRetry(
448
+ async () => {
449
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
450
+ try {
451
+ const params = {
452
+ model,
453
+ messages: anthropicMessages,
454
+ max_tokens: options?.maxTokens ?? config.maxTokens ?? 1024
455
+ };
456
+ if (system !== void 0) params.system = system;
457
+ const temperature = options?.temperature ?? config.temperature;
458
+ if (temperature !== void 0) {
459
+ params.temperature = temperature;
460
+ }
461
+ const response = await client.messages.create(params, { signal: ctl.signal });
462
+ const content = response.content.filter((block) => block.type === "text").map((block) => block.text).join("");
463
+ return {
464
+ content,
465
+ model: response.model,
466
+ usage: normalizeUsage(response.usage),
467
+ latencyMs: Date.now() - start
468
+ };
469
+ } catch (err) {
470
+ throw normalizeAnthropicError(classifyAbort(err, ctl.abortReason(), PROVIDER));
471
+ } finally {
472
+ ctl.dispose();
181
473
  }
182
- const response = await client.messages.create(params);
183
- const content = response.content.filter((block) => block.type === "text").map((block) => block.text).join("");
184
- return {
185
- content,
186
- model: response.model,
187
- usage: normalizeUsage(response.usage),
188
- latencyMs: Date.now() - start
189
- };
190
- } catch (err) {
191
- throw normalizeAnthropicError(err);
192
- }
193
- }, retryOpts);
474
+ },
475
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
476
+ );
194
477
  }
195
478
  async function* stream(messages, options) {
196
479
  const model = options?.model ?? config.model;
197
480
  const { system, messages: anthropicMessages } = buildAnthropicMessages(messages);
481
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
482
+ const stallMs = options?.streamStallTimeoutMs ?? config.streamStallTimeoutMs ?? 3e4;
198
483
  const params = {
199
484
  model,
200
485
  messages: anthropicMessages,
@@ -205,15 +490,23 @@ function createAnthropicProvider(config) {
205
490
  if (streamTemperature !== void 0) {
206
491
  params.temperature = streamTemperature;
207
492
  }
493
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
208
494
  let sdkStream;
209
495
  try {
210
- sdkStream = client.messages.stream(params);
496
+ sdkStream = client.messages.stream(params, { signal: ctl.signal });
211
497
  } catch (err) {
212
- throw normalizeAnthropicError(err);
498
+ ctl.dispose();
499
+ throw normalizeAnthropicError(classifyAbort(err, ctl.abortReason(), PROVIDER));
213
500
  }
214
501
  let finalUsage;
215
502
  try {
216
- for await (const event of sdkStream) {
503
+ const stallWrapped = withStallTimeout(
504
+ sdkStream,
505
+ stallMs,
506
+ ctl,
507
+ PROVIDER
508
+ );
509
+ for await (const event of stallWrapped) {
217
510
  if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
218
511
  yield { token: event.delta.text };
219
512
  } else if (event.type === "message_delta" && "usage" in event) {
@@ -222,13 +515,85 @@ function createAnthropicProvider(config) {
222
515
  }
223
516
  }
224
517
  } catch (err) {
225
- throw normalizeAnthropicError(err);
518
+ throw normalizeAnthropicError(classifyAbort(err, ctl.abortReason(), PROVIDER));
519
+ } finally {
520
+ ctl.dispose();
226
521
  }
227
522
  if (finalUsage !== void 0) {
228
523
  yield { token: "", usage: finalUsage };
229
524
  }
230
525
  }
231
526
  async function structured(messages, schema, options) {
527
+ const structuredMode = options?.providerOptions?.["structuredMode"];
528
+ const useStrict = isZodSchema(schema) && structuredMode !== "prompt";
529
+ if (!useStrict) {
530
+ return structuredPromptFallback(messages, schema, options);
531
+ }
532
+ const inputSchema = toProviderSchema(schema, "anthropic");
533
+ const { system, messages: anthropicMessages } = buildAnthropicMessages(messages);
534
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
535
+ const start = Date.now();
536
+ const response = await withRetry(
537
+ async () => {
538
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
539
+ try {
540
+ const params = {
541
+ model: options?.model ?? config.model,
542
+ messages: anthropicMessages,
543
+ max_tokens: options?.maxTokens ?? config.maxTokens ?? 1024,
544
+ tools: [
545
+ {
546
+ name: "extract",
547
+ description: "Return the structured data.",
548
+ input_schema: inputSchema
549
+ }
550
+ ],
551
+ tool_choice: { type: "tool", name: "extract" }
552
+ };
553
+ if (system !== void 0) params.system = system;
554
+ const temperature = options?.temperature ?? config.temperature;
555
+ if (temperature !== void 0) params.temperature = temperature;
556
+ return await client.messages.create(params, { signal: ctl.signal });
557
+ } catch (err) {
558
+ throw normalizeAnthropicError(classifyAbort(err, ctl.abortReason(), PROVIDER));
559
+ } finally {
560
+ ctl.dispose();
561
+ }
562
+ },
563
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
564
+ );
565
+ const toolBlock = response.content.find(
566
+ (b) => b.type === "tool_use" && b.name === "extract"
567
+ );
568
+ if (toolBlock === void 0) {
569
+ const textContent = response.content.filter((b) => b.type === "text").map((b) => b.text).join("");
570
+ throw new LlmError({
571
+ message: `Anthropic structured: model did not call the extract tool (stop_reason=${response.stop_reason}). Text: ${textContent.slice(0, 200)}`,
572
+ provider: PROVIDER,
573
+ retryable: false,
574
+ kind: "unknown"
575
+ });
576
+ }
577
+ let data;
578
+ try {
579
+ data = schema.parse(toolBlock.input);
580
+ } catch (err) {
581
+ throw new LlmError({
582
+ message: `Anthropic structured output: tool response failed schema validation. ${String(err)}`,
583
+ provider: PROVIDER,
584
+ retryable: false,
585
+ cause: err
586
+ });
587
+ }
588
+ return {
589
+ data,
590
+ model: response.model,
591
+ id: response.id,
592
+ usage: normalizeUsage(response.usage),
593
+ latencyMs: Date.now() - start
594
+ };
595
+ }
596
+ async function structuredPromptFallback(messages, schema, options) {
232
597
  const jsonSystemInstruction = {
233
598
  role: "system",
234
599
  content: "You must respond with valid JSON only. No explanations, no markdown code fences, no extra text. Your entire response must be valid JSON that can be parsed with JSON.parse()."
@@ -261,6 +626,7 @@ function createAnthropicProvider(config) {
261
626
  }
262
627
  return {
263
628
  data,
629
+ model: response.model,
264
630
  usage: response.usage,
265
631
  latencyMs: Date.now() - start
266
632
  };
@@ -334,34 +700,43 @@ function createDeepSeekProvider(config) {
334
700
  async function complete(messages, options) {
335
701
  const model = options?.model ?? config.model;
336
702
  const chatMessages = buildMessages(messages);
703
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
337
704
  const start = Date.now();
338
- return withRetry(async () => {
339
- try {
340
- const params = {
341
- model,
342
- messages: chatMessages,
343
- stream: false
344
- };
345
- const maxTokens = options?.maxTokens ?? config.maxTokens;
346
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
347
- const temperature = options?.temperature ?? config.temperature;
348
- if (temperature !== void 0) params.temperature = temperature;
349
- const response = await client.chat.completions.create(params);
350
- const content = response.choices.map((c) => c.message.content ?? "").join("");
351
- return {
352
- content,
353
- model: response.model,
354
- usage: normalizeUsage2(response.usage),
355
- latencyMs: Date.now() - start
356
- };
357
- } catch (err) {
358
- throw normalizeDeepSeekError(err);
359
- }
360
- }, retryOpts);
705
+ return withRetry(
706
+ async () => {
707
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
708
+ try {
709
+ const params = {
710
+ model,
711
+ messages: chatMessages,
712
+ stream: false
713
+ };
714
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
715
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
716
+ const temperature = options?.temperature ?? config.temperature;
717
+ if (temperature !== void 0) params.temperature = temperature;
718
+ const response = await client.chat.completions.create(params, { signal: ctl.signal });
719
+ const content = response.choices.map((c) => c.message.content ?? "").join("");
720
+ return {
721
+ content,
722
+ model: response.model,
723
+ usage: normalizeUsage2(response.usage),
724
+ latencyMs: Date.now() - start
725
+ };
726
+ } catch (err) {
727
+ throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
728
+ } finally {
729
+ ctl.dispose();
730
+ }
731
+ },
732
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
733
+ );
361
734
  }
362
735
  async function* stream(messages, options) {
363
736
  const model = options?.model ?? config.model;
364
737
  const chatMessages = buildMessages(messages);
738
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
739
+ const stallMs = options?.streamStallTimeoutMs ?? config.streamStallTimeoutMs ?? 3e4;
365
740
  const params = {
366
741
  model,
367
742
  messages: chatMessages,
@@ -372,15 +747,17 @@ function createDeepSeekProvider(config) {
372
747
  if (maxTokens !== void 0) params.max_tokens = maxTokens;
373
748
  const temperature = options?.temperature ?? config.temperature;
374
749
  if (temperature !== void 0) params.temperature = temperature;
750
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
375
751
  let sdkStream;
376
752
  try {
377
- sdkStream = await client.chat.completions.create(params);
753
+ sdkStream = await client.chat.completions.create(params, { signal: ctl.signal });
378
754
  } catch (err) {
379
- throw normalizeDeepSeekError(err);
755
+ ctl.dispose();
756
+ throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
380
757
  }
381
758
  let finalUsage;
382
759
  try {
383
- for await (const chunk of sdkStream) {
760
+ for await (const chunk of withStallTimeout(sdkStream, stallMs, ctl, PROVIDER2)) {
384
761
  const delta = chunk.choices[0]?.delta.content;
385
762
  if (delta !== void 0 && delta !== null && delta.length > 0) {
386
763
  yield { token: delta };
@@ -390,7 +767,9 @@ function createDeepSeekProvider(config) {
390
767
  }
391
768
  }
392
769
  } catch (err) {
393
- throw normalizeDeepSeekError(err);
770
+ throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
771
+ } finally {
772
+ ctl.dispose();
394
773
  }
395
774
  if (finalUsage !== void 0) {
396
775
  yield { token: "", usage: finalUsage };
@@ -404,23 +783,30 @@ function createDeepSeekProvider(config) {
404
783
  const augmentedMessages = [jsonSystemInstruction, ...messages];
405
784
  const model = options?.model ?? config.model;
406
785
  const chatMessages = buildMessages(augmentedMessages);
786
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
407
787
  const start = Date.now();
408
- const rawResponse = await withRetry(async () => {
409
- try {
410
- const params = {
411
- model,
412
- messages: chatMessages,
413
- stream: false
414
- };
415
- const maxTokens = options?.maxTokens ?? config.maxTokens;
416
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
417
- const temperature = options?.temperature ?? config.temperature;
418
- if (temperature !== void 0) params.temperature = temperature;
419
- return await client.chat.completions.create(params);
420
- } catch (err) {
421
- throw normalizeDeepSeekError(err);
422
- }
423
- }, retryOpts);
788
+ const rawResponse = await withRetry(
789
+ async () => {
790
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
791
+ try {
792
+ const params = {
793
+ model,
794
+ messages: chatMessages,
795
+ stream: false
796
+ };
797
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
798
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
799
+ const temperature = options?.temperature ?? config.temperature;
800
+ if (temperature !== void 0) params.temperature = temperature;
801
+ return await client.chat.completions.create(params, { signal: ctl.signal });
802
+ } catch (err) {
803
+ throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
804
+ } finally {
805
+ ctl.dispose();
806
+ }
807
+ },
808
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
809
+ );
424
810
  const rawContent = rawResponse.choices[0]?.message.content ?? "";
425
811
  let parsed;
426
812
  try {
@@ -447,6 +833,8 @@ function createDeepSeekProvider(config) {
447
833
  }
448
834
  return {
449
835
  data,
836
+ model: rawResponse.model,
837
+ id: rawResponse.id,
450
838
  usage: normalizeUsage2(rawResponse.usage),
451
839
  latencyMs: Date.now() - start
452
840
  };
@@ -498,11 +886,26 @@ function normalizeGeminiError(err) {
498
886
  }
499
887
  return normalizeThrownError(err, PROVIDER3);
500
888
  }
889
+ function makeAbortRacePromise(signal) {
890
+ return new Promise((_, reject) => {
891
+ const onAbort = () => {
892
+ const e = new Error("AbortError");
893
+ e.name = "AbortError";
894
+ reject(e);
895
+ };
896
+ if (signal.aborted) {
897
+ onAbort();
898
+ } else {
899
+ signal.addEventListener("abort", onAbort, { once: true });
900
+ }
901
+ });
902
+ }
501
903
  function createGeminiProvider(config) {
904
+ const configTimeoutMs = config.timeoutMs ?? 3e4;
502
905
  const ai = new GoogleGenAI({
503
906
  apiKey: config.apiKey,
504
907
  httpOptions: {
505
- timeout: config.timeoutMs ?? 3e4
908
+ timeout: configTimeoutMs * 2
506
909
  }
507
910
  });
508
911
  const retryOpts = {
@@ -513,53 +916,62 @@ function createGeminiProvider(config) {
513
916
  async function complete(messages, options) {
514
917
  const model = options?.model ?? config.model;
515
918
  const { system, contents } = buildGeminiContents(messages);
919
+ const effectiveTimeoutMs = options?.timeoutMs ?? configTimeoutMs;
516
920
  const start = Date.now();
517
- return withRetry(async () => {
518
- try {
519
- const geminiConfig = {};
520
- if (system !== void 0) geminiConfig.systemInstruction = system;
521
- const maxTokens = options?.maxTokens ?? config.maxTokens;
522
- if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
523
- const temperature = options?.temperature ?? config.temperature;
524
- if (temperature !== void 0) geminiConfig.temperature = temperature;
525
- const response = await ai.models.generateContent({
526
- model,
527
- contents,
528
- config: geminiConfig
529
- });
530
- return {
531
- content: response.text ?? "",
532
- model,
533
- usage: normalizeUsage3(response.usageMetadata),
534
- latencyMs: Date.now() - start
535
- };
536
- } catch (err) {
537
- throw normalizeGeminiError(err);
538
- }
539
- }, retryOpts);
921
+ return withRetry(
922
+ async () => {
923
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
924
+ try {
925
+ const geminiConfig = {};
926
+ if (system !== void 0) geminiConfig.systemInstruction = system;
927
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
928
+ if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
929
+ const temperature = options?.temperature ?? config.temperature;
930
+ if (temperature !== void 0) geminiConfig.temperature = temperature;
931
+ const response = await Promise.race([
932
+ ai.models.generateContent({ model, contents, config: geminiConfig }),
933
+ makeAbortRacePromise(ctl.signal)
934
+ ]);
935
+ return {
936
+ content: response.text ?? "",
937
+ model,
938
+ usage: normalizeUsage3(response.usageMetadata),
939
+ latencyMs: Date.now() - start
940
+ };
941
+ } catch (err) {
942
+ throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
943
+ } finally {
944
+ ctl.dispose();
945
+ }
946
+ },
947
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
948
+ );
540
949
  }
541
950
  async function* stream(messages, options) {
542
951
  const model = options?.model ?? config.model;
543
952
  const { system, contents } = buildGeminiContents(messages);
953
+ const effectiveTimeoutMs = options?.timeoutMs ?? configTimeoutMs;
954
+ const stallMs = options?.streamStallTimeoutMs ?? config.streamStallTimeoutMs ?? 3e4;
544
955
  const geminiConfig = {};
545
956
  if (system !== void 0) geminiConfig.systemInstruction = system;
546
957
  const maxTokens = options?.maxTokens ?? config.maxTokens;
547
958
  if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
548
959
  const temperature = options?.temperature ?? config.temperature;
549
960
  if (temperature !== void 0) geminiConfig.temperature = temperature;
961
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
550
962
  let sdkStream;
551
963
  try {
552
- sdkStream = await ai.models.generateContentStream({
553
- model,
554
- contents,
555
- config: geminiConfig
556
- });
964
+ sdkStream = await Promise.race([
965
+ ai.models.generateContentStream({ model, contents, config: geminiConfig }),
966
+ makeAbortRacePromise(ctl.signal)
967
+ ]);
557
968
  } catch (err) {
558
- throw normalizeGeminiError(err);
969
+ ctl.dispose();
970
+ throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
559
971
  }
560
972
  let finalUsage;
561
973
  try {
562
- for await (const chunk of sdkStream) {
974
+ for await (const chunk of withStallTimeout(sdkStream, stallMs, ctl, PROVIDER3)) {
563
975
  const text = chunk.text;
564
976
  if (text !== void 0 && text.length > 0) {
565
977
  yield { token: text };
@@ -569,13 +981,84 @@ function createGeminiProvider(config) {
569
981
  }
570
982
  }
571
983
  } catch (err) {
572
- throw normalizeGeminiError(err);
984
+ throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
985
+ } finally {
986
+ ctl.dispose();
573
987
  }
574
988
  if (finalUsage !== void 0) {
575
989
  yield { token: "", usage: finalUsage };
576
990
  }
577
991
  }
578
992
  async function structured(messages, schema, options) {
993
+ const structuredMode = options?.providerOptions?.["structuredMode"];
994
+ const useStrict = isZodSchema(schema) && structuredMode !== "prompt";
995
+ if (!useStrict) {
996
+ return structuredPromptFallback(messages, schema, options);
997
+ }
998
+ const responseSchemaObj = toProviderSchema(schema, "gemini");
999
+ const model = options?.model ?? config.model;
1000
+ const { system, contents } = buildGeminiContents(messages);
1001
+ const effectiveTimeoutMs = options?.timeoutMs ?? configTimeoutMs;
1002
+ const start = Date.now();
1003
+ const rawResponse = await withRetry(
1004
+ async () => {
1005
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
1006
+ try {
1007
+ const geminiConfig = {
1008
+ responseMimeType: "application/json",
1009
+ // responseSchema SDK type is permissive; cast through never to avoid SDK type mismatch
1010
+ responseSchema: responseSchemaObj
1011
+ };
1012
+ if (system !== void 0) geminiConfig.systemInstruction = system;
1013
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
1014
+ if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
1015
+ const temperature = options?.temperature ?? config.temperature;
1016
+ if (temperature !== void 0) geminiConfig.temperature = temperature;
1017
+ return await Promise.race([
1018
+ ai.models.generateContent({ model, contents, config: geminiConfig }),
1019
+ makeAbortRacePromise(ctl.signal)
1020
+ ]);
1021
+ } catch (err) {
1022
+ throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
1023
+ } finally {
1024
+ ctl.dispose();
1025
+ }
1026
+ },
1027
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
1028
+ );
1029
+ const rawContent = rawResponse.text ?? "";
1030
+ let parsed;
1031
+ try {
1032
+ const cleaned = rawContent.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
1033
+ parsed = JSON.parse(cleaned);
1034
+ } catch (err) {
1035
+ throw new LlmError({
1036
+ message: `Gemini structured output: response is not valid JSON. Raw: ${rawContent.slice(0, 200)}`,
1037
+ provider: PROVIDER3,
1038
+ retryable: false,
1039
+ cause: err
1040
+ });
1041
+ }
1042
+ let data;
1043
+ try {
1044
+ data = schema.parse(parsed);
1045
+ } catch (err) {
1046
+ throw new LlmError({
1047
+ message: `Gemini structured output: response failed schema validation. ${String(err)}`,
1048
+ provider: PROVIDER3,
1049
+ retryable: false,
1050
+ cause: err
1051
+ });
1052
+ }
1053
+ return {
1054
+ data,
1055
+ // Gemini does not return a request ID; model comes from response.modelVersion if available
1056
+ model: rawResponse.modelVersion ?? model,
1057
+ usage: normalizeUsage3(rawResponse.usageMetadata),
1058
+ latencyMs: Date.now() - start
1059
+ };
1060
+ }
1061
+ async function structuredPromptFallback(messages, schema, options) {
579
1062
  const augmentedMessages = [
580
1063
  {
581
1064
  role: "system",
@@ -585,27 +1068,32 @@ function createGeminiProvider(config) {
585
1068
  ];
586
1069
  const model = options?.model ?? config.model;
587
1070
  const { system, contents } = buildGeminiContents(augmentedMessages);
1071
+ const effectiveTimeoutMs = options?.timeoutMs ?? configTimeoutMs;
588
1072
  const start = Date.now();
589
- const rawResponse = await withRetry(async () => {
590
- try {
591
- const geminiConfig = {
592
- // Instruct Gemini to return JSON directly
593
- responseMimeType: "application/json"
594
- };
595
- if (system !== void 0) geminiConfig.systemInstruction = system;
596
- const maxTokens = options?.maxTokens ?? config.maxTokens;
597
- if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
598
- const temperature = options?.temperature ?? config.temperature;
599
- if (temperature !== void 0) geminiConfig.temperature = temperature;
600
- return await ai.models.generateContent({
601
- model,
602
- contents,
603
- config: geminiConfig
604
- });
605
- } catch (err) {
606
- throw normalizeGeminiError(err);
607
- }
608
- }, retryOpts);
1073
+ const rawResponse = await withRetry(
1074
+ async () => {
1075
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
1076
+ try {
1077
+ const geminiConfig = {
1078
+ responseMimeType: "application/json"
1079
+ };
1080
+ if (system !== void 0) geminiConfig.systemInstruction = system;
1081
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
1082
+ if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
1083
+ const temperature = options?.temperature ?? config.temperature;
1084
+ if (temperature !== void 0) geminiConfig.temperature = temperature;
1085
+ return await Promise.race([
1086
+ ai.models.generateContent({ model, contents, config: geminiConfig }),
1087
+ makeAbortRacePromise(ctl.signal)
1088
+ ]);
1089
+ } catch (err) {
1090
+ throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
1091
+ } finally {
1092
+ ctl.dispose();
1093
+ }
1094
+ },
1095
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
1096
+ );
609
1097
  const rawContent = rawResponse.text ?? "";
610
1098
  let parsed;
611
1099
  try {
@@ -632,6 +1120,7 @@ function createGeminiProvider(config) {
632
1120
  }
633
1121
  return {
634
1122
  data,
1123
+ model,
635
1124
  usage: normalizeUsage3(rawResponse.usageMetadata),
636
1125
  latencyMs: Date.now() - start
637
1126
  };
@@ -703,34 +1192,43 @@ function createOpenAIProvider(config) {
703
1192
  async function complete(messages, options) {
704
1193
  const model = options?.model ?? config.model;
705
1194
  const openAIMessages = buildOpenAIMessages(messages);
1195
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
706
1196
  const start = Date.now();
707
- return withRetry(async () => {
708
- try {
709
- const params = {
710
- model,
711
- messages: openAIMessages,
712
- stream: false
713
- };
714
- const maxTokens = options?.maxTokens ?? config.maxTokens;
715
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
716
- const temperature = options?.temperature ?? config.temperature;
717
- if (temperature !== void 0) params.temperature = temperature;
718
- const response = await client.chat.completions.create(params);
719
- const content = response.choices.map((c) => c.message.content ?? "").join("");
720
- return {
721
- content,
722
- model: response.model,
723
- usage: normalizeUsage4(response.usage),
724
- latencyMs: Date.now() - start
725
- };
726
- } catch (err) {
727
- throw normalizeOpenAIError(err);
728
- }
729
- }, retryOpts);
1197
+ return withRetry(
1198
+ async () => {
1199
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
1200
+ try {
1201
+ const params = {
1202
+ model,
1203
+ messages: openAIMessages,
1204
+ stream: false
1205
+ };
1206
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
1207
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
1208
+ const temperature = options?.temperature ?? config.temperature;
1209
+ if (temperature !== void 0) params.temperature = temperature;
1210
+ const response = await client.chat.completions.create(params, { signal: ctl.signal });
1211
+ const content = response.choices.map((c) => c.message.content ?? "").join("");
1212
+ return {
1213
+ content,
1214
+ model: response.model,
1215
+ usage: normalizeUsage4(response.usage),
1216
+ latencyMs: Date.now() - start
1217
+ };
1218
+ } catch (err) {
1219
+ throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
1220
+ } finally {
1221
+ ctl.dispose();
1222
+ }
1223
+ },
1224
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
1225
+ );
730
1226
  }
731
1227
  async function* stream(messages, options) {
732
1228
  const model = options?.model ?? config.model;
733
1229
  const openAIMessages = buildOpenAIMessages(messages);
1230
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
1231
+ const stallMs = options?.streamStallTimeoutMs ?? config.streamStallTimeoutMs ?? 3e4;
734
1232
  const params = {
735
1233
  model,
736
1234
  messages: openAIMessages,
@@ -741,15 +1239,17 @@ function createOpenAIProvider(config) {
741
1239
  if (maxTokens !== void 0) params.max_tokens = maxTokens;
742
1240
  const temperature = options?.temperature ?? config.temperature;
743
1241
  if (temperature !== void 0) params.temperature = temperature;
1242
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
744
1243
  let sdkStream;
745
1244
  try {
746
- sdkStream = await client.chat.completions.create(params);
1245
+ sdkStream = await client.chat.completions.create(params, { signal: ctl.signal });
747
1246
  } catch (err) {
748
- throw normalizeOpenAIError(err);
1247
+ ctl.dispose();
1248
+ throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
749
1249
  }
750
1250
  let finalUsage;
751
1251
  try {
752
- for await (const chunk of sdkStream) {
1252
+ for await (const chunk of withStallTimeout(sdkStream, stallMs, ctl, PROVIDER4)) {
753
1253
  const delta = chunk.choices[0]?.delta.content;
754
1254
  if (delta !== void 0 && delta !== null && delta.length > 0) {
755
1255
  yield { token: delta };
@@ -759,13 +1259,95 @@ function createOpenAIProvider(config) {
759
1259
  }
760
1260
  }
761
1261
  } catch (err) {
762
- throw normalizeOpenAIError(err);
1262
+ throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
1263
+ } finally {
1264
+ ctl.dispose();
763
1265
  }
764
1266
  if (finalUsage !== void 0) {
765
1267
  yield { token: "", usage: finalUsage };
766
1268
  }
767
1269
  }
768
1270
  async function structured(messages, schema, options) {
1271
+ const structuredMode = options?.providerOptions?.["structuredMode"];
1272
+ const useStrict = isZodSchema(schema) && structuredMode !== "prompt";
1273
+ if (!useStrict) {
1274
+ return structuredPromptFallback(messages, schema, options);
1275
+ }
1276
+ const jsonSchema = toProviderSchema(schema, "openai");
1277
+ const model = options?.model ?? config.model;
1278
+ const openAIMessages = buildOpenAIMessages(messages);
1279
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
1280
+ const start = Date.now();
1281
+ const rawResponse = await withRetry(
1282
+ async () => {
1283
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
1284
+ try {
1285
+ const params = {
1286
+ model,
1287
+ messages: openAIMessages,
1288
+ stream: false,
1289
+ response_format: {
1290
+ type: "json_schema",
1291
+ json_schema: { name: "response", schema: jsonSchema, strict: true }
1292
+ }
1293
+ };
1294
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
1295
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
1296
+ const temperature = options?.temperature ?? config.temperature;
1297
+ if (temperature !== void 0) params.temperature = temperature;
1298
+ return await client.chat.completions.create(
1299
+ params,
1300
+ { signal: ctl.signal }
1301
+ );
1302
+ } catch (err) {
1303
+ throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
1304
+ } finally {
1305
+ ctl.dispose();
1306
+ }
1307
+ },
1308
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
1309
+ );
1310
+ const choice = rawResponse.choices[0];
1311
+ if (choice?.message.refusal !== null && choice?.message.refusal !== void 0) {
1312
+ throw new LlmError({
1313
+ message: `OpenAI structured output: model refused to generate. Refusal: ${choice.message.refusal.slice(0, 200)}`,
1314
+ provider: PROVIDER4,
1315
+ retryable: false,
1316
+ kind: "unknown"
1317
+ });
1318
+ }
1319
+ const rawContent = choice?.message.content ?? "";
1320
+ let parsed;
1321
+ try {
1322
+ parsed = JSON.parse(rawContent);
1323
+ } catch (err) {
1324
+ throw new LlmError({
1325
+ message: `OpenAI structured output: response is not valid JSON. Raw: ${rawContent.slice(0, 200)}`,
1326
+ provider: PROVIDER4,
1327
+ retryable: false,
1328
+ cause: err
1329
+ });
1330
+ }
1331
+ let data;
1332
+ try {
1333
+ data = schema.parse(parsed);
1334
+ } catch (err) {
1335
+ throw new LlmError({
1336
+ message: `OpenAI structured output: response failed schema validation. ${String(err)}`,
1337
+ provider: PROVIDER4,
1338
+ retryable: false,
1339
+ cause: err
1340
+ });
1341
+ }
1342
+ return {
1343
+ data,
1344
+ model: rawResponse.model,
1345
+ id: rawResponse.id,
1346
+ usage: normalizeUsage4(rawResponse.usage),
1347
+ latencyMs: Date.now() - start
1348
+ };
1349
+ }
1350
+ async function structuredPromptFallback(messages, schema, options) {
769
1351
  const jsonSystemInstruction = {
770
1352
  role: "system",
771
1353
  content: "You must respond with valid JSON only. No explanations, no markdown code fences, no extra text. Your entire response must be valid JSON that can be parsed with JSON.parse()."
@@ -773,24 +1355,31 @@ function createOpenAIProvider(config) {
773
1355
  const augmentedMessages = [jsonSystemInstruction, ...messages];
774
1356
  const model = options?.model ?? config.model;
775
1357
  const openAIMessages = buildOpenAIMessages(augmentedMessages);
1358
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
776
1359
  const start = Date.now();
777
- const rawResponse = await withRetry(async () => {
778
- try {
779
- const params = {
780
- model,
781
- messages: openAIMessages,
782
- stream: false,
783
- response_format: { type: "json_object" }
784
- };
785
- const maxTokens = options?.maxTokens ?? config.maxTokens;
786
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
787
- const temperature = options?.temperature ?? config.temperature;
788
- if (temperature !== void 0) params.temperature = temperature;
789
- return await client.chat.completions.create(params);
790
- } catch (err) {
791
- throw normalizeOpenAIError(err);
792
- }
793
- }, retryOpts);
1360
+ const rawResponse = await withRetry(
1361
+ async () => {
1362
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
1363
+ try {
1364
+ const params = {
1365
+ model,
1366
+ messages: openAIMessages,
1367
+ stream: false,
1368
+ response_format: { type: "json_object" }
1369
+ };
1370
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
1371
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
1372
+ const temperature = options?.temperature ?? config.temperature;
1373
+ if (temperature !== void 0) params.temperature = temperature;
1374
+ return await client.chat.completions.create(params, { signal: ctl.signal });
1375
+ } catch (err) {
1376
+ throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
1377
+ } finally {
1378
+ ctl.dispose();
1379
+ }
1380
+ },
1381
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
1382
+ );
794
1383
  const rawContent = rawResponse.choices[0]?.message.content ?? "";
795
1384
  let parsed;
796
1385
  try {
@@ -816,6 +1405,8 @@ function createOpenAIProvider(config) {
816
1405
  }
817
1406
  return {
818
1407
  data,
1408
+ model: rawResponse.model,
1409
+ id: rawResponse.id,
819
1410
  usage: normalizeUsage4(rawResponse.usage),
820
1411
  latencyMs: Date.now() - start
821
1412
  };
@@ -906,42 +1497,52 @@ function createPerplexityProvider(config) {
906
1497
  async function complete(messages, options) {
907
1498
  const model = options?.model ?? config.model;
908
1499
  const chatMessages = buildMessages2(messages);
1500
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
909
1501
  const start = Date.now();
910
1502
  const extraParams = extractProviderOptions(options?.providerOptions);
911
- return withRetry(async () => {
912
- try {
913
- const params = {
914
- model,
915
- messages: chatMessages,
916
- stream: false,
917
- ...extraParams
918
- };
919
- const maxTokens = options?.maxTokens ?? config.maxTokens;
920
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
921
- const temperature = options?.temperature ?? config.temperature;
922
- if (temperature !== void 0) params.temperature = temperature;
923
- const rawResponse = await client.chat.completions.create(
924
- params
925
- );
926
- const response = rawResponse;
927
- const content = response.choices.map((c) => c.message.content ?? "").join("");
928
- const result = {
929
- content,
930
- model: response.model,
931
- usage: normalizeUsage5(response.usage),
932
- latencyMs: Date.now() - start
933
- };
934
- const citations = extractCitations(response);
935
- if (citations !== void 0) result.citations = citations;
936
- return result;
937
- } catch (err) {
938
- throw normalizePerplexityError(err);
939
- }
940
- }, retryOpts);
1503
+ return withRetry(
1504
+ async () => {
1505
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
1506
+ try {
1507
+ const params = {
1508
+ model,
1509
+ messages: chatMessages,
1510
+ stream: false,
1511
+ ...extraParams
1512
+ };
1513
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
1514
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
1515
+ const temperature = options?.temperature ?? config.temperature;
1516
+ if (temperature !== void 0) params.temperature = temperature;
1517
+ const rawResponse = await client.chat.completions.create(
1518
+ params,
1519
+ { signal: ctl.signal }
1520
+ );
1521
+ const response = rawResponse;
1522
+ const content = response.choices.map((c) => c.message.content ?? "").join("");
1523
+ const result = {
1524
+ content,
1525
+ model: response.model,
1526
+ usage: normalizeUsage5(response.usage),
1527
+ latencyMs: Date.now() - start
1528
+ };
1529
+ const citations = extractCitations(response);
1530
+ if (citations !== void 0) result.citations = citations;
1531
+ return result;
1532
+ } catch (err) {
1533
+ throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
1534
+ } finally {
1535
+ ctl.dispose();
1536
+ }
1537
+ },
1538
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
1539
+ );
941
1540
  }
942
1541
  async function* stream(messages, options) {
943
1542
  const model = options?.model ?? config.model;
944
1543
  const chatMessages = buildMessages2(messages);
1544
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
1545
+ const stallMs = options?.streamStallTimeoutMs ?? config.streamStallTimeoutMs ?? 3e4;
945
1546
  const extraParams = extractProviderOptions(options?.providerOptions);
946
1547
  const params = {
947
1548
  model,
@@ -954,17 +1555,20 @@ function createPerplexityProvider(config) {
954
1555
  if (maxTokens !== void 0) params.max_tokens = maxTokens;
955
1556
  const temperature = options?.temperature ?? config.temperature;
956
1557
  if (temperature !== void 0) params.temperature = temperature;
1558
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
957
1559
  let sdkStream;
958
1560
  try {
959
1561
  sdkStream = await client.chat.completions.create(
960
- params
1562
+ params,
1563
+ { signal: ctl.signal }
961
1564
  );
962
1565
  } catch (err) {
963
- throw normalizePerplexityError(err);
1566
+ ctl.dispose();
1567
+ throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
964
1568
  }
965
1569
  let finalUsage;
966
1570
  try {
967
- for await (const chunk of sdkStream) {
1571
+ for await (const chunk of withStallTimeout(sdkStream, stallMs, ctl, PROVIDER5)) {
968
1572
  const delta = chunk.choices[0]?.delta.content;
969
1573
  if (delta !== void 0 && delta !== null && delta.length > 0) {
970
1574
  yield { token: delta };
@@ -974,7 +1578,9 @@ function createPerplexityProvider(config) {
974
1578
  }
975
1579
  }
976
1580
  } catch (err) {
977
- throw normalizePerplexityError(err);
1581
+ throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
1582
+ } finally {
1583
+ ctl.dispose();
978
1584
  }
979
1585
  if (finalUsage !== void 0) {
980
1586
  yield { token: "", usage: finalUsage };
@@ -988,28 +1594,37 @@ function createPerplexityProvider(config) {
988
1594
  const augmentedMessages = [jsonSystemInstruction, ...messages];
989
1595
  const model = options?.model ?? config.model;
990
1596
  const chatMessages = buildMessages2(augmentedMessages);
1597
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
991
1598
  const start = Date.now();
992
1599
  const extraParams = extractProviderOptions(options?.providerOptions);
993
- const rawResponse = await withRetry(async () => {
994
- try {
995
- const params = {
996
- model,
997
- messages: chatMessages,
998
- stream: false,
999
- ...extraParams
1000
- };
1001
- const maxTokens = options?.maxTokens ?? config.maxTokens;
1002
- if (maxTokens !== void 0) params.max_tokens = maxTokens;
1003
- const temperature = options?.temperature ?? config.temperature;
1004
- if (temperature !== void 0) params.temperature = temperature;
1005
- return await client.chat.completions.create(
1006
- params
1007
- );
1008
- } catch (err) {
1009
- throw normalizePerplexityError(err);
1010
- }
1011
- }, retryOpts);
1012
- const rawContent = rawResponse.choices[0]?.message.content ?? "";
1600
+ const rawResponse = await withRetry(
1601
+ async () => {
1602
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
1603
+ try {
1604
+ const params = {
1605
+ model,
1606
+ messages: chatMessages,
1607
+ stream: false,
1608
+ ...extraParams
1609
+ };
1610
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
1611
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
1612
+ const temperature = options?.temperature ?? config.temperature;
1613
+ if (temperature !== void 0) params.temperature = temperature;
1614
+ return await client.chat.completions.create(
1615
+ params,
1616
+ { signal: ctl.signal }
1617
+ );
1618
+ } catch (err) {
1619
+ throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
1620
+ } finally {
1621
+ ctl.dispose();
1622
+ }
1623
+ },
1624
+ mergeRetryOptsWithSignal(retryOpts, options?.signal)
1625
+ );
1626
+ const response = rawResponse;
1627
+ const rawContent = response.choices[0]?.message.content ?? "";
1013
1628
  let parsed;
1014
1629
  try {
1015
1630
  const cleaned = rawContent.replace(/<think>[\s\S]*?<\/think>/i, "").replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
@@ -1033,11 +1648,16 @@ function createPerplexityProvider(config) {
1033
1648
  cause: err
1034
1649
  });
1035
1650
  }
1036
- return {
1651
+ const citations = extractCitations(response);
1652
+ const result = {
1037
1653
  data,
1038
- usage: normalizeUsage5(rawResponse.usage),
1654
+ model: response.model,
1655
+ id: response.id,
1656
+ usage: normalizeUsage5(response.usage),
1039
1657
  latencyMs: Date.now() - start
1040
1658
  };
1659
+ if (citations !== void 0) result.citations = citations;
1660
+ return result;
1041
1661
  }
1042
1662
  return {
1043
1663
  config,