@diabolicallabs/llm-client 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -7,6 +7,13 @@ var LlmError = class extends Error {
  provider;
  statusCode;
  retryable;
+ /**
+ * Optional error kind discriminator. Present on errors produced by the abort/timeout/stall
+ * machinery (v0.3.0+). May be undefined on errors from providers that pre-date the kind field
+ * or on errors that fall through to the generic normalization path.
+ * Typed as LlmErrorKind | undefined to satisfy exactOptionalPropertyTypes.
+ */
+ kind;
  // `cause` is declared on Error in lib.es2022.error.d.ts as `cause?: unknown`
  // We override it here to make it always present (not optional) after construction.
  cause;
@@ -15,10 +22,128 @@ var LlmError = class extends Error {
  this.provider = opts.provider;
  this.statusCode = opts.statusCode;
  this.retryable = opts.retryable;
+ this.kind = opts.kind;
  this.cause = opts.cause;
  }
  };
 
+ // src/abort.ts
+ function createAttemptController(callerSignal, timeoutMs) {
+ const internal = new AbortController();
+ let reason;
+ const timer = setTimeout(() => {
+ reason ??= "timeout";
+ internal.abort(new Error("llm-client: timeout"));
+ }, timeoutMs);
+ timer.unref?.();
+ const onCallerAbort = () => {
+ reason ??= "caller";
+ internal.abort(callerSignal.reason);
+ };
+ if (callerSignal !== void 0) {
+ if (callerSignal.aborted) {
+ onCallerAbort();
+ } else {
+ callerSignal.addEventListener("abort", onCallerAbort, { once: true });
+ }
+ }
+ return {
+ signal: internal.signal,
+ abortReason: () => reason,
+ abort: (r) => {
+ reason ??= r;
+ internal.abort();
+ },
+ dispose: () => {
+ clearTimeout(timer);
+ callerSignal?.removeEventListener("abort", onCallerAbort);
+ }
+ };
+ }
+ function cancellableSleep(ms, signal) {
+ return new Promise((resolve) => {
+ if (signal?.aborted) {
+ resolve();
+ return;
+ }
+ const timer = setTimeout(() => {
+ signal?.removeEventListener("abort", onAbort);
+ resolve();
+ }, ms);
+ timer.unref?.();
+ const onAbort = () => {
+ clearTimeout(timer);
+ resolve();
+ };
+ signal?.addEventListener("abort", onAbort, { once: true });
+ });
+ }
+ async function* withStallTimeout(source, stallMs, ctl, provider) {
+ const it = source[Symbol.asyncIterator]();
+ while (true) {
+ let stallTimer;
+ const stallPromise = new Promise((_, reject) => {
+ stallTimer = setTimeout(() => {
+ ctl.abort("stall");
+ reject(
+ new LlmError({
+ provider,
+ kind: "stream_stall",
+ retryable: true,
+ message: `llm-client: no chunk for ${stallMs}ms`
+ })
+ );
+ }, stallMs);
+ });
+ try {
+ const next = await Promise.race([it.next(), stallPromise]);
+ clearTimeout(stallTimer);
+ if (next.done) return;
+ yield next.value;
+ } catch (err) {
+ clearTimeout(stallTimer);
+ throw err;
+ }
+ }
+ }
+ function classifyAbort(err, abortReason, provider) {
+ const controllerFired = abortReason !== void 0;
+ if (!controllerFired && !isAbortError(err)) return err;
+ switch (abortReason) {
+ case "timeout":
+ return new LlmError({
+ message: "llm-client: request timed out",
+ provider,
+ kind: "timeout",
+ retryable: true,
+ cause: err
+ });
+ case "stall":
+ return new LlmError({
+ message: "llm-client: stream stalled",
+ provider,
+ kind: "stream_stall",
+ retryable: true,
+ cause: err
+ });
+ case "caller":
+ default:
+ return new LlmError({
+ message: "llm-client: cancelled by caller",
+ provider,
+ kind: "cancelled",
+ retryable: false,
+ cause: err
+ });
+ }
+ }
+ function isAbortError(err) {
+ if (err instanceof Error && err.name === "AbortError") return true;
+ if (typeof DOMException !== "undefined" && err instanceof DOMException && err.name === "AbortError")
+ return true;
+ return false;
+ }
+
  // src/retry.ts
  var RETRYABLE_HTTP_STATUSES = /* @__PURE__ */ new Set([429, 502, 503, 504]);
  var RETRYABLE_ERROR_CODES = /* @__PURE__ */ new Set(["ECONNRESET", "ETIMEDOUT", "ECONNABORTED"]);
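
For orientation, a minimal sketch of how the new per-attempt controller composes a caller's `AbortSignal` with the attempt timeout. `createAttemptController` and `classifyAbort` are bundle-internal helpers shown above; the `fetch` call and provider name are stand-ins, not part of this package.

```js
// Sketch only: wiring a signal-aware call through the attempt controller.
const caller = new AbortController();
const ctl = createAttemptController(caller.signal, 5e3); // 5s per-attempt timeout

try {
  // Any signal-aware call works here; fetch is a placeholder.
  await fetch("https://api.example.com/v1/complete", { signal: ctl.signal });
} catch (err) {
  // abortReason() reports which side fired first: "timeout", "stall", or "caller";
  // classifyAbort maps that onto an LlmError with the matching `kind`.
  throw classifyAbort(err, ctl.abortReason(), "example-provider");
} finally {
  ctl.dispose(); // clears the timer and detaches the caller-signal listener
}
```
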
@@ -35,19 +160,32 @@ function computeBackoffMs(attempt, baseDelayMs) {
  const ceiling = baseDelayMs * 2 ** attempt;
  return Math.random() * ceiling;
  }
+ function mergeRetryOptsWithSignal(base, signal) {
+ return signal !== void 0 ? { ...base, signal } : { ...base };
+ }
  async function withRetry(fn, opts) {
  let lastError;
  for (let attempt = 0; attempt <= opts.maxRetries; attempt++) {
+ if (opts.signal?.aborted === true) {
+ throw new LlmError({
+ message: "llm-client: cancelled by caller",
+ provider: opts.provider,
+ kind: "cancelled",
+ retryable: false,
+ cause: opts.signal.reason
+ });
+ }
  try {
  return await fn(attempt);
  } catch (err) {
  const llmErr = normalizeThrownError(err, opts.provider);
+ if (llmErr.kind === "cancelled") throw llmErr;
  if (!llmErr.retryable || attempt === opts.maxRetries) {
  throw llmErr;
  }
  lastError = llmErr;
  const delayMs = computeBackoffMs(attempt, opts.baseDelayMs);
- await sleep(delayMs);
+ await cancellableSleep(delayMs, opts.signal);
  }
  }
  throw lastError ?? new LlmError({
@@ -59,6 +197,15 @@ async function withRetry(fn, opts) {
  function normalizeThrownError(err, provider) {
  if (err instanceof LlmError) return err;
  if (err instanceof Error) {
+ if (err.name === "AbortError" || typeof DOMException !== "undefined" && err instanceof DOMException && err.name === "AbortError") {
+ return new LlmError({
+ message: err.message || "llm-client: cancelled by caller",
+ provider,
+ kind: "cancelled",
+ retryable: false,
+ cause: err
+ });
+ }
  const errWithCode = err;
  const statusCode = errWithCode.status ?? errWithCode.statusCode;
  if (errWithCode.code !== void 0 && isRetryableErrorCode(errWithCode.code)) {
@@ -67,24 +214,28 @@ function normalizeThrownError(err, provider) {
  message: err.message,
  provider,
  statusCode,
+ kind: "network",
  retryable: true,
  cause: err
  });
  }
- return new LlmError({ message: err.message, provider, retryable: true, cause: err });
+ return new LlmError({ message: err.message, provider, kind: "network", retryable: true, cause: err });
  }
  if (statusCode !== void 0) {
+ const retryable = isRetryableStatus(statusCode);
  return new LlmError({
  message: err.message,
  provider,
  statusCode,
- retryable: isRetryableStatus(statusCode),
+ kind: "http",
+ retryable,
  cause: err
  });
  }
  return new LlmError({
  message: err.message,
  provider,
+ kind: "unknown",
  retryable: false,
  cause: err
  });
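
The `kind` values assigned above ("timeout", "stream_stall", "network", "http", "cancelled", "unknown") give consumers a stable discriminator. A hedged consumer-side sketch; the `provider` object and message shape follow the interfaces visible in this diff:

```js
// Sketch: branching on the new LlmError.kind discriminator.
try {
  const res = await provider.complete([{ role: "user", content: "hi" }]);
  console.log(res.content);
} catch (err) {
  switch (err.kind) {
    case "timeout":      // per-attempt timeout fired (retryable)
    case "stream_stall": // no chunk within the stall window (retryable)
    case "network":      // connection-level failure (retryable)
      console.warn("transient failure:", err.message);
      break;
    case "cancelled":    // caller aborted; withRetry never retries these
      break;
    default:             // "http", "unknown", or undefined on older error paths
      throw err;
  }
}
```
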
@@ -92,13 +243,11 @@ function normalizeThrownError(err, provider) {
  return new LlmError({
  message: String(err),
  provider,
+ kind: "unknown",
  retryable: false,
  cause: err
  });
  }
- function sleep(ms) {
- return new Promise((resolve) => setTimeout(resolve, ms));
- }
 
  // src/providers/anthropic.ts
  var PROVIDER = "anthropic";
@@ -166,8 +315,10 @@ function createAnthropicProvider(config) {
  async function complete(messages, options) {
  const model = options?.model ?? config.model;
  const { system, messages: anthropicMessages } = buildAnthropicMessages(messages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
  return withRetry(async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
  try {
  const params = {
  model,
@@ -179,7 +330,7 @@ function createAnthropicProvider(config) {
  if (temperature !== void 0) {
  params.temperature = temperature;
  }
- const response = await client.messages.create(params);
+ const response = await client.messages.create(params, { signal: ctl.signal });
  const content = response.content.filter((block) => block.type === "text").map((block) => block.text).join("");
  return {
  content,
@@ -188,13 +339,17 @@ function createAnthropicProvider(config) {
  latencyMs: Date.now() - start
  };
  } catch (err) {
- throw normalizeAnthropicError(err);
+ throw normalizeAnthropicError(classifyAbort(err, ctl.abortReason(), PROVIDER));
+ } finally {
+ ctl.dispose();
  }
- }, retryOpts);
+ }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
  }
  async function* stream(messages, options) {
  const model = options?.model ?? config.model;
  const { system, messages: anthropicMessages } = buildAnthropicMessages(messages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
+ const stallMs = options?.streamStallTimeoutMs ?? config.streamStallTimeoutMs ?? 3e4;
  const params = {
  model,
  messages: anthropicMessages,
@@ -205,15 +360,23 @@ function createAnthropicProvider(config) {
  if (streamTemperature !== void 0) {
  params.temperature = streamTemperature;
  }
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
  let sdkStream;
  try {
- sdkStream = client.messages.stream(params);
+ sdkStream = client.messages.stream(params, { signal: ctl.signal });
  } catch (err) {
- throw normalizeAnthropicError(err);
+ ctl.dispose();
+ throw normalizeAnthropicError(classifyAbort(err, ctl.abortReason(), PROVIDER));
  }
  let finalUsage;
  try {
- for await (const event of sdkStream) {
+ const stallWrapped = withStallTimeout(
+ sdkStream,
+ stallMs,
+ ctl,
+ PROVIDER
+ );
+ for await (const event of stallWrapped) {
  if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
  yield { token: event.delta.text };
  } else if (event.type === "message_delta" && "usage" in event) {
@@ -222,7 +385,9 @@ function createAnthropicProvider(config) {
  }
  }
  } catch (err) {
- throw normalizeAnthropicError(err);
+ throw normalizeAnthropicError(classifyAbort(err, ctl.abortReason(), PROVIDER));
+ } finally {
+ ctl.dispose();
  }
  if (finalUsage !== void 0) {
  yield { token: "", usage: finalUsage };
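
Consumer-side, the new options compose as sketched below. `createAnthropicProvider` and the `signal`, `timeoutMs`, and `streamStallTimeoutMs` option names come from this diff; the model id and environment variable are placeholders.

```js
// Sketch: caller-driven cancellation plus stall detection on a stream.
const provider = createAnthropicProvider({
  apiKey: process.env.ANTHROPIC_API_KEY,
  model: "claude-sonnet-4-5" // placeholder model id
});
const ac = new AbortController();
setTimeout(() => ac.abort(), 10e3); // hard stop after 10s, regardless of progress

try {
  for await (const chunk of provider.stream(
    [{ role: "user", content: "Write a haiku" }],
    { signal: ac.signal, timeoutMs: 30e3, streamStallTimeoutMs: 15e3 }
  )) {
    process.stdout.write(chunk.token);
  }
} catch (err) {
  if (err.kind !== "cancelled") throw err; // caller aborts surface as kind "cancelled"
}
```
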
@@ -334,8 +499,10 @@ function createDeepSeekProvider(config) {
  async function complete(messages, options) {
  const model = options?.model ?? config.model;
  const chatMessages = buildMessages(messages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
  return withRetry(async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
  try {
  const params = {
  model,
@@ -346,7 +513,7 @@ function createDeepSeekProvider(config) {
  if (maxTokens !== void 0) params.max_tokens = maxTokens;
  const temperature = options?.temperature ?? config.temperature;
  if (temperature !== void 0) params.temperature = temperature;
- const response = await client.chat.completions.create(params);
+ const response = await client.chat.completions.create(params, { signal: ctl.signal });
  const content = response.choices.map((c) => c.message.content ?? "").join("");
  return {
  content,
@@ -355,13 +522,17 @@ function createDeepSeekProvider(config) {
  latencyMs: Date.now() - start
  };
  } catch (err) {
- throw normalizeDeepSeekError(err);
+ throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
+ } finally {
+ ctl.dispose();
  }
- }, retryOpts);
+ }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
  }
  async function* stream(messages, options) {
  const model = options?.model ?? config.model;
  const chatMessages = buildMessages(messages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
+ const stallMs = options?.streamStallTimeoutMs ?? config.streamStallTimeoutMs ?? 3e4;
  const params = {
  model,
  messages: chatMessages,
@@ -372,15 +543,17 @@ function createDeepSeekProvider(config) {
  if (maxTokens !== void 0) params.max_tokens = maxTokens;
  const temperature = options?.temperature ?? config.temperature;
  if (temperature !== void 0) params.temperature = temperature;
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
  let sdkStream;
  try {
- sdkStream = await client.chat.completions.create(params);
+ sdkStream = await client.chat.completions.create(params, { signal: ctl.signal });
  } catch (err) {
- throw normalizeDeepSeekError(err);
+ ctl.dispose();
+ throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
  }
  let finalUsage;
  try {
- for await (const chunk of sdkStream) {
+ for await (const chunk of withStallTimeout(sdkStream, stallMs, ctl, PROVIDER2)) {
  const delta = chunk.choices[0]?.delta.content;
  if (delta !== void 0 && delta !== null && delta.length > 0) {
  yield { token: delta };
@@ -390,7 +563,9 @@ function createDeepSeekProvider(config) {
  }
  }
  } catch (err) {
- throw normalizeDeepSeekError(err);
+ throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
+ } finally {
+ ctl.dispose();
  }
  if (finalUsage !== void 0) {
  yield { token: "", usage: finalUsage };
@@ -404,8 +579,10 @@ function createDeepSeekProvider(config) {
  const augmentedMessages = [jsonSystemInstruction, ...messages];
  const model = options?.model ?? config.model;
  const chatMessages = buildMessages(augmentedMessages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
  const rawResponse = await withRetry(async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
  try {
  const params = {
  model,
@@ -416,11 +593,13 @@ function createDeepSeekProvider(config) {
  if (maxTokens !== void 0) params.max_tokens = maxTokens;
  const temperature = options?.temperature ?? config.temperature;
  if (temperature !== void 0) params.temperature = temperature;
- return await client.chat.completions.create(params);
+ return await client.chat.completions.create(params, { signal: ctl.signal });
  } catch (err) {
- throw normalizeDeepSeekError(err);
+ throw normalizeDeepSeekError(classifyAbort(err, ctl.abortReason(), PROVIDER2));
+ } finally {
+ ctl.dispose();
  }
- }, retryOpts);
+ }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
  const rawContent = rawResponse.choices[0]?.message.content ?? "";
  let parsed;
  try {
@@ -498,11 +677,26 @@ function normalizeGeminiError(err) {
  }
  return normalizeThrownError(err, PROVIDER3);
  }
+ function makeAbortRacePromise(signal) {
+ return new Promise((_, reject) => {
+ const onAbort = () => {
+ const e = new Error("AbortError");
+ e.name = "AbortError";
+ reject(e);
+ };
+ if (signal.aborted) {
+ onAbort();
+ } else {
+ signal.addEventListener("abort", onAbort, { once: true });
+ }
+ });
+ }
  function createGeminiProvider(config) {
+ const configTimeoutMs = config.timeoutMs ?? 3e4;
  const ai = new GoogleGenAI({
  apiKey: config.apiKey,
  httpOptions: {
- timeout: config.timeoutMs ?? 3e4
+ timeout: configTimeoutMs * 2
  }
  });
  const retryOpts = {
@@ -513,8 +707,10 @@ function createGeminiProvider(config) {
  async function complete(messages, options) {
  const model = options?.model ?? config.model;
  const { system, contents } = buildGeminiContents(messages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? configTimeoutMs;
  const start = Date.now();
  return withRetry(async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
  try {
  const geminiConfig = {};
  if (system !== void 0) geminiConfig.systemInstruction = system;
@@ -522,11 +718,10 @@ function createGeminiProvider(config) {
  if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
  const temperature = options?.temperature ?? config.temperature;
  if (temperature !== void 0) geminiConfig.temperature = temperature;
- const response = await ai.models.generateContent({
- model,
- contents,
- config: geminiConfig
- });
+ const response = await Promise.race([
+ ai.models.generateContent({ model, contents, config: geminiConfig }),
+ makeAbortRacePromise(ctl.signal)
+ ]);
  return {
  content: response.text ?? "",
  model,
534
729
  latencyMs: Date.now() - start
535
730
  };
536
731
  } catch (err) {
537
- throw normalizeGeminiError(err);
732
+ throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
733
+ } finally {
734
+ ctl.dispose();
538
735
  }
539
- }, retryOpts);
736
+ }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
540
737
  }
541
738
  async function* stream(messages, options) {
542
739
  const model = options?.model ?? config.model;
543
740
  const { system, contents } = buildGeminiContents(messages);
741
+ const effectiveTimeoutMs = options?.timeoutMs ?? configTimeoutMs;
742
+ const stallMs = options?.streamStallTimeoutMs ?? config.streamStallTimeoutMs ?? 3e4;
544
743
  const geminiConfig = {};
545
744
  if (system !== void 0) geminiConfig.systemInstruction = system;
546
745
  const maxTokens = options?.maxTokens ?? config.maxTokens;
547
746
  if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
548
747
  const temperature = options?.temperature ?? config.temperature;
549
748
  if (temperature !== void 0) geminiConfig.temperature = temperature;
749
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
550
750
  let sdkStream;
551
751
  try {
552
- sdkStream = await ai.models.generateContentStream({
553
- model,
554
- contents,
555
- config: geminiConfig
556
- });
752
+ sdkStream = await Promise.race([
753
+ ai.models.generateContentStream({ model, contents, config: geminiConfig }),
754
+ makeAbortRacePromise(ctl.signal)
755
+ ]);
557
756
  } catch (err) {
558
- throw normalizeGeminiError(err);
757
+ ctl.dispose();
758
+ throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
559
759
  }
560
760
  let finalUsage;
561
761
  try {
562
- for await (const chunk of sdkStream) {
762
+ for await (const chunk of withStallTimeout(sdkStream, stallMs, ctl, PROVIDER3)) {
563
763
  const text = chunk.text;
564
764
  if (text !== void 0 && text.length > 0) {
565
765
  yield { token: text };
@@ -569,7 +769,9 @@ function createGeminiProvider(config) {
  }
  }
  } catch (err) {
- throw normalizeGeminiError(err);
+ throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
+ } finally {
+ ctl.dispose();
  }
  if (finalUsage !== void 0) {
  yield { token: "", usage: finalUsage };
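
Unlike the other providers, the Gemini path never hands `ctl.signal` to the SDK call; instead it races the request against a promise that rejects with a synthetic `AbortError` when the signal fires. The same pattern works for any promise-returning API that is not signal-aware (a generic sketch; `raceWithAbort` and `slowOperation` are illustrative names, not part of the package):

```js
// Sketch: bolting abort semantics onto a signal-unaware promise API.
function raceWithAbort(promise, signal) {
  return Promise.race([
    promise,
    new Promise((_, reject) => {
      const onAbort = () => {
        const e = new Error("AbortError");
        e.name = "AbortError"; // matches what isAbortError() checks for
        reject(e);
      };
      if (signal.aborted) onAbort();
      else signal.addEventListener("abort", onAbort, { once: true });
    })
  ]);
}
// const result = await raceWithAbort(slowOperation(), ctl.signal);
```

Note the race only releases the awaiting caller; the underlying request keeps running. That is presumably why the constructor above keeps the SDK's own `httpOptions.timeout` as a backstop, doubled so the per-attempt controller normally fires first.
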
@@ -585,8 +787,10 @@ function createGeminiProvider(config) {
  ];
  const model = options?.model ?? config.model;
  const { system, contents } = buildGeminiContents(augmentedMessages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? configTimeoutMs;
  const start = Date.now();
  const rawResponse = await withRetry(async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
  try {
  const geminiConfig = {
  // Instruct Gemini to return JSON directly
@@ -597,15 +801,16 @@ function createGeminiProvider(config) {
  if (maxTokens !== void 0) geminiConfig.maxOutputTokens = maxTokens;
  const temperature = options?.temperature ?? config.temperature;
  if (temperature !== void 0) geminiConfig.temperature = temperature;
- return await ai.models.generateContent({
- model,
- contents,
- config: geminiConfig
- });
+ return await Promise.race([
+ ai.models.generateContent({ model, contents, config: geminiConfig }),
+ makeAbortRacePromise(ctl.signal)
+ ]);
  } catch (err) {
- throw normalizeGeminiError(err);
+ throw normalizeGeminiError(classifyAbort(err, ctl.abortReason(), PROVIDER3));
+ } finally {
+ ctl.dispose();
  }
- }, retryOpts);
+ }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
  const rawContent = rawResponse.text ?? "";
  let parsed;
  try {
@@ -703,8 +908,10 @@ function createOpenAIProvider(config) {
  async function complete(messages, options) {
  const model = options?.model ?? config.model;
  const openAIMessages = buildOpenAIMessages(messages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
  return withRetry(async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
  try {
  const params = {
  model,
@@ -715,7 +922,7 @@ function createOpenAIProvider(config) {
  if (maxTokens !== void 0) params.max_tokens = maxTokens;
  const temperature = options?.temperature ?? config.temperature;
  if (temperature !== void 0) params.temperature = temperature;
- const response = await client.chat.completions.create(params);
+ const response = await client.chat.completions.create(params, { signal: ctl.signal });
  const content = response.choices.map((c) => c.message.content ?? "").join("");
  return {
  content,
@@ -724,13 +931,17 @@ function createOpenAIProvider(config) {
  latencyMs: Date.now() - start
  };
  } catch (err) {
- throw normalizeOpenAIError(err);
+ throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
+ } finally {
+ ctl.dispose();
  }
- }, retryOpts);
+ }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
  }
  async function* stream(messages, options) {
  const model = options?.model ?? config.model;
  const openAIMessages = buildOpenAIMessages(messages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
+ const stallMs = options?.streamStallTimeoutMs ?? config.streamStallTimeoutMs ?? 3e4;
  const params = {
  model,
  messages: openAIMessages,
@@ -741,15 +952,17 @@ function createOpenAIProvider(config) {
  if (maxTokens !== void 0) params.max_tokens = maxTokens;
  const temperature = options?.temperature ?? config.temperature;
  if (temperature !== void 0) params.temperature = temperature;
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
  let sdkStream;
  try {
- sdkStream = await client.chat.completions.create(params);
+ sdkStream = await client.chat.completions.create(params, { signal: ctl.signal });
  } catch (err) {
- throw normalizeOpenAIError(err);
+ ctl.dispose();
+ throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
  }
  let finalUsage;
  try {
- for await (const chunk of sdkStream) {
+ for await (const chunk of withStallTimeout(sdkStream, stallMs, ctl, PROVIDER4)) {
  const delta = chunk.choices[0]?.delta.content;
  if (delta !== void 0 && delta !== null && delta.length > 0) {
  yield { token: delta };
@@ -759,7 +972,9 @@ function createOpenAIProvider(config) {
  }
  }
  } catch (err) {
- throw normalizeOpenAIError(err);
+ throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
+ } finally {
+ ctl.dispose();
  }
  if (finalUsage !== void 0) {
  yield { token: "", usage: finalUsage };
@@ -773,8 +988,10 @@ function createOpenAIProvider(config) {
  const augmentedMessages = [jsonSystemInstruction, ...messages];
  const model = options?.model ?? config.model;
  const openAIMessages = buildOpenAIMessages(augmentedMessages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
  const start = Date.now();
  const rawResponse = await withRetry(async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
  try {
  const params = {
  model,
@@ -786,11 +1003,13 @@ function createOpenAIProvider(config) {
  if (maxTokens !== void 0) params.max_tokens = maxTokens;
  const temperature = options?.temperature ?? config.temperature;
  if (temperature !== void 0) params.temperature = temperature;
- return await client.chat.completions.create(params);
+ return await client.chat.completions.create(params, { signal: ctl.signal });
  } catch (err) {
- throw normalizeOpenAIError(err);
+ throw normalizeOpenAIError(classifyAbort(err, ctl.abortReason(), PROVIDER4));
+ } finally {
+ ctl.dispose();
  }
- }, retryOpts);
+ }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
  const rawContent = rawResponse.choices[0]?.message.content ?? "";
  let parsed;
  try {
@@ -828,39 +1047,241 @@ function createOpenAIProvider(config) {
  };
  }
 
- // src/providers/stubs.ts
- function rejectingStream(err) {
- const rejected = Promise.reject(err);
- rejected.catch(() => void 0);
+ // src/providers/perplexity.ts
+ import OpenAI3 from "openai";
+ var PROVIDER5 = "perplexity";
+ var PERPLEXITY_BASE_URL = "https://api.perplexity.ai";
+ function normalizeUsage5(usage) {
+ const inputTokens = usage?.prompt_tokens ?? 0;
+ const outputTokens = usage?.completion_tokens ?? 0;
  return {
- next: () => rejected,
- return: () => Promise.resolve({ value: void 0, done: true }),
- throw: () => Promise.reject(err),
- [Symbol.asyncIterator]() {
- return this;
- },
- [Symbol.asyncDispose]: async () => void 0
+ inputTokens,
+ outputTokens,
+ totalTokens: usage?.total_tokens ?? inputTokens + outputTokens
  };
  }
- function notImplemented(provider) {
- const err = new LlmError({
- message: `[dlabs-toolkit] Provider '${provider}' is not yet implemented. Anthropic, OpenAI, Gemini, and DeepSeek are available; Perplexity ships in a later week.`,
- provider,
- retryable: false
+ function buildMessages2(messages) {
+ return messages.map((m) => ({
+ role: m.role,
+ content: m.content
+ }));
+ }
+ function extractCitations(response) {
+ const rawCitations = response.citations;
+ if (rawCitations === void 0 || rawCitations.length === 0) return void 0;
+ const seen = /* @__PURE__ */ new Set();
+ const deduped = [];
+ for (const url of rawCitations) {
+ if (!seen.has(url)) {
+ seen.add(url);
+ deduped.push({ url });
+ }
+ }
+ return deduped.length > 0 ? deduped : void 0;
+ }
+ function extractProviderOptions(providerOptions) {
+ if (providerOptions === void 0) return {};
+ return { ...providerOptions };
+ }
+ function normalizePerplexityError(err) {
+ if (err instanceof LlmError) return err;
+ if (typeof OpenAI3.APIConnectionError === "function" && err instanceof OpenAI3.APIConnectionError) {
+ return new LlmError({
+ message: err.message,
+ provider: PROVIDER5,
+ retryable: true,
+ cause: err
+ });
+ }
+ if (typeof OpenAI3.APIError === "function" && err instanceof OpenAI3.APIError) {
+ const status = err.status;
+ if (status !== void 0) {
+ const retryable = [429, 502, 503, 504].includes(status) || status >= 500;
+ return new LlmError({
+ message: err.message,
+ provider: PROVIDER5,
+ statusCode: status,
+ retryable,
+ cause: err
+ });
+ }
+ return new LlmError({ message: err.message, provider: PROVIDER5, retryable: false, cause: err });
+ }
+ return normalizeThrownError(err, PROVIDER5);
+ }
+ function createPerplexityProvider(config) {
+ const client = new OpenAI3({
+ apiKey: config.apiKey,
+ baseURL: PERPLEXITY_BASE_URL,
+ timeout: config.timeoutMs ?? 3e4,
+ maxRetries: 0
+ // Retries managed by withRetry
  });
+ const retryOpts = {
+ maxRetries: config.maxRetries ?? 3,
+ baseDelayMs: config.baseDelayMs ?? 1e3,
+ provider: PROVIDER5
+ };
+ async function complete(messages, options) {
+ const model = options?.model ?? config.model;
+ const chatMessages = buildMessages2(messages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
+ const start = Date.now();
+ const extraParams = extractProviderOptions(options?.providerOptions);
+ return withRetry(async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: chatMessages,
+ stream: false,
+ ...extraParams
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ const rawResponse = await client.chat.completions.create(
+ params,
+ { signal: ctl.signal }
+ );
+ const response = rawResponse;
+ const content = response.choices.map((c) => c.message.content ?? "").join("");
+ const result = {
+ content,
+ model: response.model,
+ usage: normalizeUsage5(response.usage),
+ latencyMs: Date.now() - start
+ };
+ const citations = extractCitations(response);
+ if (citations !== void 0) result.citations = citations;
+ return result;
+ } catch (err) {
+ throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
+ } finally {
+ ctl.dispose();
+ }
+ }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ }
+ async function* stream(messages, options) {
+ const model = options?.model ?? config.model;
+ const chatMessages = buildMessages2(messages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
+ const stallMs = options?.streamStallTimeoutMs ?? config.streamStallTimeoutMs ?? 3e4;
+ const extraParams = extractProviderOptions(options?.providerOptions);
+ const params = {
+ model,
+ messages: chatMessages,
+ stream: true,
+ stream_options: { include_usage: true },
+ ...extraParams
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ let sdkStream;
+ try {
+ sdkStream = await client.chat.completions.create(
+ params,
+ { signal: ctl.signal }
+ );
+ } catch (err) {
+ ctl.dispose();
+ throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
+ }
+ let finalUsage;
+ try {
+ for await (const chunk of withStallTimeout(sdkStream, stallMs, ctl, PROVIDER5)) {
+ const delta = chunk.choices[0]?.delta.content;
+ if (delta !== void 0 && delta !== null && delta.length > 0) {
+ yield { token: delta };
+ }
+ if (chunk.usage !== void 0 && chunk.usage !== null) {
+ finalUsage = normalizeUsage5(chunk.usage);
+ }
+ }
+ } catch (err) {
+ throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
+ } finally {
+ ctl.dispose();
+ }
+ if (finalUsage !== void 0) {
+ yield { token: "", usage: finalUsage };
+ }
+ }
+ async function structured(messages, schema, options) {
+ const jsonSystemInstruction = {
+ role: "system",
+ content: "You must respond with valid JSON only. No explanations, no markdown code fences, no extra text. Your entire response must be valid JSON that can be parsed with JSON.parse()."
+ };
+ const augmentedMessages = [jsonSystemInstruction, ...messages];
+ const model = options?.model ?? config.model;
+ const chatMessages = buildMessages2(augmentedMessages);
+ const effectiveTimeoutMs = options?.timeoutMs ?? config.timeoutMs ?? 3e4;
+ const start = Date.now();
+ const extraParams = extractProviderOptions(options?.providerOptions);
+ const rawResponse = await withRetry(async () => {
+ const ctl = createAttemptController(options?.signal, effectiveTimeoutMs);
+ try {
+ const params = {
+ model,
+ messages: chatMessages,
+ stream: false,
+ ...extraParams
+ };
+ const maxTokens = options?.maxTokens ?? config.maxTokens;
+ if (maxTokens !== void 0) params.max_tokens = maxTokens;
+ const temperature = options?.temperature ?? config.temperature;
+ if (temperature !== void 0) params.temperature = temperature;
+ return await client.chat.completions.create(
+ params,
+ { signal: ctl.signal }
+ );
+ } catch (err) {
+ throw normalizePerplexityError(classifyAbort(err, ctl.abortReason(), PROVIDER5));
+ } finally {
+ ctl.dispose();
+ }
+ }, mergeRetryOptsWithSignal(retryOpts, options?.signal));
+ const rawContent = rawResponse.choices[0]?.message.content ?? "";
+ let parsed;
+ try {
+ const cleaned = rawContent.replace(/<think>[\s\S]*?<\/think>/i, "").replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim();
+ parsed = JSON.parse(cleaned);
+ } catch (err) {
+ throw new LlmError({
+ message: `Perplexity structured output: response is not valid JSON. Raw: ${rawContent.slice(0, 200)}`,
+ provider: PROVIDER5,
+ retryable: false,
+ cause: err
+ });
+ }
+ let data;
+ try {
+ data = schema.parse(parsed);
+ } catch (err) {
+ throw new LlmError({
+ message: `Perplexity structured output: response failed schema validation. ${String(err)}`,
+ provider: PROVIDER5,
+ retryable: false,
+ cause: err
+ });
+ }
+ return {
+ data,
+ usage: normalizeUsage5(rawResponse.usage),
+ latencyMs: Date.now() - start
+ };
+ }
  return {
- get config() {
- throw err;
- },
- complete: () => Promise.reject(err),
- stream: () => rejectingStream(err),
- structured: () => Promise.reject(err)
+ config,
+ complete,
+ stream,
+ structured
  };
  }
- function createPerplexityProvider(config) {
- void config;
- return notImplemented("perplexity");
- }
 
  // src/client.ts
  function createClient(config) {
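
Putting the new provider together, an end-to-end sketch. It assumes `createPerplexityProvider` is re-exported from the package entry point (only `dist/index.js` is shown here); the model id is a placeholder.

```js
import { createPerplexityProvider } from "@diabolicallabs/llm-client";

const provider = createPerplexityProvider({
  apiKey: process.env.PERPLEXITY_API_KEY,
  model: "sonar" // placeholder model id
});

const res = await provider.complete(
  [{ role: "user", content: "What changed in Node.js 22?" }],
  { timeoutMs: 30e3 }
);
console.log(res.content);
// Deduplicated source URLs, when Perplexity returns citations:
for (const c of res.citations ?? []) console.log("-", c.url);
```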