@diabolicallabs/llm-client 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,12 +1,12 @@
1
1
  # @diabolicallabs/llm-client
2
2
 
3
- Unified LLM API across Anthropic, OpenAI, Google Gemini, and DeepSeek. Single interface for completion, streaming, and structured output. All provider errors are normalized into a consistent `LlmError` shape. © Diabolical Labs
3
+ Unified LLM API across Anthropic, OpenAI, Google Gemini, DeepSeek, and Perplexity. Single interface for completion, streaming, and structured output. All provider errors are normalized into a consistent `LlmError` shape. © Diabolical Labs
4
4
 
5
5
  **Pre-1.0. APIs may change between minor versions.**
6
6
 
7
7
  ## Status
8
8
 
9
- **Published — v0.1.0.** All four providers are implemented. A fifth provider (Perplexity) is a stub and will be implemented in a future release.
9
+ **Published — v0.3.0.** All five providers are implemented. v0.3.0 adds per-call timeouts, caller AbortSignal, and stream stall detection.
10
10
 
11
11
  ## Install
12
12
 
@@ -55,9 +55,79 @@ const result = await client.structured(messages, schema);
55
55
  |---|---|---|
56
56
  | `anthropic` | Implemented | `ANTHROPIC_API_KEY` |
57
57
  | `openai` | Implemented | `OPENAI_API_KEY` |
58
- | `google` | Implemented | `GOOGLE_AI_API_KEY` |
58
+ | `gemini` | Implemented | `GOOGLE_AI_API_KEY` |
59
59
  | `deepseek` | Implemented | `DEEPSEEK_API_KEY` |
60
- | `perplexity` | Stub throws `LlmError` | — |
60
+ | `perplexity` | Implemented | `PERPLEXITY_API_KEY` |
61
+
62
+ ## Perplexity — web-grounded responses
63
+
64
+ The Perplexity provider returns real-time web-grounded answers with source citations. Use it via `createClient` or `createClientFromEnv`:
65
+
66
+ ```typescript
67
+ const client = createClientFromEnv('perplexity', 'sonar');
68
+ const response = await client.complete([
69
+ { role: 'user', content: 'What happened in AI this week?' },
70
+ ]);
71
+
72
+ // Citations are deduplicated by URL
73
+ console.log(response.citations);
74
+ // [{ url: 'https://example.com/article' }, { url: 'https://reuters.com/story' }]
75
+ ```
76
+
77
+ ### Citations
78
+
79
+ `LlmResponse.citations` is populated when Perplexity returns source URLs. It is `undefined` for all other providers.
80
+
81
+ ```typescript
82
+ interface LlmResponse {
83
+ content: string;
84
+ model: string;
85
+ usage: LlmUsage;
86
+ latencyMs: number;
87
+ citations?: Array<{
88
+ url: string;
89
+ title?: string; // Perplexity currently returns URLs only; title is always undefined
90
+ }>;
91
+ }
92
+ ```
93
+
94
+ Citations are deduplicated by URL within a single response. They are **not available in stream mode** — use `complete()` when you need citations.
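A minimal sketch of consuming `citations` defensively — the field is optional, so the same code works unchanged against providers that never set it (the prompt is illustrative):

```typescript
import { createClientFromEnv } from '@diabolicallabs/llm-client';

const client = createClientFromEnv('perplexity', 'sonar');

const response = await client.complete([
  { role: 'user', content: 'Summarize this week in AI.' },
]);

// citations is undefined for non-Perplexity providers and in stream mode,
// so default to an empty list before formatting a sources footer.
const sources = (response.citations ?? [])
  .map((c, i) => `[${i + 1}] ${c.title ?? c.url}`) // title is currently always undefined
  .join('\n');

console.log(response.content + (sources ? `\n\nSources:\n${sources}` : ''));
```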
95
+
96
+ ### Search filters via `providerOptions`
97
+
98
+ Perplexity supports search-specific parameters. Pass them via the `providerOptions` escape hatch on any call:
99
+
100
+ ```typescript
101
+ await client.complete(messages, {
102
+ providerOptions: {
103
+ search_recency_filter: 'week', // 'month' | 'week' | 'day' | 'hour'
104
+ search_domain_filter: ['nytimes.com', 'reuters.com'], // allowlist
105
+ },
106
+ });
107
+ ```
108
+
109
+ `providerOptions` is `Record<string, unknown>` — unknown fields are forwarded to the Perplexity API unchanged, so newly released filters work without a toolkit update. Other providers ignore `providerOptions`.
110
+
111
+ ### Reasoning models
112
+
113
+ Pass reasoning model IDs as the `model` string:
114
+
115
+ ```typescript
116
+ const client = createClientFromEnv('perplexity', 'sonar-reasoning-pro');
117
+ ```
118
+
119
+ Available models (verified 2026-05-08):
120
+
121
+ | Model | Notes |
122
+ |---|---|
123
+ | `sonar` | Lightweight search model. Default. |
124
+ | `sonar-pro` | Advanced search, more citations. |
125
+ | `sonar-reasoning-pro` | Chain-of-thought reasoning. Replaces deprecated `sonar-reasoning`. |
126
+ | `sonar-deep-research` | Exhaustive research. Perplexity docs indicate async job support — treat as experimental with this toolkit. |
127
+
128
+ `structured()` with `sonar-reasoning-pro` works correctly — reasoning tokens (`<think>...</think>`) are stripped before JSON parsing.
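A sketch of such a call, assuming `zod` is installed (the schema and prompt are illustrative):

```typescript
import { z } from 'zod';
import { createClientFromEnv } from '@diabolicallabs/llm-client';

// Any object with a parse(data) method is accepted as the schema.
const Headline = z.object({
  headline: z.string(),
  topics: z.array(z.string()),
});

const client = createClientFromEnv('perplexity', 'sonar-reasoning-pro');

// The <think>...</think> block emitted by the reasoning model is stripped
// before the JSON payload is parsed and validated against the schema.
const result = await client.structured(
  [{ role: 'user', content: 'Return one AI headline from this week as JSON.' }],
  Headline,
);
```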
129
+
130
+ `sonar-deep-research` is accepted as a model string. If Perplexity's API returns an incompatible async response shape, the call will throw a clear `LlmError`. In that case, use `sonar-reasoning-pro` instead, or wait for a future deep-research-specific brief.
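One way to apply that guidance — an illustrative fallback, not prescribed by the package:

```typescript
import { createClientFromEnv, LlmError, type LlmMessage } from '@diabolicallabs/llm-client';

const messages: LlmMessage[] = [
  { role: 'user', content: 'Deep dive: the current state of open-weight LLMs.' },
];

try {
  const deep = createClientFromEnv('perplexity', 'sonar-deep-research');
  // Exhaustive research runs long — give the call a generous per-attempt budget.
  console.log((await deep.complete(messages, { timeoutMs: 120_000 })).content);
} catch (err) {
  if (!(err instanceof LlmError)) throw err;
  // If the deep-research response shape is incompatible, fall back to reasoning.
  const fallback = createClientFromEnv('perplexity', 'sonar-reasoning-pro');
  console.log((await fallback.complete(messages)).content);
}
```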
61
131
 
62
132
  ## API
63
133
 
@@ -70,17 +140,114 @@ Creates an `LlmClient` for the given provider.
70
140
  Reads the API key from the environment automatically:
71
141
  - `anthropic` → `ANTHROPIC_API_KEY`
72
142
  - `openai` → `OPENAI_API_KEY`
73
- - `google` → `GOOGLE_AI_API_KEY`
143
+ - `gemini` → `GOOGLE_AI_API_KEY`
74
144
  - `deepseek` → `DEEPSEEK_API_KEY`
145
+ - `perplexity` → `PERPLEXITY_API_KEY`
75
146
 
76
147
  ### `LlmClient` interface
77
148
 
78
149
  | Method | Description |
79
150
  |---|---|
80
- | `complete(messages, options?)` | Non-streaming completion. Returns `LlmResponse`. |
81
- | `stream(messages, options?)` | Streaming — async generator of `LlmStreamChunk`. Final chunk includes `usage`. |
151
+ | `complete(messages, options?)` | Non-streaming completion. Returns `LlmResponse` (includes `citations` for Perplexity). |
152
+ | `stream(messages, options?)` | Streaming — async generator of `LlmStreamChunk`. Final chunk includes `usage`. Citations unavailable. |
82
153
  | `structured(messages, schema, options?)` | Structured output validated against a Zod schema. Returns `LlmStructuredResponse<T>`. |
83
154
 
155
+ All methods accept `LlmCallOptions` as the options parameter:
156
+
157
+ ```typescript
158
+ interface LlmCallOptions {
159
+ model?: string;
160
+ maxTokens?: number;
161
+ temperature?: number;
162
+ timeoutMs?: number; // Per-call timeout (ms). Overrides config.timeoutMs.
163
+ signal?: AbortSignal; // Caller-supplied cancel signal. Never retried.
164
+ streamStallTimeoutMs?: number; // Per-chunk silence timeout for stream(). Default 30000.
165
+ providerOptions?: Record<string, unknown>; // Perplexity search filters, etc.
166
+ }
167
+ ```
168
+
169
+ ## Cancellation, timeouts, stall detection
170
+
171
+ ### Per-call timeout override
172
+
173
+ The default timeout is set at client construction via `config.timeoutMs` (default 30 000 ms). Override it per-call:
174
+
175
+ ```typescript
176
+ const client = createClient({
177
+ provider: 'anthropic',
178
+ model: 'claude-sonnet-4-6',
179
+ apiKey: process.env.ANTHROPIC_API_KEY!,
180
+ timeoutMs: 30_000, // client default
181
+ });
182
+
183
+ // This call gets 90 seconds — useful for sonar-deep-research or long reasoning
184
+ const response = await client.complete(messages, { timeoutMs: 90_000 });
185
+ ```
186
+
187
+ On timeout, `LlmError.kind === 'timeout'` and `retryable === true`. Each retry attempt gets a fresh deadline — the timeout resets per attempt, not across the full retry sequence.
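Branching on the timeout case after the client's built-in retries are exhausted might look like this sketch (model and prompt are illustrative):

```typescript
import { createClientFromEnv, LlmError, type LlmMessage } from '@diabolicallabs/llm-client';

const client = createClientFromEnv('anthropic', 'claude-sonnet-4-6');
const messages: LlmMessage[] = [
  { role: 'user', content: 'Write a detailed migration plan.' },
];

try {
  // 90 s per attempt; because the deadline is per attempt, each retry starts fresh.
  const response = await client.complete(messages, { timeoutMs: 90_000 });
  console.log(response.content);
} catch (err) {
  if (err instanceof LlmError && err.kind === 'timeout') {
    // Retries are already exhausted here — raise timeoutMs, shrink the task, or surface the error.
    console.error('request timed out after retries');
  } else {
    throw err;
  }
}
```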
188
+
189
+ ### Caller AbortSignal
190
+
191
+ Pass any `AbortSignal` to cancel an in-flight call immediately:
192
+
193
+ ```typescript
194
+ const ac = new AbortController();
195
+
196
+ // Cancel on user navigation, request supersede, shutdown, etc.
197
+ const responsePromise = client.complete(messages, { signal: ac.signal });
198
+
199
+ // Cancel before the call returns
200
+ ac.abort('user navigated away');
201
+
202
+ try {
203
+ await responsePromise;
204
+ } catch (err) {
205
+ if (err instanceof LlmError && err.kind === 'cancelled') {
206
+ // Gracefully handle the cancellation
207
+ }
208
+ }
209
+ ```
210
+
211
+ - A signal already aborted at call time throws immediately — no SDK call is made, no retry.
212
+ - A mid-call abort propagates to the SDK (Anthropic, OpenAI, DeepSeek, Perplexity) or wins a `Promise.race` (Gemini). `kind === 'cancelled'`, `retryable === false`. Never retried.
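A common application of the cancellation semantics above is superseding: abort the previous in-flight request when a newer one arrives. A sketch (the helper and model name are illustrative):

```typescript
import { createClientFromEnv, LlmError, type LlmMessage } from '@diabolicallabs/llm-client';

const client = createClientFromEnv('openai', 'gpt-4o-mini');

let inFlight: AbortController | undefined;

// Each new question cancels whatever call is still running.
async function ask(question: string): Promise<string | undefined> {
  inFlight?.abort('superseded by a newer request');
  inFlight = new AbortController();

  const messages: LlmMessage[] = [{ role: 'user', content: question }];
  try {
    const response = await client.complete(messages, { signal: inFlight.signal });
    return response.content;
  } catch (err) {
    // A superseded call surfaces as kind === 'cancelled' and is never retried.
    if (err instanceof LlmError && err.kind === 'cancelled') return undefined;
    throw err;
  }
}
```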
213
+
214
+ ### Stream stall detection
215
+
216
+ Without stall detection, a stream that emits a first chunk and then silently hangs would block the consumer indefinitely. `streamStallTimeoutMs` arms a per-chunk timer — if no chunk arrives within the window, the stream is aborted and a `kind: 'stream_stall'` error surfaces:
217
+
218
+ ```typescript
219
+ try {
220
+ for await (const chunk of client.stream(messages, { streamStallTimeoutMs: 10_000 })) {
221
+ process.stdout.write(chunk.token);
222
+ }
223
+ } catch (err) {
224
+ if (err instanceof LlmError && err.kind === 'stream_stall') {
225
+ console.error('stream stalled — retry or fallback');
226
+ }
227
+ }
228
+ ```
229
+
230
+ - Default `streamStallTimeoutMs`: 30 000 ms (set independently of `timeoutMs` — tolerant of reasoning-model think-pauses).
231
+ - The stall timer resets after each chunk arrives, so slow-but-not-stalled streams complete normally.
232
+ - Stall errors are **not retried** — partial output is unsafe to re-issue. The error surfaces to the caller.
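Because the client never retries a stalled stream, a caller that wants a second attempt has to re-issue the request itself. A sketch of one such policy, falling back to a non-streaming call (model and prompt are illustrative):

```typescript
import { createClientFromEnv, LlmError, type LlmMessage } from '@diabolicallabs/llm-client';

const client = createClientFromEnv('deepseek', 'deepseek-chat');
const messages: LlmMessage[] = [
  { role: 'user', content: 'Stream a long explanation of consistent hashing.' },
];

try {
  for await (const chunk of client.stream(messages, { streamStallTimeoutMs: 10_000 })) {
    process.stdout.write(chunk.token);
  }
} catch (err) {
  if (!(err instanceof LlmError) || err.kind !== 'stream_stall') throw err;
  // Partial output was already written above; re-issue as a single non-streaming call.
  const response = await client.complete(messages);
  console.log('\n--- retried without streaming ---\n' + response.content);
}
```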
233
+
234
+ ### `LlmError.kind` discriminator
235
+
236
+ ```typescript
237
+ type LlmErrorKind = 'cancelled' | 'timeout' | 'stream_stall' | 'http' | 'network' | 'unknown';
238
+
239
+ class LlmError extends Error {
240
+ readonly provider: string;
241
+ readonly statusCode?: number;
242
+ readonly retryable: boolean;
243
+ readonly kind: LlmErrorKind | undefined; // undefined on errors from older paths
244
+ }
245
+ ```
246
+
247
+ ### Gemini cancellation caveat
248
+
249
+ `@google/genai` does not accept a per-call `AbortSignal`. Cancellation uses `Promise.race` — when the internal controller aborts, we stop awaiting, but the SDK's HTTP request continues in the background until the SDK-level timeout fires. The SDK client is constructed with `httpOptions.timeout = configTimeoutMs * 2` as a backstop. This bounds the leaked request to at most 2× the configured timeout. Native signal support will be added when the SDK provides it.
250
+
84
251
  ## Error handling
85
252
 
86
253
  All provider errors are normalized into `LlmError`:
@@ -92,12 +259,12 @@ try {
92
259
  const response = await client.complete(messages);
93
260
  } catch (err) {
94
261
  if (err instanceof LlmError) {
95
- console.error(err.provider, err.statusCode, err.retryable);
262
+ console.error(err.provider, err.statusCode, err.retryable, err.kind);
96
263
  }
97
264
  }
98
265
  ```
99
266
 
100
- Retryable errors (429, 5xx, network failures) are retried automatically with exponential backoff and full jitter before throwing.
267
+ Retryable errors (429, 5xx, network failures, timeouts) are retried automatically with exponential backoff and full jitter before throwing. Cancelled and stream-stall errors are never retried.
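For reference, "exponential backoff with full jitter" means each retry waits a uniformly random delay up to an exponentially growing cap. A sketch of the general technique — the base delay, cap, and attempt limit used by the package are not documented here:

```typescript
// General shape of full-jitter backoff (constants are examples, not the package's values).
function fullJitterDelayMs(attempt: number, baseMs = 250, capMs = 10_000): number {
  const exponentialCap = Math.min(capMs, baseMs * 2 ** attempt);
  return Math.random() * exponentialCap; // uniform in [0, exponentialCap)
}

// attempt 0 → up to 250 ms, attempt 1 → up to 500 ms, attempt 2 → up to 1 s, ...
```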
101
268
 
102
269
  ## Token normalization
103
270
 
package/dist/index.d.ts CHANGED
@@ -2,6 +2,17 @@
2
2
  * Core type definitions for @diabolicallabs/llm-client.
3
3
  * These are the stable public API surface — implementation is in Week 2.
4
4
  * Types here match the spec in briefs/brief-platform.md §4.1 exactly.
5
+ *
6
+ * Week 5 additions:
7
+ * LlmResponse.citations — populated by the Perplexity provider; undefined for all others.
8
+ * LlmCallOptions — per-call options type extracted for reuse; adds providerOptions escape hatch.
9
+ *
10
+ * Week 6 additions (v0.3.0 — abort/timeout/stall):
11
+ * LlmCallOptions.timeoutMs — per-call timeout override (ms); overrides config.timeoutMs.
12
+ * LlmCallOptions.signal — caller-supplied AbortSignal; aborts in-flight call.
13
+ * LlmCallOptions.streamStallTimeoutMs — per-stream stall detection (ms); default 30000.
14
+ * LlmClientConfig.streamStallTimeoutMs — config-level stall default.
15
+ * LlmError.kind — discriminator for error classification.
5
16
  */
6
17
  interface LlmMessage {
7
18
  role: 'system' | 'user' | 'assistant';
@@ -16,6 +27,12 @@ interface LlmClientConfig {
16
27
  maxTokens?: number;
17
28
  temperature?: number;
18
29
  timeoutMs?: number;
30
+ /**
31
+ * Default stall timeout for stream() calls (ms). Fires when no chunk is received
32
+ * for this duration. Independent of timeoutMs — tolerant of reasoning-model think-pauses.
33
+ * Default: 30000.
34
+ */
35
+ streamStallTimeoutMs?: number;
19
36
  }
20
37
  interface LlmUsage {
21
38
  inputTokens: number;
@@ -29,22 +46,76 @@ interface LlmResponse {
29
46
  model: string;
30
47
  usage: LlmUsage;
31
48
  latencyMs: number;
49
+ /**
50
+ * Web citations returned by the Perplexity provider.
51
+ * Populated only when the Perplexity API returns source references.
52
+ * Always undefined for Anthropic, OpenAI, Gemini, and DeepSeek.
53
+ * Deduplicated by URL within a single response.
54
+ */
55
+ citations?: Array<{
56
+ url: string;
57
+ title?: string;
58
+ }>;
59
+ }
60
+ /**
61
+ * Per-call options shared across complete(), stream(), and structured().
62
+ * Extends the standard model/maxTokens/temperature overrides with:
63
+ * timeoutMs — per-call timeout override; overrides config.timeoutMs for this call only.
64
+ * signal — caller-supplied AbortSignal; aborts the in-flight call immediately.
65
+ * A pre-aborted signal throws without making an SDK call (no retry).
66
+ * A mid-call abort throws kind:'cancelled', retryable:false (no retry).
67
+ * streamStallTimeoutMs — per-call stall detection for stream(); overrides config default.
68
+ * providerOptions — generic escape hatch for provider-specific parameters.
69
+ * The Perplexity provider reads search_domain_filter and
70
+ * search_recency_filter from this field; other providers ignore it.
71
+ * Unknown fields are passed through unchanged.
72
+ */
73
+ interface LlmCallOptions extends Partial<Pick<LlmClientConfig, 'model' | 'maxTokens' | 'temperature' | 'timeoutMs'>> {
74
+ /** Caller-supplied AbortSignal. Cancels the in-flight call. Never retried. */
75
+ signal?: AbortSignal;
76
+ /**
77
+ * Per-call stall timeout for stream() in ms. Overrides config.streamStallTimeoutMs.
78
+ * Fires when no chunk arrives within this window. Default: config.streamStallTimeoutMs ?? 30000.
79
+ */
80
+ streamStallTimeoutMs?: number;
81
+ providerOptions?: Record<string, unknown>;
32
82
  }
33
83
  interface LlmStreamChunk {
34
84
  token: string;
35
85
  usage?: LlmUsage;
36
86
  }
87
+ /**
88
+ * Discriminator for LlmError — lets callers branch on error class without
89
+ * parsing message strings.
90
+ *
91
+ * cancelled — AbortSignal fired (caller-initiated). Never retried.
92
+ * timeout — Per-call timeoutMs deadline exceeded. Retried by withRetry.
93
+ * stream_stall — No chunk received within streamStallTimeoutMs. Not retried
94
+ * (partial stream output is unsafe to re-issue).
95
+ * http — Non-retryable HTTP error (4xx excluding 429).
96
+ * network — Retryable network-layer error (ECONNRESET, ETIMEDOUT, etc.).
97
+ * unknown — Unclassified error.
98
+ */
99
+ type LlmErrorKind = 'cancelled' | 'timeout' | 'stream_stall' | 'http' | 'network' | 'unknown';
37
100
  declare class LlmError extends Error {
38
101
  readonly name = "LlmError";
39
102
  readonly provider: string;
40
103
  readonly statusCode: number | undefined;
41
104
  readonly retryable: boolean;
105
+ /**
106
+ * Optional error kind discriminator. Present on errors produced by the abort/timeout/stall
107
+ * machinery (v0.3.0+). May be undefined on errors from providers that pre-date the kind field
108
+ * or on errors that fall through to the generic normalization path.
109
+ * Typed as LlmErrorKind | undefined to satisfy exactOptionalPropertyTypes.
110
+ */
111
+ readonly kind: LlmErrorKind | undefined;
42
112
  readonly cause: unknown;
43
113
  constructor(opts: {
44
114
  message: string;
45
115
  provider: string;
46
116
  statusCode?: number;
47
117
  retryable: boolean;
118
+ kind?: LlmErrorKind;
48
119
  cause?: unknown;
49
120
  });
50
121
  }
@@ -55,11 +126,11 @@ type LlmStructuredResponse<T> = {
55
126
  };
56
127
  interface LlmClient {
57
128
  readonly config: Readonly<LlmClientConfig>;
58
- complete(messages: LlmMessage[], options?: Partial<Pick<LlmClientConfig, 'model' | 'maxTokens' | 'temperature'>>): Promise<LlmResponse>;
59
- stream(messages: LlmMessage[], options?: Partial<Pick<LlmClientConfig, 'model' | 'maxTokens' | 'temperature'>>): AsyncGenerator<LlmStreamChunk>;
129
+ complete(messages: LlmMessage[], options?: LlmCallOptions): Promise<LlmResponse>;
130
+ stream(messages: LlmMessage[], options?: LlmCallOptions): AsyncGenerator<LlmStreamChunk>;
60
131
  structured<T>(messages: LlmMessage[], schema: {
61
132
  parse: (data: unknown) => T;
62
- }, options?: Partial<Pick<LlmClientConfig, 'model' | 'maxTokens' | 'temperature'>>): Promise<LlmStructuredResponse<T>>;
133
+ }, options?: LlmCallOptions): Promise<LlmStructuredResponse<T>>;
63
134
  }
64
135
 
65
136
  /**
@@ -73,15 +144,13 @@ interface LlmClient {
73
144
  * 'openai' → fully implemented (Week 2)
74
145
  * 'gemini' → fully implemented (Week 3)
75
146
  * 'deepseek' → fully implemented (Week 3)
76
- * 'perplexity' → stub, throws "not yet implemented" (later week)
147
+ * 'perplexity' → fully implemented (Week 5): search-grounded, citations, providerOptions
77
148
  */
78
149
 
79
150
  /**
80
151
  * Create an LlmClient for the given provider and config.
81
152
  * Dispatches to the provider-specific implementation.
82
- *
83
- * Anthropic, OpenAI, Gemini, and DeepSeek are fully implemented.
84
- * Perplexity is a type-registered stub that throws "not yet implemented".
153
+ * All five providers are fully implemented.
85
154
  */
86
155
  declare function createClient(config: LlmClientConfig): LlmClient;
87
156
  /**
@@ -92,10 +161,10 @@ declare function createClient(config: LlmClientConfig): LlmClient;
92
161
  * openai → OPENAI_API_KEY
93
162
  * gemini → GOOGLE_AI_API_KEY
94
163
  * deepseek → DEEPSEEK_API_KEY
95
- * perplexity → PERPLEXITY_API_KEY
164
+ * perplexity → PERPLEXITY_API_KEY — recommended default model: 'sonar'
96
165
  *
97
166
  * Throws LlmError if the required env var is not set.
98
167
  */
99
168
  declare function createClientFromEnv(provider: LlmClientConfig['provider'], model: string, overrides?: Partial<Omit<LlmClientConfig, 'provider' | 'model' | 'apiKey'>>): LlmClient;
100
169
 
101
- export { type LlmClient, type LlmClientConfig, LlmError, type LlmMessage, type LlmResponse, type LlmStreamChunk, type LlmStructuredResponse, type LlmUsage, createClient, createClientFromEnv };
170
+ export { type LlmCallOptions, type LlmClient, type LlmClientConfig, LlmError, type LlmMessage, type LlmResponse, type LlmStreamChunk, type LlmStructuredResponse, type LlmUsage, createClient, createClientFromEnv };