@diabolicallabs/llm-client 0.2.0 → 0.4.0

package/README.md CHANGED
@@ -6,7 +6,7 @@ Unified LLM API across Anthropic, OpenAI, Google Gemini, DeepSeek, and Perplexit
 
 ## Status
 
- **Published — v0.2.0.** All five providers are implemented. Perplexity adds web-grounded responses with citation extraction and search filters.
+ **Published — v0.4.0.** All five providers are implemented. v0.4.0 adds native strict structured outputs (OpenAI json_schema, Anthropic tool-use, Gemini responseSchema), triggered automatically when a Zod 4 schema is passed.
 
 ## Install
 
@@ -42,13 +42,74 @@ for await (const chunk of client.stream([{ role: 'user', content: 'Hello' }])) {
   process.stdout.write(chunk.token);
 }
 
- // Structured output (Zod schema)
+ // Structured output: a Zod 4 schema triggers strict native mode automatically
 import { z } from 'zod';
 const schema = z.object({ name: z.string(), score: z.number() });
 const result = await client.structured(messages, schema);
 // result.data is typed as { name: string; score: number }
+ // result.model and result.id are populated (v0.4.0+)
 ```
 
+ ## Strict structured outputs (v0.4.0)
+
+ Pass a **Zod 4** schema to `structured()` and the toolkit automatically routes to the strictest native path available for each provider. No opt-in flag required.
+
+ ```typescript
+ import { z } from 'zod';
+ const schema = z.object({
+   topic: z.string(),
+   bullets: z.array(z.string()),
+ });
+
+ const result = await client.structured(messages, schema);
+ // result.data — typed and Zod-validated
+ // result.model — model ID used (always present, v0.4.0+)
+ // result.id — provider request ID for tracing (OpenAI + Anthropic)
+ // result.citations — Perplexity citations if any
+ ```
+
+ ### How detection works
+
+ The toolkit checks for Zod 4's internal `_zod` marker at runtime. If the schema is a Zod 4 instance, it converts it to JSON Schema with Zod 4's built-in `z.toJSONSchema()` and routes to the native path. Any other plain `{ parse }` object falls back to the v0.3.0 system-prompt path; Zod 3 schemas are the one exception and are rejected outright (see "Zod 3 schemas" below).
+
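+ A minimal sketch of the runtime check (the helper names here are illustrative, not the library's internals):
+
+ ```typescript
+ import { z } from 'zod'; // Zod 4
+
+ // The narrow schema interface structured() accepts: anything with parse().
+ interface ParseLike<T> { parse: (data: unknown) => T }
+
+ // Zod 4 instances carry the internal `_zod` marker; Zod 3 instances do not.
+ function isZod4Schema(schema: ParseLike<unknown>): schema is z.ZodType {
+   return typeof schema === 'object' && schema !== null && '_zod' in schema;
+ }
+
+ function toNativeJsonSchema(schema: ParseLike<unknown>): Record<string, unknown> | undefined {
+   if (!isZod4Schema(schema)) return undefined; // prompt path or Zod 3 rejection, handled elsewhere
+   return z.toJSONSchema(schema) as Record<string, unknown>; // Zod 4's built-in converter
+ }
+ ```
+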
+ ### Schema-feature support matrix
+
+ | Provider | Native mode | What's enforced | Known limits |
+ |---|---|---|---|
+ | OpenAI (`gpt-5.x`) | `response_format: { type: 'json_schema', strict: true }` | Schema structure guaranteed; model cannot produce off-schema output | No `format`, `pattern`, or recursive schemas (`z.lazy()`). Throws at conversion time with a clear message. |
+ | Anthropic | Tool-use with forced `tool_choice: { type: 'tool', name: 'extract' }` | Model must call the tool; `input` is pre-parsed JSON | Defense-in-depth `schema.parse()` still runs |
+ | Gemini | `responseSchema` (OpenAPI 3.0) + `responseMimeType: 'application/json'` | Schema communicated to the model; belt-and-braces fence-strip retained | Tested via mocks only — file an issue if Gemini's API rejects the schema shape |
+ | DeepSeek | None (prompt-only, API limitation) | System-prompt nudge + `schema.parse()` | Same as v0.3.0 |
+ | Perplexity | None (prompt-only, API limitation) | System-prompt nudge + `<think>` strip + `schema.parse()` | Same as v0.3.0; `citations` propagated to structured response |
+
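+ To make the "Native mode" column concrete, the native request shapes look roughly like this (field layouts follow each provider's public API; `jsonSchema` is the conversion output from "How detection works" above, and `extract` is the tool name noted in the table):
+
+ ```typescript
+ declare const jsonSchema: Record<string, unknown>; // z.toJSONSchema(schema)
+
+ // OpenAI (Chat Completions): strict json_schema response format.
+ const openaiParams = {
+   response_format: {
+     type: 'json_schema' as const,
+     json_schema: { name: 'extract', strict: true, schema: jsonSchema },
+   },
+ };
+
+ // Anthropic: one tool plus forced tool_choice; the model must emit tool input.
+ const anthropicParams = {
+   tools: [{ name: 'extract', description: 'Return the structured result.', input_schema: jsonSchema }],
+   tool_choice: { type: 'tool' as const, name: 'extract' },
+ };
+
+ // Gemini (@google/genai): schema + MIME type in the generation config.
+ const geminiConfig = {
+   responseMimeType: 'application/json',
+   responseSchema: jsonSchema, // OpenAPI-3.0-style subset
+ };
+ ```
+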
+ ### Prompt-mode escape hatch
+
+ If your schema uses a feature unsupported in strict mode (e.g. `z.function()`, `z.lazy()`) and you need to keep using it, pass the escape hatch:
+
+ ```typescript
+ const result = await client.structured(messages, schema, {
+   providerOptions: { structuredMode: 'prompt' },
+ });
+ // Forces the v0.3.0 prompt-only path regardless of schema type
+ ```
+
+ Alternatively, catch the `LlmError` thrown during schema conversion and inform the user:
+
+ ```typescript
+ try {
+   const result = await client.structured(messages, schema);
+ } catch (err) {
+   if (err instanceof LlmError && err.kind === 'unknown') {
+     // Schema contains an unrepresentable feature — the message names it
+     console.error(err.message);
+   }
+ }
+ ```
+
+ ### Zod 3 schemas
+
+ If a Zod 3 schema is passed, the toolkit throws `LlmError` with a clear "upgrade to Zod 4" message rather than silently falling through to prompt mode. Pass `providerOptions.structuredMode = 'prompt'` if you cannot upgrade immediately.
+
 ## Provider universe
 
 | Provider | Status | Env var |
@@ -159,10 +220,95 @@ interface LlmCallOptions {
   model?: string;
   maxTokens?: number;
   temperature?: number;
+   timeoutMs?: number; // Per-call timeout (ms). Overrides config.timeoutMs.
+   signal?: AbortSignal; // Caller-supplied cancel signal. Never retried.
+   streamStallTimeoutMs?: number; // Per-chunk silence timeout for stream(). Default 30000.
   providerOptions?: Record<string, unknown>; // Perplexity search filters, etc.
 }
 ```
 
+ ## Cancellation, timeouts, stall detection
+
+ ### Per-call timeout override
+
+ The default timeout is set at client construction via `config.timeoutMs` (default 30,000 ms). Override it per call:
+
+ ```typescript
+ const client = createClient({
+   provider: 'anthropic',
+   model: 'claude-sonnet-4-6',
+   apiKey: process.env.ANTHROPIC_API_KEY!,
+   timeoutMs: 30_000, // client default
+ });
+
+ // This call gets 90 seconds — useful for sonar-deep-research or long reasoning
+ const response = await client.complete(messages, { timeoutMs: 90_000 });
+ ```
+
+ On timeout, `LlmError.kind === 'timeout'` and `retryable === true`. Each retry attempt gets a fresh deadline — the timeout resets per attempt, not across the full retry sequence.
+
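+ To picture the per-attempt deadline, here is a simplified sketch (not the library's actual retry loop; `attemptOnce` is an illustrative stand-in for one SDK call):
+
+ ```typescript
+ // Each attempt gets its own AbortSignal.timeout(timeoutMs), so a retry is never
+ // charged for time spent by earlier attempts.
+ async function withFreshDeadline<T>(
+   attemptOnce: (signal: AbortSignal) => Promise<T>,
+   timeoutMs: number,
+   maxAttempts: number,
+ ): Promise<T> {
+   let lastErr: unknown;
+   for (let attempt = 1; attempt <= maxAttempts; attempt++) {
+     try {
+       return await attemptOnce(AbortSignal.timeout(timeoutMs)); // fresh deadline per attempt
+     } catch (err) {
+       lastErr = err; // a real implementation would rethrow non-retryable errors here
+     }
+   }
+   throw lastErr;
+ }
+ ```
+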
+ ### Caller AbortSignal
+
+ Pass any `AbortSignal` to cancel an in-flight call immediately:
+
+ ```typescript
+ const ac = new AbortController();
+
+ // Cancel on user navigation, a superseding request, shutdown, etc.
+ const responsePromise = client.complete(messages, { signal: ac.signal });
+
+ // Cancel before the call returns
+ ac.abort('user navigated away');
+
+ try {
+   await responsePromise;
+ } catch (err) {
+   if (err instanceof LlmError && err.kind === 'cancelled') {
+     // Gracefully handle the cancellation
+   }
+ }
+ ```
+
+ - A signal already aborted at call time throws immediately — no SDK call is made, no retry.
+ - A mid-call abort propagates to the SDK (Anthropic, OpenAI, DeepSeek, Perplexity) or wins a `Promise.race` (Gemini). `kind === 'cancelled'`, `retryable === false`. Never retried.
+
+ ### Stream stall detection
+
+ Without stall detection, a stream that emits a first chunk and then silently hangs would block the consumer indefinitely. `streamStallTimeoutMs` arms a timer between chunks — if no chunk arrives within the window, the stream is aborted and a `kind: 'stream_stall'` error surfaces:
+
+ ```typescript
+ try {
+   for await (const chunk of client.stream(messages, { streamStallTimeoutMs: 10_000 })) {
+     process.stdout.write(chunk.token);
+   }
+ } catch (err) {
+   if (err instanceof LlmError && err.kind === 'stream_stall') {
+     console.error('stream stalled — retry or fallback');
+   }
+ }
+ ```
+
+ - Default `streamStallTimeoutMs`: 30,000 ms (set independently of `timeoutMs` — tolerant of reasoning-model think-pauses).
+ - The stall timer resets after each chunk arrives, so slow-but-not-stalled streams complete normally (see the sketch below).
+ - Stall errors are **not retried** — partial output is unsafe to re-issue. The error surfaces to the caller.
+
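+ The watchdog pattern itself is small; a simplified sketch (illustrative, not the library's internal implementation):
+
+ ```typescript
+ // Wrap any token stream so that a silent gap longer than stallMs aborts iteration.
+ async function* withStallWatchdog<T>(source: AsyncIterable<T>, stallMs: number): AsyncGenerator<T> {
+   const it = source[Symbol.asyncIterator]();
+   while (true) {
+     let timer!: ReturnType<typeof setTimeout>;
+     // Re-armed for every chunk: the timer races against the next read.
+     const stalled = new Promise<never>((_, reject) => {
+       timer = setTimeout(() => reject(new Error('stream_stall')), stallMs);
+     });
+     try {
+       const next = await Promise.race([it.next(), stalled]);
+       if (next.done) return;
+       yield next.value;
+     } finally {
+       clearTimeout(timer);
+     }
+   }
+ }
+ ```
+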
+ ### `LlmError.kind` discriminator
+
+ ```typescript
+ type LlmErrorKind = 'cancelled' | 'timeout' | 'stream_stall' | 'http' | 'network' | 'unknown';
+
+ class LlmError extends Error {
+   readonly provider: string;
+   readonly statusCode?: number;
+   readonly retryable: boolean;
+   readonly kind: LlmErrorKind | undefined; // undefined on errors from older paths
+ }
+ ```
+
+ ### Gemini cancellation caveat
+
+ `@google/genai` does not accept a per-call `AbortSignal`. Cancellation uses `Promise.race` — when the internal controller aborts, we stop awaiting, but the SDK's HTTP request continues in the background until the SDK-level timeout fires. The SDK client is constructed with `httpOptions.timeout = configTimeoutMs * 2` as a backstop, which bounds the leaked request to at most 2× the configured timeout. Native signal support will be added when the SDK provides it.
+
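+ In outline, the race looks like this (a sketch under the constraint above; `raceWithAbort` and `sdkCall` are illustrative names, with `sdkCall` standing in for the Gemini SDK invocation):
+
+ ```typescript
+ // The caller's signal wins the race; the orphaned SDK promise is bounded by the
+ // httpOptions.timeout backstop (configTimeoutMs * 2).
+ function raceWithAbort<T>(sdkCall: Promise<T>, signal: AbortSignal): Promise<T> {
+   if (signal.aborted) return Promise.reject(new Error('cancelled'));
+   return Promise.race([
+     sdkCall,
+     new Promise<never>((_, reject) => {
+       signal.addEventListener('abort', () => reject(new Error('cancelled')), { once: true });
+     }),
+   ]);
+ }
+ ```
+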
 ## Error handling
 
 All provider errors are normalized into `LlmError`:
@@ -174,12 +320,12 @@ try {
   const response = await client.complete(messages);
 } catch (err) {
   if (err instanceof LlmError) {
-     console.error(err.provider, err.statusCode, err.retryable);
+     console.error(err.provider, err.statusCode, err.retryable, err.kind);
   }
 }
 ```
 
- Retryable errors (429, 5xx, network failures) are retried automatically with exponential backoff and full jitter before throwing.
+ Retryable errors (429, 5xx, network failures, timeout) are retried automatically with exponential backoff and full jitter before throwing. Cancelled and stream-stall errors are never retried.
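+
+ "Full jitter" here is the standard pattern: sleep for a uniform-random duration up to an exponential cap. A sketch with illustrative constants:
+
+ ```typescript
+ // Full-jitter backoff: delay is uniform in [0, min(cap, base * 2^attempt)].
+ function backoffDelayMs(attempt: number, baseMs = 250, capMs = 10_000): number {
+   const ceiling = Math.min(capMs, baseMs * 2 ** attempt);
+   return Math.random() * ceiling;
+ }
+ ```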
 
 ## Token normalization
 
package/dist/index.d.ts CHANGED
@@ -6,6 +6,19 @@
  * Week 5 additions:
  *   LlmResponse.citations — populated by the Perplexity provider; undefined for all others.
  *   LlmCallOptions — per-call options type extracted for reuse; adds providerOptions escape hatch.
+ *
+ * Week 6 additions (v0.3.0 — abort/timeout/stall):
+ *   LlmCallOptions.timeoutMs — per-call timeout override (ms); overrides config.timeoutMs.
+ *   LlmCallOptions.signal — caller-supplied AbortSignal; aborts in-flight call.
+ *   LlmCallOptions.streamStallTimeoutMs — per-stream stall detection (ms); default 30000.
+ *   LlmClientConfig.streamStallTimeoutMs — config-level stall default.
+ *   LlmError.kind — discriminator for error classification.
+ *
+ * v0.4.0 additions (strict structured outputs):
+ *   LlmStructuredResponse.model — model ID actually used (always populated).
+ *   LlmStructuredResponse.id — provider request ID where available (debugging).
+ *   LlmStructuredResponse.citations — web citations from Perplexity structured responses.
+ *   LlmClient.structured JSDoc — Zod 4 trigger and structuredMode escape hatch.
 */
 interface LlmMessage {
   role: 'system' | 'user' | 'assistant';
@@ -20,6 +33,12 @@ interface LlmClientConfig {
   maxTokens?: number;
   temperature?: number;
   timeoutMs?: number;
+   /**
+    * Default stall timeout for stream() calls (ms). Fires when no chunk is received
+    * for this duration. Independent of timeoutMs — tolerant of reasoning-model think-pauses.
+    * Default: 30000.
+    */
+   streamStallTimeoutMs?: number;
 }
 interface LlmUsage {
   inputTokens: number;
@@ -47,41 +66,116 @@ interface LlmResponse {
 /**
  * Per-call options shared across complete(), stream(), and structured().
  * Extends the standard model/maxTokens/temperature overrides with:
- *   providerOptions generic escape hatch for provider-specific parameters.
- *     The Perplexity provider reads search_domain_filter and
- *     search_recency_filter from this field; other providers ignore it.
- *     Unknown fields are passed through unchanged.
+ *   timeoutMs — per-call timeout override; overrides config.timeoutMs for this call only.
+ *   signal — caller-supplied AbortSignal; aborts the in-flight call immediately.
+ *     A pre-aborted signal throws without making an SDK call (no retry).
+ *     A mid-call abort throws kind:'cancelled', retryable:false (no retry).
+ *   streamStallTimeoutMs — per-call stall detection for stream(); overrides config default.
+ *   providerOptions — generic escape hatch for provider-specific parameters.
+ *     The Perplexity provider reads search_domain_filter and
+ *     search_recency_filter from this field; other providers ignore it.
+ *     Unknown fields are passed through unchanged.
  */
- interface LlmCallOptions extends Partial<Pick<LlmClientConfig, 'model' | 'maxTokens' | 'temperature'>> {
+ interface LlmCallOptions extends Partial<Pick<LlmClientConfig, 'model' | 'maxTokens' | 'temperature' | 'timeoutMs'>> {
+   /** Caller-supplied AbortSignal. Cancels the in-flight call. Never retried. */
+   signal?: AbortSignal;
+   /**
+    * Per-call stall timeout for stream() in ms. Overrides config.streamStallTimeoutMs.
+    * Fires when no chunk arrives within this window. Default: config.streamStallTimeoutMs ?? 30000.
+    */
+   streamStallTimeoutMs?: number;
   providerOptions?: Record<string, unknown>;
 }
 interface LlmStreamChunk {
   token: string;
   usage?: LlmUsage;
 }
+ /**
+  * Discriminator for LlmError — lets callers branch on error class without
+  * parsing message strings.
+  *
+  * cancelled    — AbortSignal fired (caller-initiated). Never retried.
+  * timeout      — Per-call timeoutMs deadline exceeded. Retried by withRetry.
+  * stream_stall — No chunk received within streamStallTimeoutMs. Not retried
+  *                (partial stream output is unsafe to re-issue).
+  * http         — Non-retryable HTTP error (4xx excluding 429).
+  * network      — Retryable network-layer error (ECONNRESET, ETIMEDOUT, etc.).
+  * unknown      — Unclassified error.
+  */
+ type LlmErrorKind = 'cancelled' | 'timeout' | 'stream_stall' | 'http' | 'network' | 'unknown';
 declare class LlmError extends Error {
   readonly name = "LlmError";
   readonly provider: string;
   readonly statusCode: number | undefined;
   readonly retryable: boolean;
+   /**
+    * Optional error kind discriminator. Present on errors produced by the abort/timeout/stall
+    * machinery (v0.3.0+). May be undefined on errors from providers that pre-date the kind field
+    * or on errors that fall through to the generic normalization path.
+    * Typed as LlmErrorKind | undefined to satisfy exactOptionalPropertyTypes.
+    */
+   readonly kind: LlmErrorKind | undefined;
   readonly cause: unknown;
   constructor(opts: {
     message: string;
     provider: string;
     statusCode?: number;
     retryable: boolean;
+     kind?: LlmErrorKind;
     cause?: unknown;
   });
 }
+ /**
+  * Structured output response.
+  *
+  * v0.4.0 — additive fields:
+  *   model — model ID reported by the provider (always present).
+  *   id — provider request / message ID for tracing and debugging.
+  *     Populated by OpenAI (response.id) and Anthropic (response.id).
+  *     Undefined for Gemini, DeepSeek, and Perplexity.
+  *   citations — web citations propagated from Perplexity structured calls.
+  *     Undefined for all other providers.
+  */
 type LlmStructuredResponse<T> = {
   data: T;
+   model: string;
+   id?: string;
   usage: LlmUsage;
   latencyMs: number;
+   citations?: Array<{
+     url: string;
+     title?: string;
+   }>;
 };
 interface LlmClient {
   readonly config: Readonly<LlmClientConfig>;
   complete(messages: LlmMessage[], options?: LlmCallOptions): Promise<LlmResponse>;
   stream(messages: LlmMessage[], options?: LlmCallOptions): AsyncGenerator<LlmStreamChunk>;
+   /**
+    * Structured output — parses and validates the response against a schema.
+    *
+    * **Strict native mode (v0.4.0+):**
+    * Pass a Zod 4 schema to automatically opt into the provider's strictest native
+    * structured-output path:
+    *   - OpenAI: `response_format: { type: 'json_schema', strict: true }` (gpt-5.x family)
+    *   - Anthropic: forced tool-use with `tool_choice: { type: 'tool', name: 'extract' }`
+    *   - Gemini: `responseSchema` populated in GenerateContentConfig
+    *
+    * **Prompt-only fallback:**
+    * If the schema is not a Zod 4 instance (note: Zod 3 schemas are rejected with an
+    * LlmError rather than falling back), or if
+    * `options.providerOptions.structuredMode === 'prompt'` is set, the v0.3.0
+    * system-prompt + parse path is used instead. This is the escape hatch for:
+    *   - Zod 4 schemas that use unrepresentable features (z.function(), z.lazy(), etc.)
+    *   - Non-Zod schema objects that satisfy the narrow `{ parse }` interface
+    *   - DeepSeek and Perplexity (no native schema mode — always prompt-only)
+    *
+    * **Defense-in-depth:** schema.parse() is called on the parsed result even
+    * after a native strict-mode call, to catch truncation or partial outputs.
+    *
+    * @param schema - A Zod 4 schema (triggers strict mode) or any `{ parse }` interface.
+    *   Using a narrower interface than ZodType avoids a hard zod dependency at
+    *   the types level.
+    */
   structured<T>(messages: LlmMessage[], schema: {
     parse: (data: unknown) => T;
   }, options?: LlmCallOptions): Promise<LlmStructuredResponse<T>>;