extrait 0.5.6 → 0.6.0

package/README.md CHANGED
@@ -1,6 +1,6 @@
 # extrait
 
-Structured JSON extraction from LLMs with validation, repair, and streaming.
+High-level LLM text generation and structured JSON extraction with validation, repair, and streaming.
 
 <p align="left">
   <a href="https://www.npmjs.com/package/extrait">
@@ -68,6 +68,7 @@ console.log(result.data);
 These examples cover the most common usage patterns in the repository.
 
 - [`examples/simple.ts`](examples/simple.ts) - Basic structured output with streaming
+- [`examples/generate.ts`](examples/generate.ts) - High-level text generation
 - [`examples/streaming.ts`](examples/streaming.ts) - Real-time partial output and snapshot updates
 - [`examples/calculator-tool.ts`](examples/calculator-tool.ts) - Structured extraction with MCP tools
 - [`examples/conversation.ts`](examples/conversation.ts) - Multi-turn prompts and multimodal content
@@ -76,6 +77,7 @@ These examples cover the most common usage patterns in the repository.
 
 ```bash
 bun run dev simple "Bun.js runtime"
+bun run dev generate "Bun.js runtime"
 bun run dev streaming
 bun run dev calculator-tool
 ```
@@ -107,6 +109,8 @@ const llm = createLLM({
   mode: "loose" | "strict", // loose allows repair
   selfHeal: 1, // optional retry attempts
   debug: false, // optional structured debug output
+  // or:
+  // debug: { enabled: true, verbose: true },
   systemPrompt: "You are a helpful assistant.",
   timeout: {
     request: 30_000,
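
The new type declarations at the bottom of this diff normalize `debug` into `{ enabled, colors, verbose, logger }`. A sketch of routing debug lines to a custom sink, assuming the public `debug` option accepts those same optional fields (this diff only confirms `enabled` and `verbose`):

```typescript
// Sketch only: `colors` and `logger` mirror NormalizedDebugConfig from the
// declarations below; whether createLLM's `debug` accepts them is an assumption.
const llm = createLLM({
  // ...other createLLM options as in the snippet above
  debug: {
    enabled: true,
    verbose: true, // also print parseSource (see the env-vars note below)
    colors: false, // plain lines, convenient for log files
    logger: (line: string) => console.error(`[extrait] ${line}`),
  },
});
```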
@@ -219,7 +223,17 @@ const result = await llm.structured(
     stream: {
       to: "stdout",
       onData: (event) => {
-        console.log("Partial data:", event.data);
+        if (event.delta.text) {
+          console.log("New visible text:", event.delta.text);
+        }
+        if (event.delta.reasoning) {
+          console.log("New reasoning text:", event.delta.reasoning);
+        }
+
+        console.log("Current visible text:", event.snapshot.text);
+        console.log("Current reasoning:", event.snapshot.reasoning);
+        console.log("Current structured snapshot:", event.snapshot.data);
+
         if (event.done) {
           console.log("Streaming done.");
         }
@@ -227,6 +241,7 @@ const result = await llm.structured(
     },
     request: {
       signal: AbortSignal.timeout(30_000), // optional AbortSignal
+      reasoningEffort: "medium", // optional reasoning effort hint
     },
     timeout: {
       request: 30_000, // ms per LLM HTTP request
@@ -238,6 +253,21 @@ const result = await llm.structured(
 
 `prompt()` builds an ordered `messages` payload. Use ``prompt`...` `` for a single string prompt, or the fluent builder for multi-turn conversations. The `LLMMessage` type is exported if you need to type your own message arrays.
 
+In `stream.onData`, the event is split into two layers:
+
+- `event.delta.text` is only the newly received visible text since the previous event.
+- `event.delta.reasoning` is only the newly received reasoning text since the previous event.
+- `event.snapshot.text` is the full visible text accumulated so far.
+- `event.snapshot.reasoning` is the full normalized reasoning accumulated so far.
+- `event.snapshot.data` is the best structured JSON snapshot that can be parsed from the stream so far. It may stay unchanged while `event.delta.text` continues to grow.
+
+Typical usage is:
+
+- render `event.delta.text` directly to a terminal or chat UI
+- optionally render `event.delta.reasoning` in a separate reasoning panel
+- use `event.snapshot.data` to drive partial structured UI state
+- use `event.snapshot.text` / `event.snapshot.reasoning` when you need the full accumulated state instead of only the latest increment
+
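A minimal consumer sketch of this contract, using only the event fields documented above (`render` is a hypothetical UI hook, and the accumulation invariant is an assumption implied by the docs, not verified here):

```typescript
// Sketch: deltas are increments, snapshots are accumulated state.
let accumulated = "";

const onData = (event: {
  delta: { text: string; reasoning: string };
  snapshot: { text: string; reasoning: string; data: unknown };
  done: boolean;
}) => {
  // Append only the new visible text.
  accumulated += event.delta.text;

  // Implied invariant (assumption): the snapshot equals all deltas so far.
  if (accumulated !== event.snapshot.text) {
    console.warn("delta/snapshot drift");
  }

  // Drive partial structured UI state from the best parse so far.
  render(event.snapshot.data);

  if (event.done) console.log("final text length:", event.snapshot.text.length);
};

declare function render(data: unknown): void; // hypothetical UI hook
```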
 You can also pass provider request options through `request`:
 
 ```typescript
@@ -254,6 +284,89 @@ const result = await llm.structured(
 );
 ```
 
+### Making Text Calls
+
+`generate()` is the high-level API for non-structured generation. It accepts the same prompt shapes as `structured()`, but does not inject any schema or parse the output.
+
+```typescript
+// Simple prompt
+const result = await llm.generate(
+  prompt`Write a short summary of ${topic}.`
+);
+
+// Multi-message prompt
+const result = await llm.generate(
+  prompt()
+    .system`You are a concise assistant.`
+    .user`Summarize: """${text}"""`
+);
+
+// Raw messages payload
+const result = await llm.generate({
+  prompt: {
+    messages: [
+      { role: "user", content: "Say hello in one sentence." },
+    ],
+  },
+});
+```
+
+Streaming mirrors `structured()`, except the snapshot only contains `text` and `reasoning`:
+
+```typescript
+const result = await llm.generate(
+  prompt`Explain ${topic} in one short paragraph.`,
+  {
+    stream: {
+      enabled: true,
+      onData: (event) => {
+        process.stdout.write(event.delta.text);
+
+        console.log("Full text so far:", event.snapshot.text);
+        console.log("Full reasoning so far:", event.snapshot.reasoning);
+
+        if (event.done) {
+          console.log("Streaming done.");
+        }
+      },
+    },
+  }
+);
+```
+
+Provider request options and MCP tools still go through `request`:
+
+```typescript
+const result = await llm.generate(
+  prompt`Use tools if needed and answer the user clearly.`,
+  {
+    request: {
+      temperature: 0,
+      maxTokens: 800,
+      reasoningEffort: "medium",
+      mcpClients: [calculatorMCP],
+      maxToolRounds: 8,
+    },
+  }
+);
+```
+
+On `openai-compatible` providers, `reasoningEffort` is sent as `reasoning_effort`, with `max` mapped to `xhigh`. On `anthropic-compatible` providers, it is sent as `output_config.effort` and auto-enables `thinking: { type: "adaptive" }`.
+
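A sketch of the mapping just described; the real logic lives inside the provider adapters and is not part of this diff, and the full effort union is an assumption (only `"medium"` and `"max"` appear here):

```typescript
// Sketch of the documented reasoningEffort wire mapping (not the shipped code).
type ReasoningEffort = "low" | "medium" | "high" | "max"; // assumed union

function mapReasoningEffort(
  provider: "openai-compatible" | "anthropic-compatible",
  effort: ReasoningEffort,
): Record<string, unknown> {
  if (provider === "openai-compatible") {
    // OpenAI-style APIs take `reasoning_effort`; `max` becomes `xhigh`.
    return { reasoning_effort: effort === "max" ? "xhigh" : effort };
  }
  // Anthropic-style APIs take `output_config.effort`,
  // and adaptive thinking is auto-enabled alongside it.
  return {
    output_config: { effort },
    thinking: { type: "adaptive" },
  };
}
```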
+For existing history or multi-turn conversations, pass `messages` directly:
+
+```typescript
+const messages = conversation("You are a helpful assistant.", [
+  { role: "user", text: "What is the speed of light?" },
+  { role: "assistant", text: "Approximately 299,792 km/s in a vacuum." },
+  { role: "user", text: "How long does light take to reach Earth from the Sun?" },
+]);
+
+const result = await llm.generate({ prompt: { messages } });
+```
+
+Use `llm.adapter.complete(...)` or `llm.adapter.stream(...)` only when you need the raw low-level provider interface.
+
 ### Images (multimodal)
 
 Use `images()` to build base64 image content blocks for vision-capable models.
@@ -310,8 +423,8 @@ const messages = conversation("You are a helpful assistant.", [
   { role: "user", text: "How long does light take to reach Earth from the Sun?" },
 ]);
 
-// Pass to adapter directly
-const response = await llm.adapter.complete({ messages });
+// High-level text generation
+const response = await llm.generate({ prompt: { messages } });
 
 // Or to structured extraction
 const result = await llm.structured(Schema, { messages });
@@ -331,13 +444,43 @@ const messages = conversation("You are a vision assistant.", [
 
 ### Result Object
 
-Successful structured calls return validated data plus the raw response and trace metadata.
+Successful `generate()` calls return normalized text/reasoning plus request metadata:
+
+```typescript
+{
+  text: string,
+  reasoning: string,
+  attempts: GenerateAttempt[],
+  usage?: {
+    inputTokens?: number,
+    outputTokens?: number,
+    totalTokens?: number,
+    cost?: number,
+  },
+  finishReason?: string,
+}
+```
+
+Each `attempts` entry includes:
+
+```typescript
+{
+  attempt: number,
+  via: "complete" | "stream",
+  text: string,
+  reasoning: string,
+  usage?: LLMUsage,
+  finishReason?: string,
+}
+```
+
+Successful `structured()` calls return validated data plus normalized text/reasoning and trace metadata.
 
 ```typescript
 {
   data: T, // Validated data matching schema
-  raw: string, // Raw LLM response
-  thinkBlocks: ThinkBlock[], // Extracted <think> blocks
+  text: string, // Visible model text, without inline <think> blocks
+  reasoning: string, // Normalized reasoning across dedicated fields and inline <think>
   json: unknown | null, // Parsed JSON before validation
   attempts: StructuredAttempt<T>[], // One entry per parse / self-heal attempt
   usage?: {
@@ -357,8 +500,8 @@ Each `attempts` entry includes:
   attempt: number,
   selfHeal: boolean,
   via: "complete" | "stream",
-  raw: string,
-  thinkBlocks: ThinkBlock[],
+  text: string,
+  reasoning: string,
   json: unknown | null,
   candidates: string[],
   repairLog: string[],
@@ -370,6 +513,8 @@ Each `attempts` entry includes:
 }
 ```
 
+Legacy inline `<think>...</think>` blocks are still supported, but the high-level `structured()` API now folds them into `reasoning` internally instead of exposing block metadata.
+
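A small consumer sketch for the result shapes above, using only documented fields; `llm`, `prompt`, and `topic` are assumed to be set up as in the earlier snippets:

```typescript
// Consume a GenerateResult: final text, reasoning, and per-attempt accounting.
const result = await llm.generate(prompt`Summarize ${topic} in two sentences.`);

console.log(result.text);
if (result.reasoning) console.log("(reasoning)", result.reasoning);

// Each attempt records how it ran and what it used.
for (const a of result.attempts) {
  console.log(`attempt ${a.attempt} via ${a.via}: ${a.usage?.totalTokens ?? "?"} tokens`);
}

console.log("total:", result.usage?.totalTokens, "tokens, cost:", result.usage?.cost);
```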
 ### Error Handling
 
 Catch `StructuredParseError` when repair and validation still fail.
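
A minimal sketch of that, assuming `StructuredParseError` is exported from the package root (`llm`, `Schema`, and `input` as in earlier snippets); only standard `Error` fields are used:

```typescript
import { StructuredParseError } from "extrait"; // assumed export location

try {
  const result = await llm.structured(Schema, prompt`Extract fields from ${input}.`);
  console.log(result.data);
} catch (err) {
  if (err instanceof StructuredParseError) {
    // Repair, self-heal, and validation all failed.
    console.error("structured extraction failed:", err.message);
  } else {
    throw err; // network/timeout/abort errors, etc.
  }
}
```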
@@ -547,6 +692,7 @@ const llm = createLLM({
 Run repository examples with `bun run dev <example-name>`.
 
 Available examples:
+- `generate` - High-level text generation ([generate.ts](examples/generate.ts))
 - `streaming` - Real LLM streaming + snapshot self-check ([streaming.ts](examples/streaming.ts))
 - `streaming-with-tools` - Real text streaming with MCP tools + self-check ([streaming-with-tools.ts](examples/streaming-with-tools.ts))
 - `abort-signal` - Start a generation then cancel quickly with `AbortSignal` ([abort-signal.ts](examples/abort-signal.ts))
@@ -563,6 +709,7 @@ Available examples:
 
 Pass arguments after the example name:
 ```bash
+bun run dev generate "Why Bun is fast"
 bun run dev streaming
 bun run dev streaming-with-tools
 bun run dev abort-signal 120 "JSON cancellation demo"
@@ -582,6 +729,9 @@ These environment variables are used across the examples and common client setup
 - `LLM_MODEL` - Model name (default: `gpt-5-nano`)
 - `LLM_API_KEY` - API key for the provider
 - `STRUCTURED_DEBUG=1` - Enable debug output
+  By default, structured debug prints `text` (public visible output) and
+  `reasoning` (normalized reasoning). `parseSource` (the internal source used by
+  parsing and self-heal) is only printed when `debug.verbose` is enabled.
 
 ## Testing
 
@@ -0,0 +1,79 @@
+import type { LLMAdapter, LLMMessage, LLMRequest, LLMUsage, MCPToolClient, StructuredDebugOptions, StructuredPromptBuilder, StructuredPromptContext, StructuredPromptPayload, StructuredPromptValue, StructuredTimeoutOptions, ThinkBlock } from "./types";
+export type PromptRequestOptions = Omit<LLMRequest, "prompt" | "systemPrompt" | "messages">;
+export interface StreamDelta {
+    text: string;
+    reasoning: string;
+}
+export interface NormalizedStreamConfig<TSnapshot> {
+    enabled: boolean;
+    onData?: (event: {
+        delta: StreamDelta;
+        snapshot: TSnapshot;
+        done: boolean;
+        usage?: LLMUsage;
+        finishReason?: string;
+    }) => void;
+    to?: "stdout";
+}
+export interface NormalizedDebugConfig {
+    enabled: boolean;
+    colors: boolean;
+    verbose: boolean;
+    logger: (line: string) => void;
+}
+export interface NormalizedModelOutput {
+    text: string;
+    reasoning: string;
+    thinkBlocks: ThinkBlock[];
+    parseSource: string;
+}
+export interface ModelCallOptions<TSnapshot, TTraceEvent> {
+    prompt?: string;
+    messages?: LLMMessage[];
+    systemPrompt?: string;
+    request?: PromptRequestOptions;
+    stream: NormalizedStreamConfig<TSnapshot>;
+    observe?: (event: TTraceEvent) => void;
+    buildEvent: (input: {
+        stage: "llm.request" | "llm.response" | "llm.stream.delta" | "llm.stream.data";
+        message: string;
+        details?: unknown;
+    }) => TTraceEvent;
+    buildSnapshot: (input: NormalizedModelOutput) => TSnapshot;
+    debug: NormalizedDebugConfig;
+    debugLabel: string;
+    attempt: number;
+    selfHeal: boolean;
+    selfHealEnabled: boolean;
+    timeout?: StructuredTimeoutOptions;
+}
+export interface ModelCallResult {
+    text: string;
+    reasoning: string;
+    thinkBlocks: ThinkBlock[];
+    parseSource: string;
+    via: "complete" | "stream";
+    usage?: LLMUsage;
+    finishReason?: string;
+}
+export declare function resolvePrompt(prompt: StructuredPromptBuilder, context: StructuredPromptContext): StructuredPromptPayload;
+export declare function normalizePromptValue(value: StructuredPromptValue, _context: StructuredPromptContext): StructuredPromptPayload;
+export declare function normalizePromptPayload(value: StructuredPromptPayload): StructuredPromptPayload;
+export declare function applyPromptOutdent(payload: StructuredPromptPayload, enabled: boolean): StructuredPromptPayload;
+export declare function applyOutdentToOptionalPrompt(value: string | undefined, enabled: boolean): string | undefined;
+export declare function mergeSystemPrompts(primary?: string, secondary?: string): string | undefined;
+export declare function normalizeStreamConfig<TSnapshot>(option: boolean | {
+    enabled?: boolean;
+    onData?: NormalizedStreamConfig<TSnapshot>["onData"];
+    to?: "stdout";
+} | undefined): NormalizedStreamConfig<TSnapshot>;
+export declare function normalizeDebugConfig(option: StructuredDebugOptions | boolean | undefined): NormalizedDebugConfig;
+export declare function withToolTimeout(client: MCPToolClient, toolTimeoutMs: number): MCPToolClient;
+export declare function applyToolTimeout(clients: MCPToolClient[], toolTimeoutMs: number): MCPToolClient[];
+export declare function callModel<TSnapshot, TTraceEvent>(adapter: LLMAdapter, options: ModelCallOptions<TSnapshot, TTraceEvent>): Promise<ModelCallResult>;
+export declare function normalizeModelOutput(text: string, dedicatedReasoning?: string): NormalizedModelOutput;
+export declare function composeParseSource(text: string, reasoning?: string): string;
+export declare function aggregateUsage<T extends {
+    usage?: LLMUsage;
+}>(attempts: T[]): LLMUsage | undefined;
+export declare function mergeUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;
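
A sketch of plausible `mergeUsage` / `aggregateUsage` semantics, inferred from the signatures above and the usage shape documented in the README; the shipped implementation is not shown in this diff:

```typescript
// Assumed LLMUsage shape, matching the README's usage fields.
interface LLMUsage {
  inputTokens?: number;
  outputTokens?: number;
  totalTokens?: number;
  cost?: number;
}

// Sketch: merge two usage records by summing defined fields.
function mergeUsageSketch(base?: LLMUsage, next?: LLMUsage): LLMUsage | undefined {
  if (!base) return next;
  if (!next) return base;
  const add = (a?: number, b?: number) =>
    a === undefined && b === undefined ? undefined : (a ?? 0) + (b ?? 0);
  return {
    inputTokens: add(base.inputTokens, next.inputTokens),
    outputTokens: add(base.outputTokens, next.outputTokens),
    totalTokens: add(base.totalTokens, next.totalTokens),
    cost: add(base.cost, next.cost),
  };
}

// Sketch: fold per-attempt usage into one total, as aggregateUsage implies.
const aggregateUsageSketch = <T extends { usage?: LLMUsage }>(attempts: T[]) =>
  attempts.reduce<LLMUsage | undefined>((acc, a) => mergeUsageSketch(acc, a.usage), undefined);
```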
@@ -0,0 +1,3 @@
+import type { GenerateCallOptions, GenerateOptions, GenerateResult, LLMAdapter, StructuredPromptBuilder } from "./types";
+export declare function generate(adapter: LLMAdapter, prompt: StructuredPromptBuilder, options?: GenerateCallOptions): Promise<GenerateResult>;
+export declare function generate(adapter: LLMAdapter, options: GenerateOptions): Promise<GenerateResult>;