extrait 0.5.5 → 0.6.0
This diff shows the contents of publicly released package versions as published to their public registries, and is provided for informational purposes only.
- package/README.md +179 -9
- package/dist/generate-shared.d.ts +79 -0
- package/dist/generate.d.ts +3 -0
- package/dist/index.cjs +870 -520
- package/dist/index.d.ts +2 -1
- package/dist/index.js +870 -520
- package/dist/llm.d.ts +18 -2
- package/dist/structured.d.ts +4 -4
- package/dist/types.d.ts +82 -8
- package/package.json +1 -1
package/README.md
CHANGED
@@ -1,6 +1,6 @@
 # extrait
 
-
+High-level LLM text generation and structured JSON extraction with validation, repair, and streaming.
 
 <p align="left">
   <a href="https://www.npmjs.com/package/extrait">
@@ -68,6 +68,7 @@ console.log(result.data);
 These examples cover the most common usage patterns in the repository.
 
 - [`examples/simple.ts`](examples/simple.ts) - Basic structured output with streaming
+- [`examples/generate.ts`](examples/generate.ts) - High-level text generation
 - [`examples/streaming.ts`](examples/streaming.ts) - Real-time partial output and snapshot updates
 - [`examples/calculator-tool.ts`](examples/calculator-tool.ts) - Structured extraction with MCP tools
 - [`examples/conversation.ts`](examples/conversation.ts) - Multi-turn prompts and multimodal content
@@ -76,6 +77,7 @@ These examples cover the most common usage patterns in the repository.
 
 ```bash
 bun run dev simple "Bun.js runtime"
+bun run dev generate "Bun.js runtime"
 bun run dev streaming
 bun run dev calculator-tool
 ```
@@ -107,6 +109,8 @@ const llm = createLLM({
   mode: "loose" | "strict", // loose allows repair
   selfHeal: 1, // optional retry attempts
   debug: false, // optional structured debug output
+  // or:
+  // debug: { enabled: true, verbose: true },
   systemPrompt: "You are a helpful assistant.",
   timeout: {
     request: 30_000,
@@ -219,7 +223,17 @@ const result = await llm.structured(
     stream: {
       to: "stdout",
       onData: (event) => {
-
+        if (event.delta.text) {
+          console.log("New visible text:", event.delta.text);
+        }
+        if (event.delta.reasoning) {
+          console.log("New reasoning text:", event.delta.reasoning);
+        }
+
+        console.log("Current visible text:", event.snapshot.text);
+        console.log("Current reasoning:", event.snapshot.reasoning);
+        console.log("Current structured snapshot:", event.snapshot.data);
+
         if (event.done) {
           console.log("Streaming done.");
         }
@@ -227,6 +241,7 @@ const result = await llm.structured(
     },
     request: {
       signal: AbortSignal.timeout(30_000), // optional AbortSignal
+      reasoningEffort: "medium", // optional reasoning effort hint
     },
     timeout: {
       request: 30_000, // ms per LLM HTTP request
@@ -238,6 +253,21 @@ const result = await llm.structured(
 
 `prompt()` builds an ordered `messages` payload. Use ``prompt`...` `` for a single string prompt, or the fluent builder for multi-turn conversations. The `LLMMessage` type is exported if you need to type your own message arrays.
 
+In `stream.onData`, the event is split into two layers:
+
+- `event.delta.text` is only the newly received visible text since the previous event.
+- `event.delta.reasoning` is only the newly received reasoning text since the previous event.
+- `event.snapshot.text` is the full visible text accumulated so far.
+- `event.snapshot.reasoning` is the full normalized reasoning accumulated so far.
+- `event.snapshot.data` is the best structured JSON snapshot that can be parsed from the stream so far. It may stay unchanged while `event.delta.text` continues to grow.
+
+Typical usage is:
+
+- render `event.delta.text` directly to a terminal or chat UI
+- optionally render `event.delta.reasoning` in a separate reasoning panel
+- use `event.snapshot.data` to drive partial structured UI state
+- use `event.snapshot.text` / `event.snapshot.reasoning` when you need the full accumulated state instead of only the latest increment
+
 You can also pass provider request options through `request`:
 
 ```typescript
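Taken together, a minimal `onData` consumer based on the documented event shape might look like this (the explicit parameter type is written out only for illustration; extrait types the callback for you):

```typescript
const onData = (event: {
  delta: { text: string; reasoning: string };
  snapshot: { text: string; reasoning: string; data: unknown };
  done: boolean;
}) => {
  // Deltas are increments: append them as they arrive.
  if (event.delta.text) process.stdout.write(event.delta.text);

  // Snapshots are cumulative: replace previous state, don't append.
  // snapshot.data may lag behind snapshot.text until more JSON parses.
  console.log("partial structured state:", event.snapshot.data);

  if (event.done) console.log("\nFinal length:", event.snapshot.text.length);
};
```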
@@ -254,6 +284,89 @@ const result = await llm.structured(
 );
 ```
 
+### Making Text Calls
+
+`generate()` is the high-level API for non-structured generation. It accepts the same prompt shapes as `structured()`, but does not inject any schema or parse the output.
+
+```typescript
+// Simple prompt
+const result = await llm.generate(
+  prompt`Write a short summary of ${topic}.`
+);
+
+// Multi-message prompt
+const result = await llm.generate(
+  prompt()
+    .system`You are a concise assistant.`
+    .user`Summarize: """${text}"""`
+);
+
+// Raw messages payload
+const result = await llm.generate({
+  prompt: {
+    messages: [
+      { role: "user", content: "Say hello in one sentence." },
+    ],
+  },
+});
+```
+
+Streaming mirrors `structured()`, except the snapshot only contains `text` and `reasoning`:
+
+```typescript
+const result = await llm.generate(
+  prompt`Explain ${topic} in one short paragraph.`,
+  {
+    stream: {
+      enabled: true,
+      onData: (event) => {
+        process.stdout.write(event.delta.text);
+
+        console.log("Full text so far:", event.snapshot.text);
+        console.log("Full reasoning so far:", event.snapshot.reasoning);
+
+        if (event.done) {
+          console.log("Streaming done.");
+        }
+      },
+    },
+  }
+);
+```
+
+Provider request options and MCP tools still go through `request`:
+
+```typescript
+const result = await llm.generate(
+  prompt`Use tools if needed and answer the user clearly.`,
+  {
+    request: {
+      temperature: 0,
+      maxTokens: 800,
+      reasoningEffort: "medium",
+      mcpClients: [calculatorMCP],
+      maxToolRounds: 8,
+    },
+  }
+);
+```
+
+On `openai-compatible`, this is sent as `reasoning_effort`, with `max` mapped to `xhigh`. On `anthropic-compatible`, this is sent as `output_config.effort` and auto-enables `thinking: { type: "adaptive" }`.
+
+For existing history or multi-turn conversations, pass `messages` directly:
+
+```typescript
+const messages = conversation("You are a helpful assistant.", [
+  { role: "user", text: "What is the speed of light?" },
+  { role: "assistant", text: "Approximately 299,792 km/s in a vacuum." },
+  { role: "user", text: "How long does light take to reach Earth from the Sun?" },
+]);
+
+const result = await llm.generate({ prompt: { messages } });
+```
+
+Use `llm.adapter.complete(...)` or `llm.adapter.stream(...)` only when you need the raw low-level provider interface.
+
 ### Images (multimodal)
 
 Use `images()` to build base64 image content blocks for vision-capable models.
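For reference, a sketch of the wire-level mapping that `reasoningEffort` paragraph describes. The mapped field names come from the text above; the surrounding request bodies and model names are illustrative only:

```typescript
// openai-compatible: reasoningEffort is forwarded as reasoning_effort,
// and the "max" level is translated to "xhigh".
const openaiBody = {
  model: "gpt-5-nano",
  reasoning_effort: "xhigh", // from reasoningEffort: "max"
};

// anthropic-compatible: the effort lands under output_config, and
// adaptive thinking is switched on automatically.
const anthropicBody = {
  model: "some-anthropic-model", // placeholder
  output_config: { effort: "medium" }, // from reasoningEffort: "medium"
  thinking: { type: "adaptive" }, // auto-enabled by extrait
};
```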
@@ -310,8 +423,8 @@ const messages = conversation("You are a helpful assistant.", [
   { role: "user", text: "How long does light take to reach Earth from the Sun?" },
 ]);
 
-//
-const response = await llm.
+// High-level text generation
+const response = await llm.generate({ prompt: { messages } });
 
 // Or to structured extraction
 const result = await llm.structured(Schema, { messages });
@@ -331,13 +444,43 @@ const messages = conversation("You are a vision assistant.", [
 
 ### Result Object
 
-Successful
+Successful `generate()` calls return normalized text/reasoning plus request metadata:
+
+```typescript
+{
+  text: string,
+  reasoning: string,
+  attempts: GenerateAttempt[],
+  usage?: {
+    inputTokens?: number,
+    outputTokens?: number,
+    totalTokens?: number,
+    cost?: number,
+  },
+  finishReason?: string,
+}
+```
+
+Each `attempts` entry includes:
+
+```typescript
+{
+  attempt: number,
+  via: "complete" | "stream",
+  text: string,
+  reasoning: string,
+  usage?: LLMUsage,
+  finishReason?: string,
+}
+```
+
+Successful `structured()` calls return validated data plus normalized text/reasoning and trace metadata.
 
 ```typescript
 {
   data: T, // Validated data matching schema
-
-
+  text: string, // Visible model text, without inline <think> blocks
+  reasoning: string, // Normalized reasoning across dedicated fields and inline <think>
   json: unknown | null, // Parsed JSON before validation
   attempts: StructuredAttempt<T>[], // One entry per parse / self-heal attempt
   usage?: {
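A short sketch of reading these fields off a `generate()` result (shapes taken from the snippets above; `topic` is assumed to be in scope):

```typescript
const result = await llm.generate(prompt`Summarize ${topic}.`);

console.log(result.text);      // normalized visible text
console.log(result.reasoning); // normalized reasoning, possibly empty

// One entry per attempt, including transport and finish metadata
for (const a of result.attempts) {
  console.log(`attempt ${a.attempt} via ${a.via}:`, a.finishReason ?? "unknown");
}

// Aggregate usage, when the provider reports it
console.log(result.usage?.totalTokens, result.usage?.cost);
```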
@@ -357,8 +500,8 @@ Each `attempts` entry includes:
   attempt: number,
   selfHeal: boolean,
   via: "complete" | "stream",
-
-
+  text: string,
+  reasoning: string,
   json: unknown | null,
   candidates: string[],
   repairLog: string[],
@@ -370,6 +513,8 @@ Each `attempts` entry includes:
 }
 ```
 
+Legacy inline `<think>...</think>` blocks are still supported, but the high-level `structured()` API now folds them into `reasoning` internally instead of exposing block metadata.
+
 ### Error Handling
 
 Catch `StructuredParseError` when repair and validation still fail.
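As an illustration of that folding, assuming a model that emits an inline `<think>` block (the raw output and values here are invented):

```typescript
// Raw model output: <think>The user wants strict JSON.</think>{"ok": true}
const result = await llm.structured(Schema, prompt`Return { ok: true }.`);

console.log(result.text);      // '{"ok": true}'  - <think> block stripped
console.log(result.reasoning); // 'The user wants strict JSON.' - folded in
console.log(result.data);      // { ok: true }, validated against Schema
```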
@@ -485,6 +630,14 @@ const result = await llm.structured(
     },
     // Optional: transform tool arguments before the tool is called
     transformToolArguments: (args, call) => args,
+    // Optional: transform the full MCP call payload, including _meta
+    transformToolCallParams: (params, call) => ({
+      ...params,
+      _meta: {
+        source: "extrait-docs",
+        clientId: call.clientId,
+      },
+    }),
     // Optional: custom error message when an unknown tool is called
     unknownToolError: (toolName) => `Tool "${toolName}" is not available.`,
   },
@@ -494,6 +647,18 @@ const result = await llm.structured(
 await mcpClient.close?.();
 ```
 
+`transformToolArguments()` only receives the tool input object. `transformToolCallParams()` runs after it and receives the full `MCPCallToolParams` payload that will be sent to the MCP client:
+
+```typescript
+type MCPCallToolParams = {
+  name: string;
+  arguments?: Record<string, unknown>;
+  _meta?: Record<string, unknown>;
+};
+```
+
+Use `transformToolCallParams()` when you need to attach MCP-specific metadata, override the final remote tool name, or otherwise change the full request passed to `client.callTool()`. This hook is exported as `LLMToolCallParamsTransformer`.
+
 ### Timeouts
 
 Use `timeout` to set per-request and per-tool-call time limits without managing `AbortSignal` manually.
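A standalone transformer can be typed with that exported hook type; a sketch (the tracing fields are illustrative, and `crypto.randomUUID()` assumes a modern runtime):

```typescript
import type { LLMToolCallParamsTransformer } from "extrait";

// Stamp tracing metadata onto every outgoing MCP call.
const withTracing: LLMToolCallParamsTransformer = (params, call) => ({
  ...params,
  _meta: {
    ...params._meta,
    traceId: crypto.randomUUID(), // illustrative field, not part of extrait
    clientId: call.clientId,
  },
});
```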
@@ -527,6 +692,7 @@ const llm = createLLM({
 Run repository examples with `bun run dev <example-name>`.
 
 Available examples:
+- `generate` - High-level text generation ([generate.ts](examples/generate.ts))
 - `streaming` - Real LLM streaming + snapshot self-check ([streaming.ts](examples/streaming.ts))
 - `streaming-with-tools` - Real text streaming with MCP tools + self-check ([streaming-with-tools.ts](examples/streaming-with-tools.ts))
 - `abort-signal` - Start a generation then cancel quickly with `AbortSignal` ([abort-signal.ts](examples/abort-signal.ts))
@@ -543,6 +709,7 @@ Available examples:
 
 Pass arguments after the example name:
 ```bash
+bun run dev generate "Why Bun is fast"
 bun run dev streaming
 bun run dev streaming-with-tools
 bun run dev abort-signal 120 "JSON cancellation demo"
@@ -562,6 +729,9 @@ These environment variables are used across the examples and common client setup
 - `LLM_MODEL` - Model name (default: `gpt-5-nano`)
 - `LLM_API_KEY` - API key for the provider
 - `STRUCTURED_DEBUG=1` - Enable debug output
+By default, structured debug prints `text` (public visible output) and
+`reasoning` (normalized reasoning). `parseSource` (the internal source used by
+parsing and self-heal) is only printed when `debug.verbose` is enabled.
 
 ## Testing
 
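So, to see `parseSource`, verbose debug has to be on. A minimal sketch using the `debug` option shown earlier in this README (other `createLLM` options omitted):

```typescript
const llm = createLLM({
  // ...provider/model options...
  debug: { enabled: true, verbose: true }, // verbose additionally prints parseSource
});
```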
package/dist/generate-shared.d.ts
ADDED
@@ -0,0 +1,79 @@
+import type { LLMAdapter, LLMMessage, LLMRequest, LLMUsage, MCPToolClient, StructuredDebugOptions, StructuredPromptBuilder, StructuredPromptContext, StructuredPromptPayload, StructuredPromptValue, StructuredTimeoutOptions, ThinkBlock } from "./types";
+export type PromptRequestOptions = Omit<LLMRequest, "prompt" | "systemPrompt" | "messages">;
+export interface StreamDelta {
+    text: string;
+    reasoning: string;
+}
+export interface NormalizedStreamConfig<TSnapshot> {
+    enabled: boolean;
+    onData?: (event: {
+        delta: StreamDelta;
+        snapshot: TSnapshot;
+        done: boolean;
+        usage?: LLMUsage;
+        finishReason?: string;
+    }) => void;
+    to?: "stdout";
+}
+export interface NormalizedDebugConfig {
+    enabled: boolean;
+    colors: boolean;
+    verbose: boolean;
+    logger: (line: string) => void;
+}
+export interface NormalizedModelOutput {
+    text: string;
+    reasoning: string;
+    thinkBlocks: ThinkBlock[];
+    parseSource: string;
+}
+export interface ModelCallOptions<TSnapshot, TTraceEvent> {
+    prompt?: string;
+    messages?: LLMMessage[];
+    systemPrompt?: string;
+    request?: PromptRequestOptions;
+    stream: NormalizedStreamConfig<TSnapshot>;
+    observe?: (event: TTraceEvent) => void;
+    buildEvent: (input: {
+        stage: "llm.request" | "llm.response" | "llm.stream.delta" | "llm.stream.data";
+        message: string;
+        details?: unknown;
+    }) => TTraceEvent;
+    buildSnapshot: (input: NormalizedModelOutput) => TSnapshot;
+    debug: NormalizedDebugConfig;
+    debugLabel: string;
+    attempt: number;
+    selfHeal: boolean;
+    selfHealEnabled: boolean;
+    timeout?: StructuredTimeoutOptions;
+}
+export interface ModelCallResult {
+    text: string;
+    reasoning: string;
+    thinkBlocks: ThinkBlock[];
+    parseSource: string;
+    via: "complete" | "stream";
+    usage?: LLMUsage;
+    finishReason?: string;
+}
+export declare function resolvePrompt(prompt: StructuredPromptBuilder, context: StructuredPromptContext): StructuredPromptPayload;
+export declare function normalizePromptValue(value: StructuredPromptValue, _context: StructuredPromptContext): StructuredPromptPayload;
+export declare function normalizePromptPayload(value: StructuredPromptPayload): StructuredPromptPayload;
+export declare function applyPromptOutdent(payload: StructuredPromptPayload, enabled: boolean): StructuredPromptPayload;
+export declare function applyOutdentToOptionalPrompt(value: string | undefined, enabled: boolean): string | undefined;
+export declare function mergeSystemPrompts(primary?: string, secondary?: string): string | undefined;
+export declare function normalizeStreamConfig<TSnapshot>(option: boolean | {
+    enabled?: boolean;
+    onData?: NormalizedStreamConfig<TSnapshot>["onData"];
+    to?: "stdout";
+} | undefined): NormalizedStreamConfig<TSnapshot>;
+export declare function normalizeDebugConfig(option: StructuredDebugOptions | boolean | undefined): NormalizedDebugConfig;
+export declare function withToolTimeout(client: MCPToolClient, toolTimeoutMs: number): MCPToolClient;
+export declare function applyToolTimeout(clients: MCPToolClient[], toolTimeoutMs: number): MCPToolClient[];
+export declare function callModel<TSnapshot, TTraceEvent>(adapter: LLMAdapter, options: ModelCallOptions<TSnapshot, TTraceEvent>): Promise<ModelCallResult>;
+export declare function normalizeModelOutput(text: string, dedicatedReasoning?: string): NormalizedModelOutput;
+export declare function composeParseSource(text: string, reasoning?: string): string;
+export declare function aggregateUsage<T extends {
+    usage?: LLMUsage;
+}>(attempts: T[]): LLMUsage | undefined;
+export declare function mergeUsage(base: LLMUsage | undefined, next: LLMUsage | undefined): LLMUsage | undefined;
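These are internal helpers shared by `generate()` and `structured()` rather than documented public API. Purely as a reading aid, a type-driven sketch of how `callModel`'s generic hooks line up (the import specifier and placeholder values are assumptions):

```typescript
// Internal module path is an assumption about the build layout.
import { callModel } from "extrait/dist/generate-shared";
import type { LLMAdapter, LLMMessage } from "extrait";

declare const adapter: LLMAdapter;     // placeholder provider adapter
declare const messages: LLMMessage[];  // placeholder conversation

const result = await callModel(adapter, {
  messages,
  stream: { enabled: false },
  // TSnapshot is whatever buildSnapshot returns...
  buildSnapshot: (out) => ({ text: out.text, reasoning: out.reasoning }),
  // ...and TTraceEvent is whatever buildEvent returns.
  buildEvent: ({ stage, message, details }) => ({ stage, message, details }),
  observe: (event) => console.log(event.stage, event.message),
  debug: { enabled: false, colors: false, verbose: false, logger: console.log },
  debugLabel: "generate",
  attempt: 1,
  selfHeal: false,
  selfHealEnabled: false,
});

console.log(result.via, result.text.length);
```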
package/dist/generate.d.ts
ADDED
@@ -0,0 +1,3 @@
+import type { GenerateCallOptions, GenerateOptions, GenerateResult, LLMAdapter, StructuredPromptBuilder } from "./types";
+export declare function generate(adapter: LLMAdapter, prompt: StructuredPromptBuilder, options?: GenerateCallOptions): Promise<GenerateResult>;
+export declare function generate(adapter: LLMAdapter, options: GenerateOptions): Promise<GenerateResult>;
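The two overloads mirror the two call shapes `llm.generate()` accepts in the README. A sketch of each, assuming `generate` and `prompt` are re-exported from the package root and that `llm` and `topic` are in scope:

```typescript
import { generate, prompt } from "extrait";

// Overload 1: prompt builder + optional call options
const a = await generate(llm.adapter, prompt`Summarize ${topic}.`, {
  stream: { enabled: true, onData: (e) => process.stdout.write(e.delta.text) },
});

// Overload 2: single options object carrying the prompt payload
const b = await generate(llm.adapter, {
  prompt: { messages: [{ role: "user", content: "Say hello." }] },
});
```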
|