extrait 0.7.0 → 0.7.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/dist/types.d.ts CHANGED
@@ -1,468 +1,4 @@
1
- import type { z } from "zod";
2
- export type StructuredMode = "loose" | "strict";
3
- export type HTTPHeaders = Record<string, string>;
4
- export interface ExtractionCandidate {
5
- id: string;
6
- source: "fenced" | "scan" | "raw";
7
- content: string;
8
- language?: string | null;
9
- parseHint?: ExtractionParseHint;
10
- start: number;
11
- end: number;
12
- score: number;
13
- }
14
- export interface ExtractionParseHint {
15
- success: boolean;
16
- parsed: unknown | null;
17
- repaired: string | null;
18
- usedRepair: boolean;
19
- stage: "parse" | "repair";
20
- error: string;
21
- }
22
- export interface ExtractionHeuristicsOptions {
23
- firstPassMin: number;
24
- firstPassCap: number;
25
- firstPassMultiplier: number;
26
- secondPassMin: number;
27
- secondPassCap: number;
28
- secondPassMultiplier: number;
29
- hintMaxLength: number;
30
- }
31
- export interface ExtractJsonCandidatesOptions {
32
- maxCandidates?: number;
33
- acceptArrays?: boolean;
34
- allowRepairHints?: boolean;
35
- heuristics?: Partial<ExtractionHeuristicsOptions>;
36
- }
37
- export interface ParseTraceEvent {
38
- stage: "extract" | "repair" | "parse" | "validate" | "result";
39
- level: "info" | "error";
40
- message: string;
41
- candidateId?: string;
42
- details?: unknown;
43
- }
44
- export interface ParseLLMOutputOptions {
45
- repair?: boolean;
46
- maxCandidates?: number;
47
- acceptArrays?: boolean;
48
- extraction?: Partial<ExtractionHeuristicsOptions>;
49
- onTrace?: (event: ParseTraceEvent) => void;
50
- }
51
- export interface PipelineError {
52
- stage: "extract" | "repair" | "parse" | "validate" | "llm" | "self-heal";
53
- message: string;
54
- candidateId?: string;
55
- details?: unknown;
56
- }
57
- export interface CandidateDiagnostics {
58
- candidateId: string;
59
- source: ExtractionCandidate["source"];
60
- usedRepair: boolean;
61
- parseSuccess: boolean;
62
- validationSuccess: boolean;
63
- selected: boolean;
64
- stage: "repair" | "parse" | "validate" | "success";
65
- message?: string;
66
- zodIssues?: z.core.$ZodIssue[];
67
- }
68
- export interface ThinkBlock {
69
- id: string;
70
- content: string;
71
- raw: string;
72
- start: number;
73
- end: number;
74
- }
75
- export interface ThinkDiagnostics {
76
- unterminatedCount: number;
77
- nestedCount: number;
78
- hiddenChars: number;
79
- }
80
- export interface ParseLLMOutputResult<T> {
81
- success: boolean;
82
- data: T | null;
83
- raw: string;
84
- sanitizedRaw: string;
85
- thinkBlocks: ThinkBlock[];
86
- thinkDiagnostics: ThinkDiagnostics;
87
- parsed: unknown | null;
88
- candidate: ExtractionCandidate | null;
89
- repaired: string | null;
90
- candidates: ExtractionCandidate[];
91
- diagnostics: CandidateDiagnostics[];
92
- errors: PipelineError[];
93
- zodIssues: z.core.$ZodIssue[];
94
- }
95
- export interface MCPToolSchema {
96
- type?: string;
97
- properties?: Record<string, unknown>;
98
- required?: string[];
99
- [key: string]: unknown;
100
- }
101
- export interface MCPToolDescriptor {
102
- name: string;
103
- description?: string;
104
- inputSchema?: MCPToolSchema;
105
- }
106
- export interface MCPListToolsResult {
107
- tools: MCPToolDescriptor[];
108
- nextCursor?: string;
109
- }
110
- export interface MCPCallToolParams {
111
- name: string;
112
- arguments?: Record<string, unknown>;
113
- _meta?: Record<string, unknown>;
114
- }
115
- export interface MCPToolClient {
116
- id: string;
117
- listTools(params?: {
118
- cursor?: string;
119
- }): Promise<MCPListToolsResult>;
120
- callTool(params: MCPCallToolParams): Promise<unknown>;
121
- close?(): Promise<void>;
122
- }
123
- export interface LLMTextContent {
124
- type: "text";
125
- text: string;
126
- }
127
- export interface LLMImageContent {
128
- type: "image_url";
129
- image_url: {
130
- url: string;
131
- };
132
- }
133
- export type LLMMessageContent = string | (LLMTextContent | LLMImageContent)[];
134
- export interface LLMToolCallRef {
135
- id: string;
136
- type: "function";
137
- function: {
138
- name: string;
139
- arguments: string;
140
- };
141
- }
142
- export interface ReasoningBlock {
143
- turnIndex: number;
144
- text: string;
145
- }
146
- export interface StreamTurnTransition {
147
- turnIndex: number;
148
- kind: "reasoningComplete" | "toolCallsEmit" | "toolResultsReceived" | "streamEnd";
149
- reasoningText?: string;
150
- toolCalls?: LLMToolCall[];
151
- }
152
- export interface LLMMessage {
153
- role: "system" | "user" | "assistant" | "tool";
154
- content: LLMMessageContent;
155
- [key: string]: unknown;
156
- }
157
- export type LLMReasoningEffort = "none" | "minimal" | "low" | "medium" | "high" | "max";
158
- export interface LLMRequest {
159
- prompt?: string;
160
- systemPrompt?: string;
161
- messages?: LLMMessage[];
162
- temperature?: number;
163
- reasoningEffort?: LLMReasoningEffort;
164
- maxTokens?: number;
165
- mcpClients?: MCPToolClient[];
166
- toolChoice?: LLMToolChoice;
167
- parallelToolCalls?: boolean;
168
- maxToolRounds?: number;
169
- onToolExecution?: (execution: LLMToolExecution) => void;
170
- transformToolOutput?: LLMToolOutputTransformer;
171
- transformToolArguments?: LLMToolArgumentsTransformer;
172
- transformToolCallParams?: LLMToolCallParamsTransformer;
173
- unknownToolError?: (toolName: string) => string;
174
- toolDebug?: boolean | LLMToolDebugOptions;
175
- onTurnTransition?: (transition: StreamTurnTransition) => void;
176
- body?: Record<string, unknown>;
177
- signal?: AbortSignal;
178
- }
179
- export interface LLMUsage {
180
- inputTokens?: number;
181
- outputTokens?: number;
182
- totalTokens?: number;
183
- cost?: number;
184
- }
185
- export interface LLMResponse {
186
- text: string;
187
- reasoning?: string;
188
- reasoningBlocks?: ReasoningBlock[];
189
- raw?: unknown;
190
- usage?: LLMUsage;
191
- finishReason?: string;
192
- toolCalls?: LLMToolCall[];
193
- toolExecutions?: LLMToolExecution[];
194
- }
195
- export interface LLMStreamChunk {
196
- textDelta: string;
197
- reasoningDelta?: string;
198
- turnIndex?: number;
199
- toolCalls?: LLMToolCall[];
200
- raw?: unknown;
201
- done?: boolean;
202
- usage?: LLMUsage;
203
- finishReason?: string;
204
- }
205
- export interface LLMStreamCallbacks {
206
- onStart?: () => void;
207
- onToken?: (token: string) => void;
208
- onChunk?: (chunk: LLMStreamChunk) => void;
209
- onComplete?: (response: LLMResponse) => void;
210
- }
211
- export interface EmbeddingRequest {
212
- input: string | string[];
213
- model?: string;
214
- dimensions?: number;
215
- body?: Record<string, unknown>;
216
- }
217
- export interface EmbeddingResult {
218
- embeddings: number[][];
219
- model: string;
220
- usage?: LLMUsage;
221
- raw?: unknown;
222
- }
223
- export interface LLMAdapter {
224
- provider?: string;
225
- model?: string;
226
- complete(request: LLMRequest): Promise<LLMResponse>;
227
- stream?(request: LLMRequest, callbacks?: LLMStreamCallbacks): Promise<LLMResponse>;
228
- embed?(request: EmbeddingRequest): Promise<EmbeddingResult>;
229
- }
230
- export interface LLMToolCall {
231
- id: string;
232
- type: string;
233
- name?: string;
234
- arguments?: unknown;
235
- output?: unknown;
236
- error?: string;
237
- }
238
- export interface LLMToolExecution {
239
- callId: string;
240
- type: string;
241
- name?: string;
242
- clientId?: string;
243
- remoteName?: string;
244
- arguments?: unknown;
245
- output?: unknown;
246
- error?: string;
247
- round?: number;
248
- provider?: string;
249
- model?: string;
250
- handledLocally: boolean;
251
- startedAt: string;
252
- durationMs?: number;
253
- }
254
- export type LLMToolOutputTransformer = (output: unknown, execution: Omit<LLMToolExecution, "output" | "durationMs">) => unknown | Promise<unknown>;
255
- export type LLMToolArgumentsTransformer = (args: Record<string, unknown>, context: {
256
- name: string;
257
- remoteName: string;
258
- clientId: string;
259
- }) => Record<string, unknown> | Promise<Record<string, unknown>>;
260
- export type LLMToolCallParamsTransformer = (params: MCPCallToolParams, context: {
261
- name: string;
262
- remoteName: string;
263
- clientId: string;
264
- }) => MCPCallToolParams | Promise<MCPCallToolParams>;
265
- export interface LLMToolDebugOptions {
266
- enabled?: boolean;
267
- logger?: (line: string) => void;
268
- includeRequest?: boolean;
269
- includeResult?: boolean;
270
- includeResultOnError?: boolean;
271
- pretty?: boolean;
272
- }
273
- export type LLMToolChoice = "none" | "auto" | "required" | {
274
- type: "function";
275
- function: {
276
- name: string;
277
- };
278
- } | Record<string, unknown>;
279
- export interface StructuredTraceEvent {
280
- stage: "llm.request" | "llm.response" | "llm.stream.delta" | "llm.stream.data" | "parse" | "self-heal" | "result";
281
- attempt: number;
282
- selfHeal: boolean;
283
- message: string;
284
- details?: unknown;
285
- }
286
- export interface GenerateTraceEvent {
287
- stage: "llm.request" | "llm.response" | "llm.stream.delta" | "llm.stream.data" | "result";
288
- attempt: number;
289
- message: string;
290
- details?: unknown;
291
- }
292
- export interface StructuredPromptContext {
293
- mode: StructuredMode;
294
- }
295
- export interface StructuredPromptPayload {
296
- prompt?: string;
297
- systemPrompt?: string;
298
- messages?: LLMMessage[];
299
- }
300
- export interface StructuredPromptResolver {
301
- resolvePrompt(context: StructuredPromptContext): StructuredPromptPayload;
302
- }
303
- export type StructuredPromptValue = string | StructuredPromptPayload | StructuredPromptResolver;
304
- export type StructuredPromptBuilder = StructuredPromptValue | ((context: StructuredPromptContext) => StructuredPromptValue);
305
- export interface StructuredDebugOptions {
306
- enabled?: boolean;
307
- colors?: boolean;
308
- verbose?: boolean;
309
- logger?: (line: string) => void;
310
- }
311
- export interface StructuredSelfHealOptions {
312
- enabled?: boolean;
313
- maxAttempts?: number;
314
- stopOnNoProgress?: boolean;
315
- maxContextChars?: number;
316
- }
317
- export type StructuredSelfHealInput = boolean | number | StructuredSelfHealOptions;
318
- export interface StructuredTimeoutOptions {
319
- /** Timeout in ms for each LLM HTTP request. Creates an AbortSignal.timeout internally if no signal is already provided. */
320
- request?: number;
321
- /** Timeout in ms for each MCP tool call. */
322
- tool?: number;
323
- }
324
- export type StructuredStreamData<T> = T extends Array<infer TItem> ? Array<StructuredStreamData<TItem>> : T extends object ? {
325
- [K in keyof T]?: StructuredStreamData<T[K]> | null;
326
- } : T | null;
327
- export interface StructuredStreamDelta {
328
- text: string;
329
- reasoning: string;
330
- }
331
- export interface StructuredStreamSnapshot<T = unknown> {
332
- text: string;
333
- reasoning: string;
334
- reasoningBlocks?: ReasoningBlock[];
335
- data: StructuredStreamData<T> | null;
336
- }
337
- export interface StructuredStreamEvent<T = unknown> {
338
- delta: StructuredStreamDelta;
339
- snapshot: StructuredStreamSnapshot<T>;
340
- done: boolean;
341
- usage?: LLMUsage;
342
- finishReason?: string;
343
- turnIndex?: number;
344
- toolCalls?: LLMToolCall[];
345
- }
346
- export interface StructuredStreamOptions<T = unknown> {
347
- enabled?: boolean;
348
- onData?: (event: StructuredStreamEvent<T>) => void;
349
- onTurnTransition?: (transition: StreamTurnTransition) => void;
350
- to?: "stdout";
351
- }
352
- export type StructuredStreamInput<T = unknown> = boolean | StructuredStreamOptions<T>;
353
- export interface GenerateStreamDelta {
354
- text: string;
355
- reasoning: string;
356
- }
357
- export interface GenerateStreamSnapshot {
358
- text: string;
359
- reasoning: string;
360
- reasoningBlocks?: ReasoningBlock[];
361
- }
362
- export interface GenerateStreamEvent {
363
- delta: GenerateStreamDelta;
364
- snapshot: GenerateStreamSnapshot;
365
- done: boolean;
366
- usage?: LLMUsage;
367
- finishReason?: string;
368
- turnIndex?: number;
369
- toolCalls?: LLMToolCall[];
370
- }
371
- export interface GenerateStreamOptions {
372
- enabled?: boolean;
373
- onData?: (event: GenerateStreamEvent) => void;
374
- onTurnTransition?: (transition: StreamTurnTransition) => void;
375
- to?: "stdout";
376
- }
377
- export type GenerateStreamInput = boolean | GenerateStreamOptions;
378
- export interface GenerateCallOptions {
379
- outdent?: boolean;
380
- stream?: GenerateStreamInput;
381
- debug?: boolean | StructuredDebugOptions;
382
- observe?: (event: GenerateTraceEvent) => void;
383
- systemPrompt?: string;
384
- request?: Omit<LLMRequest, "prompt" | "systemPrompt" | "messages">;
385
- timeout?: StructuredTimeoutOptions;
386
- }
387
- export interface GenerateOptions extends GenerateCallOptions {
388
- prompt: StructuredPromptBuilder;
389
- }
390
- export interface StructuredCallOptions<TSchema extends z.ZodTypeAny> {
391
- mode?: StructuredMode;
392
- outdent?: boolean;
393
- parse?: ParseLLMOutputOptions;
394
- selfHeal?: StructuredSelfHealInput;
395
- stream?: StructuredStreamInput<z.infer<TSchema>>;
396
- debug?: boolean | StructuredDebugOptions;
397
- observe?: (event: StructuredTraceEvent) => void;
398
- systemPrompt?: string;
399
- request?: Omit<LLMRequest, "prompt" | "systemPrompt" | "messages">;
400
- schemaInstruction?: string;
401
- timeout?: StructuredTimeoutOptions;
402
- }
403
- export interface StructuredOptions<TSchema extends z.ZodTypeAny> extends StructuredCallOptions<TSchema> {
404
- schema: TSchema;
405
- prompt: StructuredPromptBuilder;
406
- }
407
- export interface StructuredAttempt<T> {
408
- attempt: number;
409
- selfHeal: boolean;
410
- via: "complete" | "stream";
411
- text: string;
412
- reasoning: string;
413
- json: unknown | null;
414
- candidates: string[];
415
- repairLog: string[];
416
- zodIssues: z.core.$ZodIssue[];
417
- success: boolean;
418
- usage?: LLMUsage;
419
- finishReason?: string;
420
- reasoningBlocks?: ReasoningBlock[];
421
- parsed: ParseLLMOutputResult<T>;
422
- }
423
- export interface GenerateAttempt {
424
- attempt: number;
425
- via: "complete" | "stream";
426
- text: string;
427
- reasoning: string;
428
- usage?: LLMUsage;
429
- finishReason?: string;
430
- reasoningBlocks?: ReasoningBlock[];
431
- }
432
- export interface StructuredResult<T> {
433
- data: T;
434
- text: string;
435
- reasoning: string;
436
- json: unknown | null;
437
- attempts: StructuredAttempt<T>[];
438
- usage?: LLMUsage;
439
- finishReason?: string;
440
- reasoningBlocks?: ReasoningBlock[];
441
- }
442
- export interface GenerateResult {
443
- text: string;
444
- reasoning: string;
445
- attempts: GenerateAttempt[];
446
- usage?: LLMUsage;
447
- finishReason?: string;
448
- reasoningBlocks?: ReasoningBlock[];
449
- }
450
- export interface StructuredError {
451
- name: "StructuredParseError";
452
- text: string;
453
- reasoning: string;
454
- candidates: string[];
455
- zodIssues?: z.core.$ZodIssue[];
456
- repairLog?: string[];
457
- attempt: number;
458
- }
459
- export interface MarkdownCodeBlock {
460
- language: string | null;
461
- code: string;
462
- start: number;
463
- end: number;
464
- }
465
- export interface MarkdownCodeOptions {
466
- language?: string;
467
- firstOnly?: boolean;
468
- }
1
+ export type * from "./type-definitions/parse";
2
+ export type * from "./type-definitions/llm";
3
+ export type * from "./type-definitions/structured";
4
+ export type * from "./type-definitions/markdown";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "extrait",
3
- "version": "0.7.0",
3
+ "version": "0.7.2",
4
4
  "repository": {
5
5
  "type": "git",
6
6
  "url": "git+https://github.com/tterrasson/extrait.git"
@@ -13,7 +13,7 @@
13
13
  "zod": "^4.4.3"
14
14
  },
15
15
  "devDependencies": {
16
- "@types/bun": "^1.3.13",
16
+ "@types/bun": "^1.3.14",
17
17
  "@types/sharp": "^0.32.0",
18
18
  "typescript": "^5.9.3"
19
19
  },
@@ -51,7 +51,7 @@
51
51
  "lint": "bunx tsc -p tsconfig.lint.json",
52
52
  "prepublishOnly": "bun run lint && bun run build",
53
53
  "test": "bun test tests/ --reporter=dots --only-failures",
54
- "typecheck": "bunx tsc --noEmit",
54
+ "typecheck": "bunx tsc -p tsconfig.typecheck.json --noEmit",
55
55
  "pack": "bun run build && npm pack"
56
56
  },
57
57
  "type": "module",