llmbic 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/LICENSE +21 -0
- package/README.md +351 -0
- package/dist/extractor.d.ts +19 -0
- package/dist/extractor.d.ts.map +1 -0
- package/dist/extractor.js +96 -0
- package/dist/extractor.js.map +1 -0
- package/dist/index.d.ts +24 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +17 -0
- package/dist/index.js.map +1 -0
- package/dist/merge.d.ts +76 -0
- package/dist/merge.d.ts.map +1 -0
- package/dist/merge.js +230 -0
- package/dist/merge.js.map +1 -0
- package/dist/prompt.d.ts +50 -0
- package/dist/prompt.d.ts.map +1 -0
- package/dist/prompt.js +205 -0
- package/dist/prompt.js.map +1 -0
- package/dist/rules.d.ts +73 -0
- package/dist/rules.d.ts.map +1 -0
- package/dist/rules.js +118 -0
- package/dist/rules.js.map +1 -0
- package/dist/types/extractor.types.d.ts +72 -0
- package/dist/types/extractor.types.d.ts.map +1 -0
- package/dist/types/extractor.types.js +2 -0
- package/dist/types/extractor.types.js.map +1 -0
- package/dist/types/logger.types.d.ts +12 -0
- package/dist/types/logger.types.d.ts.map +1 -0
- package/dist/types/logger.types.js +2 -0
- package/dist/types/logger.types.js.map +1 -0
- package/dist/types/merge.types.d.ts +159 -0
- package/dist/types/merge.types.d.ts.map +1 -0
- package/dist/types/merge.types.js +2 -0
- package/dist/types/merge.types.js.map +1 -0
- package/dist/types/prompt.types.d.ts +22 -0
- package/dist/types/prompt.types.d.ts.map +1 -0
- package/dist/types/prompt.types.js +2 -0
- package/dist/types/prompt.types.js.map +1 -0
- package/dist/types/provider.types.d.ts +21 -0
- package/dist/types/provider.types.d.ts.map +1 -0
- package/dist/types/provider.types.js +2 -0
- package/dist/types/provider.types.js.map +1 -0
- package/dist/types/rule.types.d.ts +38 -0
- package/dist/types/rule.types.d.ts.map +1 -0
- package/dist/types/rule.types.js +2 -0
- package/dist/types/rule.types.js.map +1 -0
- package/dist/types/validate.types.d.ts +25 -0
- package/dist/types/validate.types.d.ts.map +1 -0
- package/dist/types/validate.types.js +2 -0
- package/dist/types/validate.types.js.map +1 -0
- package/dist/validate.d.ts +57 -0
- package/dist/validate.d.ts.map +1 -0
- package/dist/validate.js +46 -0
- package/dist/validate.js.map +1 -0
- package/package.json +59 -0
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [1.0.0] — 2026-04-15
|
|
9
|
+
|
|
10
|
+
Initial public release.
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- `createExtractor(config)` — factory binding a Zod schema, deterministic rules and an optional LLM fallback into an extractor with `extract`, `extractSync`, `prompt`, `parse` and `merge` methods. Covers both one-shot async extraction and 4-step batch flows (extractSync → prompt → external LLM call → parse → merge).
|
|
15
|
+
- `rule` namespace — `rule.create(field, extractFn)`, `rule.regex(field, pattern, score, transform?)`, `rule.confidence(value, score)`, `rule.apply(content, rules, schema, logger?)`. Deterministic rules are pure synchronous functions returning typed matches with a confidence score in `[0, 1]`.
|
|
16
|
+
- `merge` namespace — `merge.apply(schema, rulesResult, llmResult, content, options?)` fuses rules output with LLM output, detects per-field conflicts, runs normalizers, re-validates against the Zod schema, and runs custom validators. `merge.defaultFieldPolicy` exposes the built-in fusion rules.
|
|
17
|
+
- `prompt` namespace — `prompt.build(schema, partial, content, options?)` emits an `LlmRequest` (`systemPrompt`, `userContent`, `responseSchema`, `knownValues`) restricted to fields missing from the deterministic pass. `prompt.parse(schema, missing, raw)` is a permissive parser that validates each field individually via Zod, drops invalid or unexpected keys, and never throws.
|
|
18
|
+
- `validator` namespace — `validator.of<T>()` returns `{ field, crossField }` factories bound to the data shape `T`, so predicates are fully typed from the field name.
|
|
19
|
+
- `LlmProvider` contract — single-method interface (`complete(request) → { values }`) consumers implement to wire any backend (OpenAI, Anthropic, Ollama, custom HTTP, ...). No vendor SDK is pulled into the import graph.
|
|
20
|
+
- Per-field confidence scoring, conflict detection (`flag` / `prefer-rule` / `prefer-llm` strategies), and extraction metadata (`durationMs`, rule/LLM field counts).
|
|
21
|
+
- Full TypeScript `.d.ts` output with JSDoc on every public type, method and configuration field.
|
|
22
|
+
- Example wiring a local Ollama runtime under `examples/ollama.ts`.
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Nicolas Smeets <contact@devpixel.be>
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
# Llmbic
|
|
2
|
+
|
|
3
|
+
[npm package](https://www.npmjs.com/package/llmbic)
|
|
4
|
+
[CI status](https://github.com/devpixel-be/llmbic/actions/workflows/ci.yml)
|
|
5
|
+
[License: MIT](./LICENSE)
|
|
6
|
+
[Node.js](https://nodejs.org)
|
|
7
|
+
|
|
8
|
+
Hybrid data extraction — deterministic rules + LLM fallback, with per-field confidence scoring.
|
|
9
|
+
|
|
10
|
+
The name folds **LLM** into [*lambic*](https://en.wikipedia.org/wiki/Lambic), the Belgian beer made by blending wild fermentation with a controlled process. Same idea here: LLMs are unpredictable, rules are rigid, and the mix produces something reliable.
|
|
11
|
+
|
|
12
|
+
## Why
|
|
13
|
+
|
|
14
|
+
Extracting structured data from unstructured text is a solved problem — until you need it to be *reliable*. Rules (regex, parsers) are deterministic but brittle. LLMs understand context but hallucinate. Neither is enough alone.
|
|
15
|
+
|
|
16
|
+
Llmbic combines both: deterministic rules extract what they can with full confidence, the LLM fills in the gaps, and a merge layer detects conflicts between the two. Every field carries a confidence score. You know exactly what's trustworthy and what needs review.
|
|
17
|
+
|
|
18
|
+
## Install
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
npm install llmbic
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
Llmbic has a single dependency: [Zod](https://zod.dev). No vendor SDK is pulled in — you bring your own LLM provider via the 1-method `LlmProvider` interface (see "Writing a provider" below).
|
|
25
|
+
|
|
26
|
+
## Quick start
|
|
27
|
+
|
|
28
|
+
### Rules-only (no LLM, no network)
|
|
29
|
+
|
|
30
|
+
```typescript
|
|
31
|
+
import { z } from 'zod';
|
|
32
|
+
import { createExtractor, rule } from 'llmbic';
|
|
33
|
+
|
|
34
|
+
const InvoiceSchema = z.object({
|
|
35
|
+
total: z.number().nullable(),
|
|
36
|
+
currency: z.string().nullable(),
|
|
37
|
+
vendor: z.string().nullable(),
|
|
38
|
+
date: z.string().nullable(),
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
const extractor = createExtractor({
|
|
42
|
+
schema: InvoiceSchema,
|
|
43
|
+
rules: [
|
|
44
|
+
rule.create('total', (text) => {
|
|
45
|
+
const m = text.match(/Total[:\s]*(\d[\d.,\s]+)\s*€/i);
|
|
46
|
+
if (!m) return null;
|
|
47
|
+
return rule.confidence(parseFloat(m[1].replace(/[\s.]/g, '').replace(',', '.')), 1.0);
|
|
48
|
+
}),
|
|
49
|
+
rule.create('currency', (text) => {
|
|
50
|
+
if (/€|EUR/i.test(text)) return rule.confidence('EUR', 1.0);
|
|
51
|
+
if (/\$|USD/i.test(text)) return rule.confidence('USD', 1.0);
|
|
52
|
+
return null;
|
|
53
|
+
}),
|
|
54
|
+
],
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
const result = await extractor.extract(markdownContent);
|
|
58
|
+
|
|
59
|
+
console.log(result.data);
|
|
60
|
+
// { total: 1250.00, currency: 'EUR', vendor: null, date: null }
|
|
61
|
+
|
|
62
|
+
console.log(result.confidence);
|
|
63
|
+
// { total: 1.0, currency: 1.0, vendor: null, date: null }
|
|
64
|
+
|
|
65
|
+
console.log(result.missing);
|
|
66
|
+
// ['vendor', 'date']
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Rules + LLM
|
|
70
|
+
|
|
71
|
+
```typescript
|
|
72
|
+
import { createExtractor, rule } from 'llmbic';
|
|
73
|
+
import type { LlmProvider } from 'llmbic';
|
|
74
|
+
import OpenAI from 'openai';
|
|
75
|
+
|
|
76
|
+
const client = new OpenAI();
|
|
77
|
+
const provider: LlmProvider = {
|
|
78
|
+
async complete(request) {
|
|
79
|
+
const response = await client.chat.completions.create({
|
|
80
|
+
model: 'gpt-4o-mini',
|
|
81
|
+
messages: [
|
|
82
|
+
{ role: 'system', content: request.systemPrompt },
|
|
83
|
+
{ role: 'user', content: request.userContent },
|
|
84
|
+
],
|
|
85
|
+
response_format: {
|
|
86
|
+
type: 'json_schema',
|
|
87
|
+
json_schema: { name: 'extraction', strict: true, schema: request.responseSchema },
|
|
88
|
+
},
|
|
89
|
+
});
|
|
90
|
+
return { values: JSON.parse(response.choices[0].message.content!) };
|
|
91
|
+
},
|
|
92
|
+
};
|
|
93
|
+
|
|
94
|
+
const extractor = createExtractor({
|
|
95
|
+
schema: InvoiceSchema,
|
|
96
|
+
rules: [
|
|
97
|
+
// ... same rules as above
|
|
98
|
+
],
|
|
99
|
+
llm: {
|
|
100
|
+
provider,
|
|
101
|
+
systemPrompt: 'Extract invoice data from the following document.',
|
|
102
|
+
},
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
const result = await extractor.extract(markdownContent);
|
|
106
|
+
|
|
107
|
+
console.log(result.data);
|
|
108
|
+
// { total: 1250.00, currency: 'EUR', vendor: 'Acme Corp', date: '2026-04-14' }
|
|
109
|
+
|
|
110
|
+
console.log(result.confidence);
|
|
111
|
+
// { total: 1.0, currency: 1.0, vendor: 0.7, date: 0.7 }
|
|
112
|
+
|
|
113
|
+
console.log(result.conflicts);
|
|
114
|
+
// [] — no disagreement between rules and LLM
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Batch / async mode (for OpenAI Batch API, job queues, etc.)
|
|
118
|
+
|
|
119
|
+
When you manage the LLM call yourself (batching, polling, custom transport), use the 4-step API:
|
|
120
|
+
|
|
121
|
+
```typescript
|
|
122
|
+
// Step 1 — Deterministic extraction (sync, instant)
|
|
123
|
+
const partial = extractor.extractSync(markdown);
|
|
124
|
+
|
|
125
|
+
// Step 2 — Build the LLM request (you send it however you want)
|
|
126
|
+
const llmRequest = extractor.prompt(markdown, partial);
|
|
127
|
+
// → { systemPrompt, userContent, responseSchema, knownValues }
|
|
128
|
+
|
|
129
|
+
// ... submit to OpenAI Batch API, poll later, get the response ...
|
|
130
|
+
|
|
131
|
+
// Step 3 — Parse the raw LLM response
|
|
132
|
+
const llmResult = extractor.parse(rawJsonResponse);
|
|
133
|
+
|
|
134
|
+
// Step 4 — Merge everything (fusion + conflict detection + validation)
|
|
135
|
+
const result = extractor.merge(partial, llmResult, markdown);
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Features
|
|
139
|
+
|
|
140
|
+
### Per-field confidence scoring
|
|
141
|
+
|
|
142
|
+
Every field in the result carries a confidence score (0.0–1.0):
|
|
143
|
+
|
|
144
|
+
| Source | Confidence |
|
|
145
|
+
|--------|-----------|
|
|
146
|
+
| Deterministic rule, exact match | 1.0 |
|
|
147
|
+
| Deterministic rule, partial match | 0.7–0.9 (you decide) |
|
|
148
|
+
| LLM only | configurable default (0.7) |
|
|
149
|
+
| Rule + LLM agree | 1.0 |
|
|
150
|
+
| Rule + LLM disagree | 0.3 (flagged as conflict) |
|
|
151
|
+
| No source | `null` |
|
|
152
|
+
|
|
153
|
+
### Conflict detection
|
|
154
|
+
|
|
155
|
+
When a rule and the LLM extract different values for the same field, Llmbic flags it:
|
|
156
|
+
|
|
157
|
+
```typescript
|
|
158
|
+
result.conflicts;
|
|
159
|
+
// [{ field: 'total', ruleValue: 1250, ruleConfidence: 1.0, llmValue: 1520 }]
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Three conflict strategies: `'flag'` (default — keep rule value, record conflict), `'prefer-rule'`, or `'prefer-llm'`.
|
|
163
|
+
|
|
164
|
+
### Normalizers
|
|
165
|
+
|
|
166
|
+
Post-merge transformations. Run in sequence, receive the merged data + original content:
|
|
167
|
+
|
|
168
|
+
```typescript
|
|
169
|
+
const extractor = createExtractor({
|
|
170
|
+
schema: MySchema,
|
|
171
|
+
rules: [...],
|
|
172
|
+
normalizers: [
|
|
173
|
+
(data, content) => {
|
|
174
|
+
// Fix a known data quality issue
|
|
175
|
+
if (data.price && data.price < 100) data.price = null;
|
|
176
|
+
return data;
|
|
177
|
+
},
|
|
178
|
+
],
|
|
179
|
+
});
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### Validators (invariants)
|
|
183
|
+
|
|
184
|
+
Check the final output for logical consistency:
|
|
185
|
+
|
|
186
|
+
```typescript
|
|
187
|
+
import { validator } from 'llmbic';
|
|
188
|
+
|
|
189
|
+
const { field, crossField } = validator.of<MySchemaShape>();
|
|
190
|
+
|
|
191
|
+
const extractor = createExtractor({
|
|
192
|
+
schema: MySchema,
|
|
193
|
+
rules: [...],
|
|
194
|
+
validators: [
|
|
195
|
+
field('price', 'price_positive', (v) => v === null || v > 0, 'Price must be positive'),
|
|
196
|
+
crossField('date_format', (d) => !d.date || /^\d{4}-\d{2}-\d{2}$/.test(d.date), 'Date must be YYYY-MM-DD'),
|
|
197
|
+
],
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
result.validation;
|
|
201
|
+
// { valid: true, violations: [] }
|
|
202
|
+
// or { valid: false, violations: [{ field: 'price', rule: 'price_positive', message: '...', severity: 'error' }] }
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
## Writing a provider
|
|
206
|
+
|
|
207
|
+
Llmbic does not ship vendor-specific adapters. The `LlmProvider` contract is a single method — wiring to any backend (OpenAI, Anthropic, Ollama, vLLM, Gemini, custom HTTP, ...) is ~10 lines you write and own.
|
|
208
|
+
|
|
209
|
+
```typescript
|
|
210
|
+
import type { LlmProvider } from 'llmbic';
|
|
211
|
+
|
|
212
|
+
const provider: LlmProvider = {
|
|
213
|
+
async complete(request) {
|
|
214
|
+
const response = await fetch('https://api.example.com/v1/complete', {
|
|
215
|
+
method: 'POST',
|
|
216
|
+
body: JSON.stringify({
|
|
217
|
+
system: request.systemPrompt,
|
|
218
|
+
user: request.userContent,
|
|
219
|
+
schema: request.responseSchema,
|
|
220
|
+
}),
|
|
221
|
+
});
|
|
222
|
+
const data = await response.json();
|
|
223
|
+
return { values: data.output };
|
|
224
|
+
},
|
|
225
|
+
};
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
Ready-made snippets for common backends:
|
|
229
|
+
|
|
230
|
+
**OpenAI** (Chat Completions + Structured Outputs):
|
|
231
|
+
|
|
232
|
+
```typescript
|
|
233
|
+
const client = new OpenAI();
|
|
234
|
+
const provider: LlmProvider = {
|
|
235
|
+
async complete(request) {
|
|
236
|
+
const response = await client.chat.completions.create({
|
|
237
|
+
model: 'gpt-4o-mini',
|
|
238
|
+
messages: [
|
|
239
|
+
{ role: 'system', content: request.systemPrompt },
|
|
240
|
+
{ role: 'user', content: request.userContent },
|
|
241
|
+
],
|
|
242
|
+
response_format: {
|
|
243
|
+
type: 'json_schema',
|
|
244
|
+
json_schema: { name: 'extraction', strict: true, schema: request.responseSchema },
|
|
245
|
+
},
|
|
246
|
+
});
|
|
247
|
+
return { values: JSON.parse(response.choices[0].message.content!) };
|
|
248
|
+
},
|
|
249
|
+
};
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
**Anthropic** (Messages API + forced tool):
|
|
253
|
+
|
|
254
|
+
```typescript
|
|
255
|
+
const client = new Anthropic();
|
|
256
|
+
const provider: LlmProvider = {
|
|
257
|
+
async complete(request) {
|
|
258
|
+
const response = await client.messages.create({
|
|
259
|
+
model: 'claude-haiku-4-5-20251001',
|
|
260
|
+
max_tokens: 4096,
|
|
261
|
+
system: request.systemPrompt,
|
|
262
|
+
messages: [{ role: 'user', content: request.userContent }],
|
|
263
|
+
tools: [{ name: 'extraction', input_schema: request.responseSchema }],
|
|
264
|
+
tool_choice: { type: 'tool', name: 'extraction' },
|
|
265
|
+
});
|
|
266
|
+
const toolUse = response.content.find((b) => b.type === 'tool_use');
|
|
267
|
+
return { values: toolUse!.input as Record<string, unknown> };
|
|
268
|
+
},
|
|
269
|
+
};
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
**Ollama** (native `format` — JSON Schema, requires Ollama 0.5+):
|
|
273
|
+
|
|
274
|
+
```typescript
|
|
275
|
+
const client = new Ollama();
|
|
276
|
+
const provider: LlmProvider = {
|
|
277
|
+
async complete(request) {
|
|
278
|
+
const response = await client.chat({
|
|
279
|
+
model: 'llama3.1',
|
|
280
|
+
messages: [
|
|
281
|
+
{ role: 'system', content: request.systemPrompt },
|
|
282
|
+
{ role: 'user', content: request.userContent },
|
|
283
|
+
],
|
|
284
|
+
format: request.responseSchema,
|
|
285
|
+
});
|
|
286
|
+
return { values: JSON.parse(response.message.content) };
|
|
287
|
+
},
|
|
288
|
+
};
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
Observability (token usage, latency, cost accounting) is out of scope — wrap the `complete` call in whatever telemetry you already use.
|
|
292
|
+
|
|
293
|
+
## Design decisions
|
|
294
|
+
|
|
295
|
+
- **One dependency** — Zod only. No vendor SDK ever enters the import graph; you bring your own LLM provider (see "Writing a provider").
|
|
296
|
+
- **No retry** — If the LLM returns invalid data, `parse()` does best-effort parsing (valid fields kept, invalid ignored). Retry is an orchestration concern.
|
|
297
|
+
- **No streaming** — Llmbic works with complete results. Streaming is a transport concern.
|
|
298
|
+
- **No chunking** — One content = one extraction. If your content is too long, split it before calling Llmbic.
|
|
299
|
+
- **Normalizers mutate** — For pragmatic reasons, normalizers receive and return the same object. The `merge()` function copies the data first, so the original is never modified.
|
|
300
|
+
- **Rules are sync** — Extraction rules are pure synchronous functions. If you need async lookups, do them before creating the rule.
|
|
301
|
+
|
|
302
|
+
## API reference
|
|
303
|
+
|
|
304
|
+
### `createExtractor(config)`
|
|
305
|
+
|
|
306
|
+
Creates an extractor instance. Config:
|
|
307
|
+
|
|
308
|
+
| Field | Type | Required | Description |
|
|
309
|
+
|-------|------|----------|-------------|
|
|
310
|
+
| `schema` | `ZodObject` | yes | Output schema |
|
|
311
|
+
| `rules` | `ExtractionRule[]` | yes | Deterministic extraction rules |
|
|
312
|
+
| `llm` | `{ provider, systemPrompt, defaultConfidence? }` | no | LLM configuration. Omit for rules-only mode. |
|
|
313
|
+
| `normalizers` | `Normalizer[]` | no | Post-merge transformations |
|
|
314
|
+
| `validators` | `Validator[]` | no | Output invariants |
|
|
315
|
+
| `conflictStrategy` | `'flag' \| 'prefer-rule' \| 'prefer-llm'` | no | Default: `'flag'` |
|
|
316
|
+
| `logger` | `Logger` | no | Injectable logger (compatible with Pino, Winston, console) |
|
|
317
|
+
|
|
318
|
+
### `rule.create(field, extractFn)`
|
|
319
|
+
|
|
320
|
+
Factory to create an `ExtractionRule`.
|
|
321
|
+
|
|
322
|
+
### `rule.confidence(value, score)`
|
|
323
|
+
|
|
324
|
+
Factory to create a `RuleMatch` with a confidence score.
|
|
325
|
+
|
|
326
|
+
### `validator.of<T>()`
|
|
327
|
+
|
|
328
|
+
Factory bound to the data shape `T`. Returns `{ field, crossField }`:
|
|
329
|
+
|
|
330
|
+
- `field(name, rule, checkFn, message, severity?)` — single-field validator. `checkFn` receives the precise type of the field (`T[name]`).
|
|
331
|
+
- `crossField(rule, checkFn, message, severity?)` — whole-object validator, produces a violation without a `field` property.
|
|
332
|
+
|
|
333
|
+
Binding `T` once lets TypeScript infer each field's type from the field name, so predicates are fully typed without manual annotations.
|
|
334
|
+
|
|
335
|
+
### Extractor methods
|
|
336
|
+
|
|
337
|
+
| Method | Sync | Description |
|
|
338
|
+
|--------|------|-------------|
|
|
339
|
+
| `extract(content)` | async | Full pipeline: rules → LLM → merge → validate |
|
|
340
|
+
| `extractSync(content)` | sync | Rules only. Returns partial result + missing fields. |
|
|
341
|
+
| `prompt(content, partial)` | sync | Builds LLM prompt for missing fields only. |
|
|
342
|
+
| `parse(raw)` | sync | Parses raw LLM JSON response. |
|
|
343
|
+
| `merge(partial, llmResult, content)` | sync | Merges rules + LLM, detects conflicts, normalizes, validates. |
|
|
344
|
+
|
|
345
|
+
## License
|
|
346
|
+
|
|
347
|
+
MIT
|
|
348
|
+
|
|
349
|
+
## Contributing
|
|
350
|
+
|
|
351
|
+
See [CONTRIBUTING.md](./CONTRIBUTING.md).
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { z } from 'zod';
|
|
2
|
+
import type { Extractor, ExtractorConfig } from './types/extractor.types.js';
|
|
3
|
+
/**
|
|
4
|
+
* Bind a schema, deterministic rules and an optional LLM fallback into an
|
|
5
|
+
* {@link Extractor}. The returned object exposes the extraction pipeline as
|
|
6
|
+
* pre-configured methods; call sites stop having to thread `schema`,
|
|
7
|
+
* `rules` and provider wiring through every step.
|
|
8
|
+
*
|
|
9
|
+
* {@link Extractor.extract} runs {@link rule.apply}, then — when an LLM is
|
|
10
|
+
* configured and some fields are still missing — asks the provider for those
|
|
11
|
+
* fields only, parses the response with {@link prompt.parse} and fuses
|
|
12
|
+
* everything through {@link merge.apply}.
|
|
13
|
+
*
|
|
14
|
+
* @typeParam S - A Zod object schema describing the target data shape.
|
|
15
|
+
* @param config - Schema, deterministic rules, and optional LLM fallback.
|
|
16
|
+
* @returns An {@link Extractor} bound to `config.schema`.
|
|
17
|
+
*/
|
|
18
|
+
export declare function createExtractor<S extends z.ZodObject<z.ZodRawShape>>(config: ExtractorConfig<S>): Extractor<z.infer<S>>;
|
|
19
|
+
//# sourceMappingURL=extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../src/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAI7B,OAAO,KAAK,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AA8C7E;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,eAAe,CAAC,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,EAClE,MAAM,EAAE,eAAe,CAAC,CAAC,CAAC,GACzB,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAuDvB"}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { rule } from './rules.js';
|
|
2
|
+
import { merge } from './merge.js';
|
|
3
|
+
import { prompt } from './prompt.js';
|
|
4
|
+
/**
|
|
5
|
+
* Reconstruct a {@link RulesResult} from a `partial` that was previously
|
|
6
|
+
* produced by `extractSync` (i.e. a rules-only merge). Avoids re-running the
|
|
7
|
+
* rules during a deferred {@link Extractor.merge} call, since the partial
|
|
8
|
+
* already carries every rule value and its confidence.
|
|
9
|
+
*/
|
|
10
|
+
function rulesResultFromPartial(partial, allFields) {
|
|
11
|
+
const values = {};
|
|
12
|
+
const confidence = {};
|
|
13
|
+
for (const field of allFields) {
|
|
14
|
+
const value = partial.data[field];
|
|
15
|
+
if (value === null) {
|
|
16
|
+
continue;
|
|
17
|
+
}
|
|
18
|
+
values[field] = value;
|
|
19
|
+
const fieldConfidence = partial.confidence[field];
|
|
20
|
+
if (fieldConfidence !== null) {
|
|
21
|
+
confidence[field] = fieldConfidence;
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
return { values, confidence, missing: [...partial.missing] };
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Stamp `result.meta.durationMs` with the wall-clock elapsed since `startedAt`.
|
|
28
|
+
* Used by every {@link Extractor} method that returns an {@link ExtractionResult},
|
|
29
|
+
* so consumers see real timings instead of the placeholder `0` left by
|
|
30
|
+
* {@link merge.apply}.
|
|
31
|
+
*/
|
|
32
|
+
function stampDuration(result, startedAt) {
|
|
33
|
+
return {
|
|
34
|
+
...result,
|
|
35
|
+
meta: { ...result.meta, durationMs: performance.now() - startedAt },
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
/**
 * Build an {@link Extractor} from a schema, a set of deterministic rules and,
 * optionally, an LLM fallback. Every method on the returned object closes
 * over `config`, so callers no longer pass `schema`, `rules` or the provider
 * around at each step.
 *
 * {@link Extractor.extract} first runs {@link rule.apply} followed by a
 * rules-only {@link merge.apply}. When no LLM is configured, or nothing is
 * missing after that pass, the rules-only result is returned. Otherwise a
 * request built by {@link prompt.build} is sent to the provider, the answer
 * is parsed by {@link prompt.parse} against the missing fields, and both
 * sources are fused by {@link merge.apply}.
 *
 * @typeParam S - A Zod object schema describing the target data shape.
 * @param config - Schema, deterministic rules, and optional LLM fallback.
 * @returns An {@link Extractor} bound to `config.schema`.
 * @throws Error when `config.schema` declares no field.
 */
export function createExtractor(config) {
    const allFields = Object.keys(config.schema.shape);
    if (!allFields.length) {
        throw new Error('createExtractor: schema must declare at least one field');
    }
    // Deterministic pass shared by `extract` and `extractSync`: apply the
    // rules, then run a rules-only merge (no LLM result).
    const runRulesPass = (content) => {
        const rulesResult = rule.apply(content, config.rules, config.schema);
        const partial = merge.apply(config.schema, rulesResult, null, content);
        return { rulesResult, partial };
    };
    // Single place where the LLM request is assembled.
    const buildRequest = (content, partial, systemPrompt) => prompt.build(config.schema, partial, content, { systemPrompt });
    return {
        async extract(content) {
            const t0 = performance.now();
            const { rulesResult, partial } = runRulesPass(content);
            // Nothing to ask the LLM: either none is configured or the rules
            // already covered every field.
            if (!config.llm || partial.missing.length === 0) {
                return stampDuration(partial, t0);
            }
            const request = buildRequest(content, partial, config.llm.systemPrompt);
            const completion = await config.llm.provider.complete(request);
            // Only the fields the rules could not fill are accepted from the LLM.
            const llmResult = prompt.parse(config.schema, partial.missing, completion.values);
            const fused = merge.apply(config.schema, rulesResult, llmResult, content);
            return stampDuration(fused, t0);
        },
        extractSync(content) {
            const t0 = performance.now();
            const { partial } = runRulesPass(content);
            return stampDuration(partial, t0);
        },
        prompt(content, partial) {
            // `llm` is optional here: the request can be built without it.
            return buildRequest(content, partial, config.llm?.systemPrompt);
        },
        parse(raw) {
            // No partial is available at this step, so every schema field is
            // a candidate.
            return prompt.parse(config.schema, allFields, raw);
        },
        merge(partial, llmResult, content) {
            const t0 = performance.now();
            // Recover the rules output from the partial instead of re-running rules.
            const recovered = rulesResultFromPartial(partial, allFields);
            const fused = merge.apply(config.schema, recovered, llmResult, content);
            return stampDuration(fused, t0);
        },
    };
}
|
|
96
|
+
//# sourceMappingURL=extractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"extractor.js","sourceRoot":"","sources":["../src/extractor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAKrC;;;;;GAKG;AACH,SAAS,sBAAsB,CAC7B,OAA4B,EAC5B,SAA+B;IAE/B,MAAM,MAAM,GAAe,EAAE,CAAC;IAC9B,MAAM,UAAU,GAAqC,EAAE,CAAC;IACxD,KAAK,MAAM,KAAK,IAAI,SAAS,EAAE,CAAC;QAC9B,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClC,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACnB,SAAS;QACX,CAAC;QACD,MAAM,CAAC,KAAK,CAAC,GAAG,KAAmB,CAAC;QACpC,MAAM,eAAe,GAAG,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QAClD,IAAI,eAAe,KAAK,IAAI,EAAE,CAAC;YAC7B,UAAU,CAAC,KAAK,CAAC,GAAG,eAAe,CAAC;QACtC,CAAC;IACH,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;AAC/D,CAAC;AAED;;;;;GAKG;AACH,SAAS,aAAa,CACpB,MAA2B,EAC3B,SAAiB;IAEjB,OAAO;QACL,GAAG,MAAM;QACT,IAAI,EAAE,EAAE,GAAG,MAAM,CAAC,IAAI,EAAE,UAAU,EAAE,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,EAAE;KACpE,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,UAAU,eAAe,CAC7B,MAA0B;IAG1B,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAmB,CAAC;IAErE,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;IAC7E,CAAC;IAED,OAAO;QACL,KAAK,CAAC,OAAO,CAAC,OAAO;YACnB,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YACpC,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;YACrE,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;YAEvE,IAAI,CAAC,MAAM,CAAC,GAAG,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAChD,OAAO,aAAa,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;YAC3C,CAAC;YAED,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE;gBAC5D,YAAY,EAAE,MAAM,CAAC,GAAG,CAAC,YAAY;aACtC,CAAC,CAAC;YACH,MAAM,UAAU,GAAG,MAAM,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAC/D,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAC5B,MAAM,CAAC,MAAM,EACb,OAAO,CAAC,OAAO,EACf,UAAU,CAAC,MAAM,CAClB,CAAC;YACF,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,M
AAM,CAAC,MAAM,EAAE,WAAW,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;YAC1E,OAAO,aAAa,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACzC,CAAC;QAED,WAAW,CAAC,OAAO;YACjB,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YACpC,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;YACrE,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;YACvE,OAAO,aAAa,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC3C,CAAC;QAED,MAAM,CAAC,OAAO,EAAE,OAAO;YACrB,OAAO,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE;gBACnD,YAAY,EAAE,MAAM,CAAC,GAAG,EAAE,YAAY;aACvC,CAAC,CAAC;QACL,CAAC;QAED,KAAK,CAAC,GAAG;YACP,OAAO,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,SAAS,EAAE,GAAG,CAAC,CAAC;QACrD,CAAC;QAED,KAAK,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO;YAC/B,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YACpC,MAAM,WAAW,GAAG,sBAAsB,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;YAC/D,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;YAC3E,OAAO,aAAa,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;QAC1C,CAAC;KACF,CAAC;AACJ,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Llmbic — public entry point.
|
|
3
|
+
*
|
|
4
|
+
* Re-exports the five namespaces that make up the library (`createExtractor`,
|
|
5
|
+
* `rule`, `merge`, `prompt`, `validator`) and every public type consumers need
|
|
6
|
+
* to describe schemas, rules, providers, normalizers and validators.
|
|
7
|
+
*
|
|
8
|
+
* Llmbic does not ship vendor-specific provider adapters: the {@link LlmProvider}
|
|
9
|
+
* contract is a single-method interface, consumers implement it in ~10 lines
|
|
10
|
+
* using whichever SDK or HTTP client they prefer (see README).
|
|
11
|
+
*/
|
|
12
|
+
export { createExtractor } from './extractor.js';
|
|
13
|
+
export { rule } from './rules.js';
|
|
14
|
+
export { merge } from './merge.js';
|
|
15
|
+
export { prompt } from './prompt.js';
|
|
16
|
+
export { validator } from './validate.js';
|
|
17
|
+
export type { ExtractionRule, RuleMatch, RulesResult, } from './types/rule.types.js';
|
|
18
|
+
export type { Extractor, ExtractorConfig, ExtractorLlmConfig, } from './types/extractor.types.js';
|
|
19
|
+
export type { LlmRequest } from './types/prompt.types.js';
|
|
20
|
+
export type { LlmProvider } from './types/provider.types.js';
|
|
21
|
+
export type { Logger } from './types/logger.types.js';
|
|
22
|
+
export type { Severity, Violation, Validator, } from './types/validate.types.js';
|
|
23
|
+
export type { Conflict, ConflictStrategy, ExtractedData, ExtractionMeta, ExtractionResult, FieldCompare, FieldMergePolicy, FieldMergeResult, LlmResult, MergeApplyOptions, Normalizer, ValidationResult, } from './types/merge.types.js';
|
|
24
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC;AAE1C,YAAY,EACV,cAAc,EACd,SAAS,EACT,WAAW,GACZ,MAAM,uBAAuB,CAAC;AAE/B,YAAY,EACV,SAAS,EACT,eAAe,EACf,kBAAkB,GACnB,MAAM,4BAA4B,CAAC;AAEpC,YAAY,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAC1D,YAAY,EAAE,WAAW,EAAE,MAAM,2BAA2B,CAAC;AAC7D,YAAY,EAAE,MAAM,EAAE,MAAM,yBAAyB,CAAC;AAEtD,YAAY,EACV,QAAQ,EACR,SAAS,EACT,SAAS,GACV,MAAM,2BAA2B,CAAC;AAEnC,YAAY,EACV,QAAQ,EACR,gBAAgB,EAChB,aAAa,EACb,cAAc,EACd,gBAAgB,EAChB,YAAY,EACZ,gBAAgB,EAChB,gBAAgB,EAChB,SAAS,EACT,iBAAiB,EACjB,UAAU,EACV,gBAAgB,GACjB,MAAM,wBAAwB,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Llmbic — public entry point.
|
|
3
|
+
*
|
|
4
|
+
* Re-exports the five namespaces that make up the library (`createExtractor`,
|
|
5
|
+
* `rule`, `merge`, `prompt`, `validator`) and every public type consumers need
|
|
6
|
+
* to describe schemas, rules, providers, normalizers and validators.
|
|
7
|
+
*
|
|
8
|
+
* Llmbic does not ship vendor-specific provider adapters: the {@link LlmProvider}
|
|
9
|
+
* contract is a single-method interface; consumers implement it in ~10 lines
|
|
10
|
+
* using whichever SDK or HTTP client they prefer (see README).
|
|
11
|
+
*/
|
|
12
|
+
export { createExtractor } from './extractor.js';
|
|
13
|
+
export { rule } from './rules.js';
|
|
14
|
+
export { merge } from './merge.js';
|
|
15
|
+
export { prompt } from './prompt.js';
|
|
16
|
+
export { validator } from './validate.js';
|
|
17
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,SAAS,EAAE,MAAM,eAAe,CAAC"}
|
package/dist/merge.d.ts
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import type { z } from 'zod';
|
|
2
|
+
import type { Logger } from './types/logger.types.js';
|
|
3
|
+
import type { RuleMatch, RulesResult } from './types/rule.types.js';
|
|
4
|
+
import type { ExtractionResult, FieldMergePolicy, FieldMergeResult, LlmResult, MergeApplyOptions } from './types/merge.types.js';
|
|
5
|
+
/**
|
|
6
|
+
* Field-level and object-level merge primitives.
|
|
7
|
+
*
|
|
8
|
+
* Exposes two entry points: {@link merge.field} fuses a single field, and
|
|
9
|
+
* {@link merge.apply} performs the top-level object merge.
|
|
10
|
+
*/
|
|
11
|
+
export declare const merge: {
|
|
12
|
+
/**
|
|
13
|
+
* Library defaults applied by {@link merge.field} when the caller omits
|
|
14
|
+
* one or more policy fields. Exposed so consumers can reference or spread
|
|
15
|
+
* them (e.g. `{ ...merge.defaultFieldPolicy, strategy: 'prefer-llm' }`).
|
|
16
|
+
*
|
|
17
|
+
* See {@link FieldMergePolicy} for the meaning of each field.
|
|
18
|
+
*/
|
|
19
|
+
defaultFieldPolicy: {
|
|
20
|
+
/** See {@link FieldMergePolicy.strategy}. */
|
|
21
|
+
strategy: "flag";
|
|
22
|
+
/** See {@link FieldMergePolicy.defaultLlmConfidence}. */
|
|
23
|
+
defaultLlmConfidence: number;
|
|
24
|
+
/** See {@link FieldMergePolicy.flaggedConfidence}. */
|
|
25
|
+
flaggedConfidence: number;
|
|
26
|
+
/** See {@link FieldMergePolicy.agreementConfidence}. */
|
|
27
|
+
agreementConfidence: number;
|
|
28
|
+
/** See {@link FieldMergePolicy.compare}. Case-insensitive for strings, strict equality otherwise. */
|
|
29
|
+
compare: (a: unknown, b: unknown) => boolean;
|
|
30
|
+
};
|
|
31
|
+
/**
|
|
32
|
+
* Fuse a rule match and an LLM value for a single field, following the
|
|
33
|
+
* provided policy. Returns the kept value, its confidence, and a conflict
|
|
34
|
+
* record if the strategy flagged a disagreement.
|
|
35
|
+
*
|
|
36
|
+
* Any policy field omitted from `policy` falls back to
|
|
37
|
+
* {@link merge.defaultFieldPolicy}.
|
|
38
|
+
*
|
|
39
|
+
* Decision table (in order): rule-only, llm-only, both-null, agree,
|
|
40
|
+
* prefer-rule, prefer-llm, flag (default fallback).
|
|
41
|
+
*
|
|
42
|
+
* @typeParam T - Type of the rule value.
|
|
43
|
+
* @param field - Name of the field being merged.
|
|
44
|
+
* @param ruleMatch - Value proposed by a deterministic rule, or `null` if none.
|
|
45
|
+
* @param llmValue - Value proposed by the LLM, or `null` if none. Cast to `T`
|
|
46
|
+
* without runtime type-check — callers that expose `merge.field` via
|
|
47
|
+
* `merge.apply` rely on the final Zod re-validation to reject invalid LLM values.
|
|
48
|
+
* @param policy - Optional strategy and confidence overrides.
|
|
49
|
+
* @param logger - Optional logger notified of unexpected runtime situations
|
|
50
|
+
* (e.g. an unknown strategy slipped past the type system).
|
|
51
|
+
*/
|
|
52
|
+
field<T>(field: string, ruleMatch: RuleMatch<T> | null, llmValue: unknown, policy?: Partial<FieldMergePolicy>, logger?: Logger): FieldMergeResult<T>;
|
|
53
|
+
/**
|
|
54
|
+
* Walk every field of `schema`, fuse the rules pass result with the LLM
|
|
55
|
+
* result via {@link merge.field}, and produce a typed
|
|
56
|
+
* {@link ExtractionResult}.
|
|
57
|
+
*
|
|
58
|
+
* Passing `llmResult = null` runs in rules-only mode: every field keeps
|
|
59
|
+
* whatever the rules produced and `meta.llmCalled` is `false`.
|
|
60
|
+
*
|
|
61
|
+
* Orchestration only — the three phases (fusion, normalization, validation)
|
|
62
|
+
* each live in their own private, non-exported helper.
|
|
63
|
+
*
|
|
64
|
+
* Runtime fields of `meta` (`durationMs`, `tokensUsed`) are populated by
|
|
65
|
+
* later slices; for now `durationMs` is `0`.
|
|
66
|
+
*
|
|
67
|
+
* @typeParam S - A Zod object schema.
|
|
68
|
+
* @param schema - Zod object schema describing the target data shape.
|
|
69
|
+
* @param rulesResult - Output of {@link rule.apply} for the same schema.
|
|
70
|
+
* @param llmResult - Parsed LLM response, or `null` for rules-only mode.
|
|
71
|
+
* @param content - Original text the rules and LLM were derived from; forwarded to normalizers so they can cross-reference the source.
|
|
72
|
+
* @param options - Optional behavior overrides (policy, normalizers, validators, logger).
|
|
73
|
+
*/
|
|
74
|
+
apply<S extends z.ZodObject<z.ZodRawShape>>(schema: S, rulesResult: RulesResult<z.infer<S>>, llmResult: LlmResult | null, content: string, options?: MergeApplyOptions<z.infer<S>>): ExtractionResult<z.infer<S>>;
|
|
75
|
+
};
|
|
76
|
+
//# sourceMappingURL=merge.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"merge.d.ts","sourceRoot":"","sources":["../src/merge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAC7B,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,KAAK,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAEpE,OAAO,KAAK,EAGV,gBAAgB,EAChB,gBAAgB,EAChB,gBAAgB,EAChB,SAAS,EACT,iBAAiB,EAElB,MAAM,wBAAwB,CAAC;AA+GhC;;;;;GAKG;AACH,eAAO,MAAM,KAAK;IAChB;;;;;;OAMG;;QAED,6CAA6C;;QAE7C,yDAAyD;;QAEzD,sDAAsD;;QAEtD,wDAAwD;;QAExD,qGAAqG;qBACxF,OAAO,KAAK,OAAO,KAAG,OAAO;;IAQ5C;;;;;;;;;;;;;;;;;;;;OAoBG;UACG,CAAC,SACE,MAAM,aACF,SAAS,CAAC,CAAC,CAAC,GAAG,IAAI,YACpB,OAAO,WACR,OAAO,CAAC,gBAAgB,CAAC,WACzB,MAAM,GACd,gBAAgB,CAAC,CAAC,CAAC;IAgEtB;;;;;;;;;;;;;;;;;;;;OAoBG;UACG,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,UAChC,CAAC,eACI,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,aACzB,SAAS,GAAG,IAAI,WAClB,MAAM,YACL,iBAAiB,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GACtC,gBAAgB,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;CAmChC,CAAC"}
|