llmbic 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +36 -7
- package/README.md +74 -32
- package/dist/extractor.d.ts +3 -1
- package/dist/extractor.d.ts.map +1 -1
- package/dist/extractor.js +11 -9
- package/dist/extractor.js.map +1 -1
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/dist/merge.d.ts +6 -3
- package/dist/merge.d.ts.map +1 -1
- package/dist/merge.js +13 -8
- package/dist/merge.js.map +1 -1
- package/dist/prompt.d.ts +2 -2
- package/dist/prompt.js +3 -3
- package/dist/rules.d.ts +18 -5
- package/dist/rules.d.ts.map +1 -1
- package/dist/rules.js +17 -4
- package/dist/rules.js.map +1 -1
- package/dist/types/extractor.types.d.ts +27 -14
- package/dist/types/extractor.types.d.ts.map +1 -1
- package/dist/types/logger.types.d.ts +1 -1
- package/dist/types/merge.types.d.ts +18 -6
- package/dist/types/merge.types.d.ts.map +1 -1
- package/dist/types/provider.types.d.ts +1 -1
- package/dist/types/rule.types.d.ts +18 -3
- package/dist/types/rule.types.d.ts.map +1 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,35 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.4.0] - 2026-04-18
|
|
9
|
+
|
|
10
|
+
Non-breaking. Normalizers can now read the same caller-provided `context` the rules see, so post-merge cross-field fix-ups no longer have to be closed over at extractor-declaration time. Typical use: a normalizer that reconciles extracted fields against the `sourceUrl` or per-tenant configuration passed to `extract`.
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- `Normalizer<T, TContext = unknown>` - second optional generic type parameter describing the shape of the per-call context forwarded to the normalizer's third argument. Defaults to `unknown`, so context-unaware normalizers and legacy code compile unchanged.
|
|
15
|
+
- `Normalizer(data, content, context?)` - third optional argument, left `undefined` when the caller passes no context.
|
|
16
|
+
- `MergeApplyOptions<T, TContext = unknown>` - second optional generic parameter shared with `Normalizer`, surfacing as `normalizers?: Normalizer<T, TContext>[]`.
|
|
17
|
+
- `merge.apply<S, TContext>(schema, rulesResult, llmResult, content, options?, context?)` - sixth optional argument forwarded verbatim to every normalizer.
|
|
18
|
+
- `ExtractorConfig<S, TContext>.normalizers` now types as `Normalizer<z.infer<S>, TContext>[]`, so the context flowing through rules reaches normalizers with the same compile-time shape.
|
|
19
|
+
- `Extractor.merge(partial, llmResult, content, context?)` - fourth optional argument. Rules are still not re-evaluated, but normalizers run here too; accepting `context` keeps them consistent with `Extractor.extract` / `Extractor.extractSync`.
|
|
20
|
+
|
|
21
|
+
## [1.3.0] - 2026-04-18
|
|
22
|
+
|
|
23
|
+
Non-breaking. Rules can now read a caller-provided `context` object alongside `content`, so per-call metadata (locale, tenant configuration, feature flags) no longer has to be captured in rule closures at declaration time.
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
|
|
27
|
+
- `ExtractionRule<TContext = unknown>` - optional generic type parameter describing the shape of a per-call context forwarded to `extract`. Defaults to `unknown`, so context-unaware rules and legacy code compile unchanged.
|
|
28
|
+
- `ExtractionRule.extract(content, context?)` - second optional argument, forwarded verbatim by `rule.apply` / `Extractor.extract` / `Extractor.extractSync`. Left `undefined` when the caller passes no context.
|
|
29
|
+
- `rule.create<TContext>(field, extract, options?)` - `create` is now generic over `TContext`, so typed contexts flow from the callback signature to the returned `ExtractionRule<TContext>`. `rule.regex` stays context-unaware and remains assignable to any `ExtractionRule<TContext>[]` via contextual parameter contravariance.
|
|
30
|
+
- `rule.apply<S, TContext>(content, rules, schema, logger?, context?)` - fifth optional argument passed through to every rule's `extract` callback.
|
|
31
|
+
- `ExtractorConfig<S, TContext = unknown>` and `Extractor<T, TContext = unknown>` - second optional generic parameter shared with the rules array. `Extractor.extract(content, context?)` and `Extractor.extractSync(content, context?)` forward `context` to `rule.apply`. `Extractor.merge` does not re-evaluate rules and accepts no `context`.
|
|
32
|
+
|
|
33
|
+
### Docs
|
|
34
|
+
|
|
35
|
+
- `CONTRIBUTING.md` - documents the 5-step release procedure (tests, bump, doc, tag, publish) to keep future releases aligned with the SemVer + npm publish lifecycle.
|
|
36
|
+
|
|
8
37
|
## [1.2.0] - 2026-04-16
|
|
9
38
|
|
|
10
39
|
Non-breaking. Production-readiness pass: per-field provenance, per-field merge policy, and pre/post LLM transformers. Token / cost tracking deliberately stays out of scope - `LlmProvider` keeps observability as a caller concern; wrap your `complete` for telemetry.
|
|
@@ -46,18 +75,18 @@ Non-breaking. Unblocks hybrid workflows that rely on nested schemas, agreement/c
|
|
|
46
75
|
- `ExtractorConfig<S>` gains optional `normalizers`, `validators`, `policy`, `logger`.
|
|
47
76
|
- `ExtractorLlmConfig` gains optional `mode`, `crossCheckHints`.
|
|
48
77
|
|
|
49
|
-
## [1.0.0]
|
|
78
|
+
## [1.0.0] - 2026-04-15
|
|
50
79
|
|
|
51
80
|
Initial public release.
|
|
52
81
|
|
|
53
82
|
### Added
|
|
54
83
|
|
|
55
|
-
- `createExtractor(config)`
|
|
56
|
-
- `rule` namespace
|
|
57
|
-
- `merge` namespace
|
|
58
|
-
- `prompt` namespace
|
|
59
|
-
- `validator` namespace
|
|
60
|
-
- `LlmProvider` contract
|
|
84
|
+
- `createExtractor(config)` - factory binding a Zod schema, deterministic rules and an optional LLM fallback into an extractor with `extract`, `extractSync`, `prompt`, `parse` and `merge` methods. Covers both one-shot async extraction and 4-step batch flows (extractSync -> prompt -> external LLM call -> parse -> merge).
|
|
85
|
+
- `rule` namespace - `rule.create(field, extractFn)`, `rule.regex(field, pattern, score, transform?)`, `rule.confidence(value, score)`, `rule.apply(content, rules, schema, logger?)`. Deterministic rules are pure synchronous functions returning typed matches with a confidence score in `[0, 1]`.
|
|
86
|
+
- `merge` namespace - `merge.apply(schema, rulesResult, llmResult, content, options?)` fuses rules output with LLM output, detects per-field conflicts, runs normalizers, re-validates against the Zod schema, and runs custom validators. `merge.defaultFieldPolicy` exposes the built-in fusion rules.
|
|
87
|
+
- `prompt` namespace - `prompt.build(schema, partial, options?)` emits an `LlmRequest` (`systemPrompt`, `userContent`, `responseSchema`, `knownValues`) restricted to fields missing from the deterministic pass. `prompt.parse(raw, missing, schema)` is a permissive parser that validates each field individually via Zod, drops invalid or unexpected keys, and never throws.
|
|
88
|
+
- `validator` namespace - `validator.of<T>()` returns `{ field, crossField }` factories bound to the data shape `T`, so predicates are fully typed from the field name.
|
|
89
|
+
- `LlmProvider` contract - single-method interface (`complete(request) -> { values }`) consumers implement to wire any backend (OpenAI, Anthropic, Ollama, custom HTTP, ...). No vendor SDK is pulled into the import graph.
|
|
61
90
|
- Per-field confidence scoring, conflict detection (`flag` / `prefer-rule` / `prefer-llm` strategies), and extraction metadata (`durationMs`, rule/LLM field counts).
|
|
62
91
|
- Full TypeScript `.d.ts` output with JSDoc on every public type, method and configuration field.
|
|
63
92
|
- Example wiring a local Ollama runtime under `examples/ollama.ts`.
|
package/README.md
CHANGED
|
@@ -5,13 +5,13 @@
|
|
|
5
5
|
[](./LICENSE)
|
|
6
6
|
[](https://nodejs.org)
|
|
7
7
|
|
|
8
|
-
Hybrid data extraction
|
|
8
|
+
Hybrid data extraction - deterministic rules + LLM fallback, with per-field confidence scoring.
|
|
9
9
|
|
|
10
10
|
The name folds **LLM** into [*lambic*](https://en.wikipedia.org/wiki/Lambic), the Belgian beer made by blending wild fermentation with a controlled process. Same idea here: LLMs are unpredictable, rules are rigid, and the mix produces something reliable.
|
|
11
11
|
|
|
12
12
|
## Why
|
|
13
13
|
|
|
14
|
-
Extracting structured data from unstructured text is a solved problem
|
|
14
|
+
Extracting structured data from unstructured text is a solved problem - until you need it to be *reliable*. Rules (regex, parsers) are deterministic but brittle. LLMs understand context but hallucinate. Neither is enough alone.
|
|
15
15
|
|
|
16
16
|
Llmbic combines both: deterministic rules extract what they can with full confidence, the LLM fills in the gaps, and a merge layer detects conflicts between the two. Every field carries a confidence score. You know exactly what's trustworthy and what needs review.
|
|
17
17
|
|
|
@@ -21,7 +21,7 @@ Llmbic combines both: deterministic rules extract what they can with full confid
|
|
|
21
21
|
npm install llmbic
|
|
22
22
|
```
|
|
23
23
|
|
|
24
|
-
Llmbic has a single dependency: [Zod](https://zod.dev). No vendor SDK is pulled in
|
|
24
|
+
Llmbic has a single dependency: [Zod](https://zod.dev). No vendor SDK is pulled in - you bring your own LLM provider via the 1-method `LlmProvider` interface (see "Writing a provider" below).
|
|
25
25
|
|
|
26
26
|
## Quick start
|
|
27
27
|
|
|
@@ -111,7 +111,7 @@ console.log(result.confidence);
|
|
|
111
111
|
// { total: 1.0, currency: 1.0, vendor: 0.7, date: 0.7 }
|
|
112
112
|
|
|
113
113
|
console.log(result.conflicts);
|
|
114
|
-
// []
|
|
114
|
+
// [] - no disagreement between rules and LLM
|
|
115
115
|
```
|
|
116
116
|
|
|
117
117
|
### Batch / async mode (for OpenAI Batch API, job queues, etc.)
|
|
@@ -119,19 +119,19 @@ console.log(result.conflicts);
|
|
|
119
119
|
When you manage the LLM call yourself (batching, polling, custom transport), use the 4-step API:
|
|
120
120
|
|
|
121
121
|
```typescript
|
|
122
|
-
// Step 1
|
|
122
|
+
// Step 1 - Deterministic extraction (sync, instant)
|
|
123
123
|
const partial = extractor.extractSync(markdown);
|
|
124
124
|
|
|
125
|
-
// Step 2
|
|
125
|
+
// Step 2 - Build the LLM request (you send it however you want)
|
|
126
126
|
const llmRequest = extractor.prompt(markdown, partial);
|
|
127
|
-
//
|
|
127
|
+
// -> { systemPrompt, userContent, responseSchema, knownValues }
|
|
128
128
|
|
|
129
129
|
// ... submit to OpenAI Batch API, poll later, get the response ...
|
|
130
130
|
|
|
131
|
-
// Step 3
|
|
131
|
+
// Step 3 - Parse the raw LLM response
|
|
132
132
|
const llmResult = extractor.parse(rawJsonResponse);
|
|
133
133
|
|
|
134
|
-
// Step 4
|
|
134
|
+
// Step 4 - Merge everything (fusion + conflict detection + validation)
|
|
135
135
|
const result = extractor.merge(partial, llmResult, markdown);
|
|
136
136
|
```
|
|
137
137
|
|
|
@@ -181,12 +181,12 @@ End-to-end runnable example (upload + poll + download + merge): [`examples/opena
|
|
|
181
181
|
|
|
182
182
|
### Per-field confidence scoring
|
|
183
183
|
|
|
184
|
-
Every field in the result carries a confidence score (0.0
|
|
184
|
+
Every field in the result carries a confidence score (0.0-1.0):
|
|
185
185
|
|
|
186
186
|
| Source | Confidence |
|
|
187
187
|
|--------|-----------|
|
|
188
188
|
| Deterministic rule, exact match | 1.0 |
|
|
189
|
-
| Deterministic rule, partial match | 0.7
|
|
189
|
+
| Deterministic rule, partial match | 0.7-0.9 (you decide) |
|
|
190
190
|
| LLM only | configurable default (0.7) |
|
|
191
191
|
| Rule + LLM agree | 1.0 |
|
|
192
192
|
| Rule + LLM disagree | 0.3 (flagged as conflict) |
|
|
@@ -223,7 +223,7 @@ result.conflicts;
|
|
|
223
223
|
// [{ field: 'total', ruleValue: 1250, ruleConfidence: 1.0, llmValue: 1520 }]
|
|
224
224
|
```
|
|
225
225
|
|
|
226
|
-
Three conflict strategies: `'flag'` (default
|
|
226
|
+
Three conflict strategies: `'flag'` (default - keep rule value, record conflict), `'prefer-rule'`, or `'prefer-llm'`.
|
|
227
227
|
|
|
228
228
|
In the default `'fill-gaps'` mode the LLM is only asked about fields the rules could not resolve, so conflicts are impossible. To actually trigger conflict detection, opt into cross-check (see below).
|
|
229
229
|
|
|
@@ -263,6 +263,35 @@ const extractor = createExtractor({
|
|
|
263
263
|
|
|
264
264
|
The merge step now sees two candidates per field and surfaces real disagreements through `result.conflicts`. `crossCheckHints: 'bias'` re-exposes the rule values as hints to save tokens, at the cost of confirmation bias (the LLM tends to agree with what it was shown).
|
|
265
265
|
|
|
266
|
+
### Per-call context
|
|
267
|
+
|
|
268
|
+
Rules can accept a second, caller-provided argument alongside `content` so they can read per-call metadata (locale, tenant configuration, feature flags) without having to capture it in a closure at declaration time:
|
|
269
|
+
|
|
270
|
+
```typescript
|
|
271
|
+
import { createExtractor, rule } from 'llmbic';
|
|
272
|
+
import { z } from 'zod';
|
|
273
|
+
|
|
274
|
+
type Region = { region: 'us' | 'uk' };
|
|
275
|
+
|
|
276
|
+
const schema = z.object({ price: z.string() });
|
|
277
|
+
|
|
278
|
+
const priceRule = rule.create<Region>('price', (content, context) => {
|
|
279
|
+
const pattern = context?.region === 'uk' ? /£\s*(\d+)/ : /\$\s*(\d+)/;
|
|
280
|
+
const match = content.match(pattern);
|
|
281
|
+
return match ? rule.confidence(match[1]!, 1) : null;
|
|
282
|
+
});
|
|
283
|
+
|
|
284
|
+
const extractor = createExtractor<typeof schema, Region>({
|
|
285
|
+
schema,
|
|
286
|
+
rules: [priceRule],
|
|
287
|
+
});
|
|
288
|
+
|
|
289
|
+
await extractor.extract('Listed at £42', { region: 'uk' });
|
|
290
|
+
// -> { price: '42' }
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
The type parameter on `rule.create<TContext>` flows to the generic on `createExtractor<Schema, TContext>` and is enforced at every call site. `context` is forwarded verbatim to every rule's `extract` callback and to every normalizer as its optional third argument; callbacks that ignore it still compile and work. `extractSync(content, context?)` behaves the same way for batch workflows. `Extractor.merge(partial, llmResult, content, context?)` does not re-evaluate rules (it reuses the partial's values) but normalizers still run, so it accepts the same optional `context`.
|
|
294
|
+
|
|
266
295
|
### Rich schemas
|
|
267
296
|
|
|
268
297
|
The JSON Schema handed to the LLM supports the Zod constructs that show up in real-world extraction targets:
|
|
@@ -274,22 +303,35 @@ The JSON Schema handed to the LLM supports the Zod constructs that show up in re
|
|
|
274
303
|
|
|
275
304
|
### Normalizers
|
|
276
305
|
|
|
277
|
-
Post-merge transformations. Run in sequence, receive the merged data + original content:
|
|
306
|
+
Post-merge transformations. Run in sequence, receive the merged data + original content, and optionally the same per-call `context` the rules see (see [Per-call context](#per-call-context)):
|
|
278
307
|
|
|
279
308
|
```typescript
|
|
280
|
-
|
|
309
|
+
type Ctx = { sourceUrl: string };
|
|
310
|
+
|
|
311
|
+
const extractor = createExtractor<typeof MySchema, Ctx>({
|
|
281
312
|
schema: MySchema,
|
|
282
313
|
rules: [...],
|
|
283
314
|
normalizers: [
|
|
284
315
|
(data, content) => {
|
|
285
|
-
//
|
|
316
|
+
// Rules/content-only normalizer: no context needed
|
|
286
317
|
if (data.price && data.price < 100) data.price = null;
|
|
287
318
|
return data;
|
|
288
319
|
},
|
|
320
|
+
(data, _content, context) => {
|
|
321
|
+
// Context-aware: cross-field fix-up that depends on where the content came from
|
|
322
|
+
if (context && !data.city && /\/liege\//.test(context.sourceUrl)) {
|
|
323
|
+
data.city = 'Liège';
|
|
324
|
+
}
|
|
325
|
+
return data;
|
|
326
|
+
},
|
|
289
327
|
],
|
|
290
328
|
});
|
|
329
|
+
|
|
330
|
+
await extractor.extract(markdown, { sourceUrl: 'https://example.be/liege/123' });
|
|
291
331
|
```
|
|
292
332
|
|
|
333
|
+
Context-unaware normalizers keep working unchanged - the third argument is optional and left `undefined` when the caller passes no context.
|
|
334
|
+
|
|
293
335
|
### Validators (invariants)
|
|
294
336
|
|
|
295
337
|
Check the final output for logical consistency:
|
|
@@ -339,7 +381,7 @@ Common patterns:
|
|
|
339
381
|
|
|
340
382
|
## Writing a provider
|
|
341
383
|
|
|
342
|
-
Llmbic does not ship vendor-specific adapters. The `LlmProvider` contract is a single method
|
|
384
|
+
Llmbic does not ship vendor-specific adapters. The `LlmProvider` contract is a single method - wiring to any backend (OpenAI, Anthropic, Ollama, vLLM, Gemini, custom HTTP, ...) is ~10 lines you write and own.
|
|
343
385
|
|
|
344
386
|
```typescript
|
|
345
387
|
import type { LlmProvider } from 'llmbic';
|
|
@@ -404,7 +446,7 @@ const provider: LlmProvider = {
|
|
|
404
446
|
};
|
|
405
447
|
```
|
|
406
448
|
|
|
407
|
-
**Ollama** (native `format`
|
|
449
|
+
**Ollama** (native `format` - JSON Schema, requires Ollama 0.5+):
|
|
408
450
|
|
|
409
451
|
```typescript
|
|
410
452
|
const client = new Ollama();
|
|
@@ -423,29 +465,29 @@ const provider: LlmProvider = {
|
|
|
423
465
|
};
|
|
424
466
|
```
|
|
425
467
|
|
|
426
|
-
Observability (token usage, latency, cost accounting) is out of scope
|
|
468
|
+
Observability (token usage, latency, cost accounting) is out of scope - wrap the `complete` call in whatever telemetry you already use.
|
|
427
469
|
|
|
428
470
|
## Design decisions
|
|
429
471
|
|
|
430
|
-
- **One dependency**
|
|
431
|
-
- **No retry**
|
|
432
|
-
- **No streaming**
|
|
433
|
-
- **No chunking**
|
|
434
|
-
- **Normalizers mutate**
|
|
435
|
-
- **Rules are sync**
|
|
472
|
+
- **One dependency** - Zod only. No vendor SDK ever enters the import graph; you bring your own LLM provider (see "Writing a provider").
|
|
473
|
+
- **No retry** - If the LLM returns invalid data, `parse()` does best-effort parsing (valid fields kept, invalid ignored). Retry is an orchestration concern.
|
|
474
|
+
- **No streaming** - Llmbic works with complete results. Streaming is a transport concern.
|
|
475
|
+
- **No chunking** - One content = one extraction. If your content is too long, split it before calling Llmbic.
|
|
476
|
+
- **Normalizers mutate** - For pragmatic reasons, normalizers receive and return the same object. The `merge()` function copies the data first, so the original is never modified.
|
|
477
|
+
- **Rules are sync** - Extraction rules are pure synchronous functions. If you need async lookups, do them before creating the rule.
|
|
436
478
|
|
|
437
479
|
## API reference
|
|
438
480
|
|
|
439
481
|
### `createExtractor(config)`
|
|
440
482
|
|
|
441
|
-
Creates an extractor instance. Config:
|
|
483
|
+
Creates an extractor instance. Signature: `createExtractor<S, TContext = unknown>(config)`. `TContext` is optional and describes the per-call context passed to `Extractor.extract(content, context?)`; see [Per-call context](#per-call-context). Config:
|
|
442
484
|
|
|
443
485
|
| Field | Type | Required | Description |
|
|
444
486
|
|-------|------|----------|-------------|
|
|
445
487
|
| `schema` | `ZodObject` | yes | Output schema (drives field enumeration and re-validation). |
|
|
446
|
-
| `rules` | `ExtractionRule[]` | yes | Deterministic extraction rules. |
|
|
488
|
+
| `rules` | `ExtractionRule<TContext>[]` | yes | Deterministic extraction rules. |
|
|
447
489
|
| `llm` | `ExtractorLlmConfig` | no | LLM fallback. Omit for rules-only mode. See below. |
|
|
448
|
-
| `normalizers` | `Normalizer<T>[]` | no | Post-merge transformations, run in declared order. |
|
|
490
|
+
| `normalizers` | `Normalizer<T, TContext>[]` | no | Post-merge transformations, run in declared order. Each normalizer receives `(data, content, context?)`; `TContext` is shared with the rules array. |
|
|
449
491
|
| `validators` | `Validator<ExtractedData<T>>[]` | no | Invariants populating `result.validation`. |
|
|
450
492
|
| `policy` | `Partial<FieldMergePolicy>` | no | Overrides the per-field merge policy (conflict strategy, confidence defaults, equality) for every field. |
|
|
451
493
|
| `policyByField` | `{ [K in keyof T]?: Partial<FieldMergePolicy> }` | no | Per-field overrides applied on top of `policy`. Precedence: defaults < `policy` < `policyByField[field]`. |
|
|
@@ -466,10 +508,10 @@ Creates an extractor instance. Config:
|
|
|
466
508
|
|
|
467
509
|
| Member | Signature | Description |
|
|
468
510
|
|---|---|---|
|
|
469
|
-
| `rule.create` |
|
|
511
|
+
| `rule.create` | `<TContext = unknown>(field, extract, options?) => ExtractionRule<TContext>` | Declare a rule. `extract(content, context?)` returns a `RuleMatch` or `null`. `options.id` sets the stable identifier surfaced in `result.sources`. `TContext` is inferred from the callback when present. |
|
|
470
512
|
| `rule.regex` | `(field, pattern, score, transform?, options?) => ExtractionRule` | Regex-based rule. On match, capture group 1 (or the full match) is fed to `transform`. `options.id` sets the stable identifier surfaced in `result.sources`. |
|
|
471
513
|
| `rule.confidence` | `(value, score) => RuleMatch` | Wrap a value and a confidence score; sugar for custom `extract` callbacks. |
|
|
472
|
-
| `rule.apply` |
|
|
514
|
+
| `rule.apply` | `<S, TContext = unknown>(content, rules, schema, logger?, context?) => RulesResult` | Run every rule, pick the highest-confidence match per field, type-check against the schema. `context` is forwarded verbatim to each rule's `extract` callback. |
|
|
473
515
|
|
|
474
516
|
### `validator.of<T>()`
|
|
475
517
|
|
|
@@ -484,11 +526,11 @@ Binding `T` once lets TypeScript infer each field's type from the field name, so
|
|
|
484
526
|
|
|
485
527
|
| Method | Sync | Description |
|
|
486
528
|
|--------|------|-------------|
|
|
487
|
-
| `extract(content)` | async | Full pipeline: rules -> LLM (if configured) -> merge -> normalize -> validate. |
|
|
488
|
-
| `extractSync(content)` | sync | Rules only. Returns the partial result + `missing` fields. |
|
|
529
|
+
| `extract(content, context?)` | async | Full pipeline: rules -> LLM (if configured) -> merge -> normalize -> validate. `context` is forwarded verbatim to every rule's `extract` callback and to every normalizer. |
|
|
530
|
+
| `extractSync(content, context?)` | sync | Rules only. Returns the partial result + `missing` fields. `context` is forwarded verbatim to every rule's `extract` callback and to every normalizer. |
|
|
489
531
|
| `prompt(content, partial)` | sync | Builds the LLM request. Covers `partial.missing` in fill-gaps mode, every schema field in cross-check mode. |
|
|
490
532
|
| `parse(raw)` | sync | Parses a raw LLM JSON response, validating each field individually. Never throws. |
|
|
491
|
-
| `merge(partial, llmResult, content)` | sync | Merges rules + LLM, detects conflicts, normalizes, validates. |
|
|
533
|
+
| `merge(partial, llmResult, content, context?)` | sync | Merges rules + LLM, detects conflicts, normalizes, validates. Does not re-evaluate rules, but normalizers still run and receive `context` verbatim. |
|
|
492
534
|
|
|
493
535
|
## License
|
|
494
536
|
|
package/dist/extractor.d.ts
CHANGED
|
@@ -15,11 +15,13 @@ import type { Extractor, ExtractorConfig } from './types/extractor.types.js';
|
|
|
15
15
|
* is parsed with {@link prompt.parse} and fused through {@link merge.apply}.
|
|
16
16
|
*
|
|
17
17
|
* @typeParam S - A Zod object schema describing the target data shape.
|
|
18
|
+
* @typeParam TContext - Shape of the optional per-call context forwarded to
|
|
19
|
+
* every rule's `extract` callback and to every normalizer. Defaults to `unknown`.
|
|
18
20
|
* @param config - Schema, deterministic rules, and optional LLM fallback,
|
|
19
21
|
* plus `policy`, `normalizers`, `validators` and `logger` forwarded to
|
|
20
22
|
* every internal {@link merge.apply} call. The logger is also forwarded
|
|
21
23
|
* to {@link rule.apply} so schema-rejection warnings stay visible.
|
|
22
24
|
* @returns An {@link Extractor} bound to `config.schema`.
|
|
23
25
|
*/
|
|
24
|
-
export declare function createExtractor<S extends z.ZodObject<z.ZodRawShape
|
|
26
|
+
export declare function createExtractor<S extends z.ZodObject<z.ZodRawShape>, TContext = unknown>(config: ExtractorConfig<S, TContext>): Extractor<z.infer<S>, TContext>;
|
|
25
27
|
//# sourceMappingURL=extractor.d.ts.map
|
package/dist/extractor.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../src/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAI7B,OAAO,KAAK,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAmD7E
|
|
1
|
+
{"version":3,"file":"extractor.d.ts","sourceRoot":"","sources":["../src/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAI7B,OAAO,KAAK,EAAE,SAAS,EAAE,eAAe,EAAE,MAAM,4BAA4B,CAAC;AAmD7E;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,wBAAgB,eAAe,CAC7B,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,EACpC,QAAQ,GAAG,OAAO,EAElB,MAAM,EAAE,eAAe,CAAC,CAAC,EAAE,QAAQ,CAAC,GACnC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,CA4EjC"}
|
package/dist/extractor.js
CHANGED
|
@@ -55,6 +55,8 @@ function stampDuration(result, startedAt) {
|
|
|
55
55
|
* is parsed with {@link prompt.parse} and fused through {@link merge.apply}.
|
|
56
56
|
*
|
|
57
57
|
* @typeParam S - A Zod object schema describing the target data shape.
|
|
58
|
+
* @typeParam TContext - Shape of the optional per-call context forwarded to
|
|
59
|
+
* every rule's `extract` callback and to every normalizer. Defaults to `unknown`.
|
|
58
60
|
* @param config - Schema, deterministic rules, and optional LLM fallback,
|
|
59
61
|
* plus `policy`, `normalizers`, `validators` and `logger` forwarded to
|
|
60
62
|
* every internal {@link merge.apply} call. The logger is also forwarded
|
|
@@ -79,10 +81,10 @@ export function createExtractor(config) {
|
|
|
79
81
|
logger: config.logger,
|
|
80
82
|
};
|
|
81
83
|
return {
|
|
82
|
-
async extract(content) {
|
|
84
|
+
async extract(content, context) {
|
|
83
85
|
const startedAt = performance.now();
|
|
84
|
-
const rulesResult = rule.apply(content, config.rules, config.schema, config.logger);
|
|
85
|
-
const partial = merge.apply(config.schema, rulesResult, null, content, mergeOptions);
|
|
86
|
+
const rulesResult = rule.apply(content, config.rules, config.schema, config.logger, context);
|
|
87
|
+
const partial = merge.apply(config.schema, rulesResult, null, content, mergeOptions, context);
|
|
86
88
|
const shouldCallLlm = config.llm !== undefined &&
|
|
87
89
|
(buildOptions.mode === 'cross-check' || partial.missing.length > 0);
|
|
88
90
|
if (!shouldCallLlm) {
|
|
@@ -98,13 +100,13 @@ export function createExtractor(config) {
|
|
|
98
100
|
const llmResult = config.llm.transformResponse
|
|
99
101
|
? await config.llm.transformResponse(parsedLlmResult, request)
|
|
100
102
|
: parsedLlmResult;
|
|
101
|
-
const final = merge.apply(config.schema, rulesResult, llmResult, content, mergeOptions);
|
|
103
|
+
const final = merge.apply(config.schema, rulesResult, llmResult, content, mergeOptions, context);
|
|
102
104
|
return stampDuration(final, startedAt);
|
|
103
105
|
},
|
|
104
|
-
extractSync(content) {
|
|
106
|
+
extractSync(content, context) {
|
|
105
107
|
const startedAt = performance.now();
|
|
106
|
-
const rulesResult = rule.apply(content, config.rules, config.schema, config.logger);
|
|
107
|
-
const partial = merge.apply(config.schema, rulesResult, null, content, mergeOptions);
|
|
108
|
+
const rulesResult = rule.apply(content, config.rules, config.schema, config.logger, context);
|
|
109
|
+
const partial = merge.apply(config.schema, rulesResult, null, content, mergeOptions, context);
|
|
108
110
|
return stampDuration(partial, startedAt);
|
|
109
111
|
},
|
|
110
112
|
prompt(content, partial) {
|
|
@@ -113,10 +115,10 @@ export function createExtractor(config) {
|
|
|
113
115
|
parse(raw) {
|
|
114
116
|
return prompt.parse(config.schema, allFields, raw);
|
|
115
117
|
},
|
|
116
|
-
merge(partial, llmResult, content) {
|
|
118
|
+
merge(partial, llmResult, content, context) {
|
|
117
119
|
const startedAt = performance.now();
|
|
118
120
|
const rulesResult = rulesResultFromPartial(partial, allFields);
|
|
119
|
-
const result = merge.apply(config.schema, rulesResult, llmResult, content, mergeOptions);
|
|
121
|
+
const result = merge.apply(config.schema, rulesResult, llmResult, content, mergeOptions, context);
|
|
120
122
|
return stampDuration(result, startedAt);
|
|
121
123
|
},
|
|
122
124
|
};
|
package/dist/extractor.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extractor.js","sourceRoot":"","sources":["../src/extractor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAKrC;;;;;GAKG;AACH,SAAS,sBAAsB,CAC7B,OAA4B,EAC5B,SAA+B;IAE/B,MAAM,MAAM,GAAe,EAAE,CAAC;IAC9B,MAAM,UAAU,GAAqC,EAAE,CAAC;IACxD,MAAM,SAAS,GAAqC,EAAE,CAAC;IACvD,KAAK,MAAM,KAAK,IAAI,SAAS,EAAE,CAAC;QAC9B,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClC,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACnB,SAAS;QACX,CAAC;QACD,MAAM,CAAC,KAAK,CAAC,GAAG,KAAmB,CAAC;QACpC,MAAM,eAAe,GAAG,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QAClD,IAAI,eAAe,KAAK,IAAI,EAAE,CAAC;YAC7B,UAAU,CAAC,KAAK,CAAC,GAAG,eAAe,CAAC;QACtC,CAAC;QACD,MAAM,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QACtC,IAAI,MAAM,KAAK,IAAI,IAAI,QAAQ,IAAI,MAAM,EAAE,CAAC;YAC1C,SAAS,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;QACnC,CAAC;IACH,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;AAC1E,CAAC;AAED;;;;;GAKG;AACH,SAAS,aAAa,CACpB,MAA2B,EAC3B,SAAiB;IAEjB,OAAO;QACL,GAAG,MAAM;QACT,IAAI,EAAE,EAAE,GAAG,MAAM,CAAC,IAAI,EAAE,UAAU,EAAE,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,EAAE;KACpE,CAAC;AACJ,CAAC;AAED
|
|
1
|
+
{"version":3,"file":"extractor.js","sourceRoot":"","sources":["../src/extractor.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AAClC,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAKrC;;;;;GAKG;AACH,SAAS,sBAAsB,CAC7B,OAA4B,EAC5B,SAA+B;IAE/B,MAAM,MAAM,GAAe,EAAE,CAAC;IAC9B,MAAM,UAAU,GAAqC,EAAE,CAAC;IACxD,MAAM,SAAS,GAAqC,EAAE,CAAC;IACvD,KAAK,MAAM,KAAK,IAAI,SAAS,EAAE,CAAC;QAC9B,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClC,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;YACnB,SAAS;QACX,CAAC;QACD,MAAM,CAAC,KAAK,CAAC,GAAG,KAAmB,CAAC;QACpC,MAAM,eAAe,GAAG,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;QAClD,IAAI,eAAe,KAAK,IAAI,EAAE,CAAC;YAC7B,UAAU,CAAC,KAAK,CAAC,GAAG,eAAe,CAAC;QACtC,CAAC;QACD,MAAM,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QACtC,IAAI,MAAM,KAAK,IAAI,IAAI,QAAQ,IAAI,MAAM,EAAE,CAAC;YAC1C,SAAS,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;QACnC,CAAC;IACH,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC,GAAG,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;AAC1E,CAAC;AAED;;;;;GAKG;AACH,SAAS,aAAa,CACpB,MAA2B,EAC3B,SAAiB;IAEjB,OAAO;QACL,GAAG,MAAM;QACT,IAAI,EAAE,EAAE,GAAG,MAAM,CAAC,IAAI,EAAE,UAAU,EAAE,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,EAAE;KACpE,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,MAAM,UAAU,eAAe,CAI7B,MAAoC;IAGpC,MAAM,SAAS,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAmB,CAAC;IAErE,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;IAC7E,CAAC;IAED,MAAM,YAAY,GAAG;QACnB,YAAY,EAAE,MAAM,CAAC,GAAG,EAAE,YAAY;QACtC,IAAI,EAAE,MAAM,CAAC,GAAG,EAAE,IAAI,IAAI,WAAW;QACrC,eAAe,EAAE,MAAM,CAAC,GAAG,EAAE,eAAe,IAAI,UAAU;KAClD,CAAC;IAEX,MAAM,YAAY,GAAsC;QACtD,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,aAAa,EAAE,MAAM,CAAC,aAAa;QACnC,WAAW,EAAE,MAAM,CAAC,WAAW;QAC/B,UAAU,EAAE,MAAM,CAAC,UAAU;QAC7B,MAAM,EAAE,MAAM,CAAC,MAAM;KACtB,CAAC;IAEF,OAAO;QACL,KAAK,CAAC,OAAO,CAAC,OAAO,EAAE,OAAO;YAC5B,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YACpC,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MA
AM,EAAE,OAAO,CAAC,CAAC;YAC7F,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;YAE9F,MAAM,aAAa,GACjB,MAAM,CAAC,GAAG,KAAK,SAAS;gBACxB,CAAC,YAAY,CAAC,IAAI,KAAK,aAAa,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YACtE,IAAI,CAAC,aAAa,EAAE,CAAC;gBACnB,OAAO,aAAa,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;YAC3C,CAAC;YAED,MAAM,YAAY,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;YACjF,MAAM,OAAO,GAAG,MAAM,CAAC,GAAI,CAAC,gBAAgB;gBAC1C,CAAC,CAAC,MAAM,MAAM,CAAC,GAAI,CAAC,gBAAgB,CAAC,YAAY,EAAE,OAAO,CAAC;gBAC3D,CAAC,CAAC,YAAY,CAAC;YACjB,MAAM,UAAU,GAAG,MAAM,MAAM,CAAC,GAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;YAChE,MAAM,eAAe,GACnB,YAAY,CAAC,IAAI,KAAK,aAAa,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC;YACpE,MAAM,eAAe,GAAG,MAAM,CAAC,KAAK,CAClC,MAAM,CAAC,MAAM,EACb,eAAe,EACf,UAAU,CAAC,MAAM,CAClB,CAAC;YACF,MAAM,SAAS,GAAG,MAAM,CAAC,GAAI,CAAC,iBAAiB;gBAC7C,CAAC,CAAC,MAAM,MAAM,CAAC,GAAI,CAAC,iBAAiB,CAAC,eAAe,EAAE,OAAO,CAAC;gBAC/D,CAAC,CAAC,eAAe,CAAC;YACpB,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,SAAS,EAAE,OAAO,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;YACjG,OAAO,aAAa,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACzC,CAAC;QAED,WAAW,CAAC,OAAO,EAAE,OAAO;YAC1B,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YACpC,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YAC7F,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,OAAO,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;YAC9F,OAAO,aAAa,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;QAC3C,CAAC;QAED,MAAM,CAAC,OAAO,EAAE,OAAO;YACrB,OAAO,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;QACrE,CAAC;QAED,KAAK,CAAC,GAAG;YACP,OAAO,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,SAAS,EAAE,GAAG,CAAC,CAAC;QACrD,CAAC;QAED,KAAK,CAAC,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,OAAO;YACxC,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;YACpC,MAAM,WAAW,GAAG,sBAAsB,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;YAC/D,MAAM,MAAM,GAAG,KAAK,CAAC
,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,WAAW,EAAE,SAAS,EAAE,OAAO,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;YAClG,OAAO,aAAa,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;QAC1C,CAAC;KACF,CAAC;AACJ,CAAC"}
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
package/dist/merge.d.ts
CHANGED
|
@@ -43,7 +43,7 @@ export declare const merge: {
|
|
|
43
43
|
* @param field - Name of the field being merged.
|
|
44
44
|
* @param ruleMatch - Value proposed by a deterministic rule, or `null` if none.
|
|
45
45
|
* @param llmValue - Value proposed by the LLM, or `null` if none. Cast to `T`
|
|
46
|
-
* without runtime type-check
|
|
46
|
+
* without runtime type-check - callers that expose `merge.field` via
|
|
47
47
|
* `merge.apply` rely on the final Zod re-validation to reject invalid LLM values.
|
|
48
48
|
* @param policy - Optional strategy and confidence overrides.
|
|
49
49
|
* @param logger - Optional logger notified of unexpected runtime situations
|
|
@@ -58,19 +58,22 @@ export declare const merge: {
|
|
|
58
58
|
* Passing `llmResult = null` runs in rules-only mode: every field keeps
|
|
59
59
|
* whatever the rules produced and `meta.llmCalled` is `false`.
|
|
60
60
|
*
|
|
61
|
-
* Orchestration only
|
|
61
|
+
* Orchestration only - the three phases (fusion, normalization, validation)
|
|
62
62
|
* each live in their own private helper above.
|
|
63
63
|
*
|
|
64
64
|
* Runtime fields of `meta` (`durationMs`, `tokensUsed`) are populated by
|
|
65
65
|
* later slices; for now `durationMs` is `0`.
|
|
66
66
|
*
|
|
67
67
|
* @typeParam S - A Zod object schema.
|
|
68
|
+
* @typeParam TContext - Shape of the optional context forwarded to every
|
|
69
|
+
* normalizer. Defaults to `unknown`.
|
|
68
70
|
* @param schema - Zod object schema describing the target data shape.
|
|
69
71
|
* @param rulesResult - Output of {@link rule.apply} for the same schema.
|
|
70
72
|
* @param llmResult - Parsed LLM response, or `null` for rules-only mode.
|
|
71
73
|
* @param content - Original text the rules and LLM were derived from; forwarded to normalizers so they can cross-reference the source.
|
|
72
74
|
* @param options - Optional behavior overrides (policy, normalizers, validators, logger).
|
|
75
|
+
* @param context - Optional caller-defined value forwarded to every normalizer's third argument. Left `undefined` when omitted.
|
|
73
76
|
*/
|
|
74
|
-
apply<S extends z.ZodObject<z.ZodRawShape
|
|
77
|
+
apply<S extends z.ZodObject<z.ZodRawShape>, TContext = unknown>(schema: S, rulesResult: RulesResult<z.infer<S>>, llmResult: LlmResult | null, content: string, options?: MergeApplyOptions<z.infer<S>, TContext>, context?: TContext): ExtractionResult<z.infer<S>>;
|
|
75
78
|
};
|
|
76
79
|
//# sourceMappingURL=merge.d.ts.map
|
package/dist/merge.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"merge.d.ts","sourceRoot":"","sources":["../src/merge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAC7B,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,KAAK,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAEpE,OAAO,KAAK,EAGV,gBAAgB,EAChB,gBAAgB,EAChB,gBAAgB,EAEhB,SAAS,EACT,iBAAiB,EAElB,MAAM,wBAAwB,CAAC;
|
|
1
|
+
{"version":3,"file":"merge.d.ts","sourceRoot":"","sources":["../src/merge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAC7B,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,KAAK,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAEpE,OAAO,KAAK,EAGV,gBAAgB,EAChB,gBAAgB,EAChB,gBAAgB,EAEhB,SAAS,EACT,iBAAiB,EAElB,MAAM,wBAAwB,CAAC;AA0KhC;;;;;GAKG;AACH,eAAO,MAAM,KAAK;IAChB;;;;;;OAMG;;QAED,6CAA6C;;QAE7C,yDAAyD;;QAEzD,sDAAsD;;QAEtD,wDAAwD;;QAExD,qGAAqG;qBACxF,OAAO,KAAK,OAAO,KAAG,OAAO;;IAQ5C;;;;;;;;;;;;;;;;;;;;OAoBG;UACG,CAAC,SACE,MAAM,aACF,SAAS,CAAC,CAAC,CAAC,GAAG,IAAI,YACpB,OAAO,WACR,OAAO,CAAC,gBAAgB,CAAC,WACzB,MAAM,GACd,gBAAgB,CAAC,CAAC,CAAC;IAgEtB;;;;;;;;;;;;;;;;;;;;;;;OAuBG;UACG,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,EAAE,QAAQ,oBAC1C,CAAC,eACI,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,aACzB,SAAS,GAAG,IAAI,WAClB,MAAM,YACL,iBAAiB,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,YACvC,QAAQ,GACjB,gBAAgB,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;CAqChC,CAAC"}
|
package/dist/merge.js
CHANGED
|
@@ -12,7 +12,7 @@ function fuseAllFields(schemaKeys, rulesResult, llmResult, policy, policyByField
|
|
|
12
12
|
let rulesMatched = 0;
|
|
13
13
|
for (const field of schemaKeys) {
|
|
14
14
|
const hasRuleValue = field in rulesResult.values;
|
|
15
|
-
// hasRuleValue implies confidence[field] is defined
|
|
15
|
+
// hasRuleValue implies confidence[field] is defined - rule.apply only writes
|
|
16
16
|
// to `confidence` when it also writes to `values`.
|
|
17
17
|
const ruleMatch = hasRuleValue
|
|
18
18
|
? {
|
|
@@ -83,12 +83,14 @@ function deriveSource(fused, ruleMatch, llmValue, policy, ruleId) {
|
|
|
83
83
|
/**
|
|
84
84
|
* Apply every configured {@link Normalizer} to the merged data in declared
|
|
85
85
|
* order. Normalizers may mutate their argument; the returned reference is
|
|
86
|
-
* what the rest of the pipeline observes.
|
|
86
|
+
* what the rest of the pipeline observes. The caller-provided `context` is
|
|
87
|
+
* forwarded verbatim to every normalizer (left `undefined` when the caller
|
|
88
|
+
* passed none).
|
|
87
89
|
*/
|
|
88
|
-
function runNormalizers(data, normalizers, content) {
|
|
90
|
+
function runNormalizers(data, normalizers, content, context) {
|
|
89
91
|
let current = data;
|
|
90
92
|
for (const normalizer of normalizers ?? []) {
|
|
91
|
-
current = normalizer(current, content);
|
|
93
|
+
current = normalizer(current, content, context);
|
|
92
94
|
}
|
|
93
95
|
return current;
|
|
94
96
|
}
|
|
@@ -167,7 +169,7 @@ export const merge = {
|
|
|
167
169
|
* @param field - Name of the field being merged.
|
|
168
170
|
* @param ruleMatch - Value proposed by a deterministic rule, or `null` if none.
|
|
169
171
|
* @param llmValue - Value proposed by the LLM, or `null` if none. Cast to `T`
|
|
170
|
-
* without runtime type-check
|
|
172
|
+
* without runtime type-check - callers that expose `merge.field` via
|
|
171
173
|
* `merge.apply` rely on the final Zod re-validation to reject invalid LLM values.
|
|
172
174
|
* @param policy - Optional strategy and confidence overrides.
|
|
173
175
|
* @param logger - Optional logger notified of unexpected runtime situations
|
|
@@ -239,23 +241,26 @@ export const merge = {
|
|
|
239
241
|
* Passing `llmResult = null` runs in rules-only mode: every field keeps
|
|
240
242
|
* whatever the rules produced and `meta.llmCalled` is `false`.
|
|
241
243
|
*
|
|
242
|
-
* Orchestration only
|
|
244
|
+
* Orchestration only - the three phases (fusion, normalization, validation)
|
|
243
245
|
* each live in their own private helper above.
|
|
244
246
|
*
|
|
245
247
|
* Runtime fields of `meta` (`durationMs`, `tokensUsed`) are populated by
|
|
246
248
|
* later slices; for now `durationMs` is `0`.
|
|
247
249
|
*
|
|
248
250
|
* @typeParam S - A Zod object schema.
|
|
251
|
+
* @typeParam TContext - Shape of the optional context forwarded to every
|
|
252
|
+
* normalizer. Defaults to `unknown`.
|
|
249
253
|
* @param schema - Zod object schema describing the target data shape.
|
|
250
254
|
* @param rulesResult - Output of {@link rule.apply} for the same schema.
|
|
251
255
|
* @param llmResult - Parsed LLM response, or `null` for rules-only mode.
|
|
252
256
|
* @param content - Original text the rules and LLM were derived from; forwarded to normalizers so they can cross-reference the source.
|
|
253
257
|
* @param options - Optional behavior overrides (policy, normalizers, validators, logger).
|
|
258
|
+
* @param context - Optional caller-defined value forwarded to every normalizer's third argument. Left `undefined` when omitted.
|
|
254
259
|
*/
|
|
255
|
-
apply(schema, rulesResult, llmResult, content, options) {
|
|
260
|
+
apply(schema, rulesResult, llmResult, content, options, context) {
|
|
256
261
|
const schemaKeys = Object.keys(schema.shape);
|
|
257
262
|
const fusion = fuseAllFields(schemaKeys, rulesResult, llmResult, options?.policy, options?.policyByField, options?.logger);
|
|
258
|
-
const normalized = runNormalizers(fusion.data, options?.normalizers, content);
|
|
263
|
+
const normalized = runNormalizers(fusion.data, options?.normalizers, content, context);
|
|
259
264
|
const violations = collectViolations(schema, normalized, fusion.missing, options?.validators);
|
|
260
265
|
const valid = !violations.some((v) => v.severity === 'error');
|
|
261
266
|
return {
|
package/dist/merge.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"merge.js","sourceRoot":"","sources":["../src/merge.ts"],"names":[],"mappings":"AAyBA;;;;GAIG;AACH,SAAS,aAAa,CACpB,UAAuB,EACvB,WAA2B,EAC3B,SAA2B,EAC3B,MAA6C,EAC7C,aAAyE,EACzE,MAA0B;IAE1B,MAAM,IAAI,GAAG,EAAsB,CAAC;IACpC,MAAM,UAAU,GAAG,EAAuC,CAAC;IAC3D,MAAM,OAAO,GAAG,EAA4C,CAAC;IAC7D,MAAM,SAAS,GAAe,EAAE,CAAC;IACjC,MAAM,OAAO,GAAgB,EAAE,CAAC;IAChC,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;QAC/B,MAAM,YAAY,GAAG,KAAK,IAAI,WAAW,CAAC,MAAM,CAAC;QACjD,6EAA6E;QAC7E,mDAAmD;QACnD,MAAM,SAAS,GAA8B,YAAY;YACvD,CAAC,CAAC;gBACE,KAAK,EAAE,WAAW,CAAC,MAAM,CAAC,KAAK,CAAC;gBAChC,UAAU,EAAE,WAAW,CAAC,UAAU,CAAC,KAAK,CAAW;aACpD;YACH,CAAC,CAAC,IAAI,CAAC;QACT,IAAI,YAAY,EAAE,CAAC;YACjB,YAAY,IAAI,CAAC,CAAC;QACpB,CAAC;QAED,MAAM,QAAQ,GAAG,SAAS,EAAE,MAAM,CAAC,KAAe,CAAC,IAAI,IAAI,CAAC;QAE5D,MAAM,aAAa,GAAG,aAAa,EAAE,CAAC,KAAK,CAAC,CAAC;QAC7C,MAAM,cAAc,GAClB,aAAa,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,GAAG,MAAM,EAAE,GAAG,aAAa,EAAE,CAAC;QACzE,MAAM,MAAM,GAAG,WAAW,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,CAAC;QAE9C,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,KAAe,EAAE,SAAS,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,CAAC,CAAC;QAExF,IAAI,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,KAA0B,CAAC;QAC/C,UAAU,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,UAAU,CAAC;QACrC,OAAO,CAAC,KAAK,CAAC,GAAG,YAAY,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,CAAC,CAAC;QAClF,IAAI,KAAK,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;YACjC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACjC,CAAC;QACD,IAAI,KAAK,CAAC,KAAK,KAAK,IAAI,EAAE,CAAC;YACzB,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC;AACzE,CAAC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,SAAS,YAAY,CACnB,KAAgC,EAChC,SAAoC,EACpC,QAAiB,EACjB,MAA6C,EAC7C,MAA0B;IAE1B,IAAI,KAAK,CAAC,KAAK,KAAK,IAAI,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,EAAE,GAAG,MAAM,IAAI,EAAE,CAAC;IACxB,IAAI,SAAS,KAAK,IAAI,EAAE,CAAC;QACvB,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;IACzB,CAAC;IACD,IAAI,QAAQ,KAAK,IAAI,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;QAChD,OAAO
,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;IACtC,CAAC;IACD,IAAI,KAAK,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACjC,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;IACtC,CAAC;IACD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS,CAAC,KAAK,EAAE,CAAC;QACpC,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;IACzB,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,EAAE,OAAO,IAAI,KAAK,CAAC,kBAAkB,CAAC,OAAO,CAAC;IACpE,OAAO,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,QAAQ,CAAC;QACvC,CAAC,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,EAAE,EAAE;QACnC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;AACnC,CAAC;AAED
|
|
1
|
+
{"version":3,"file":"merge.js","sourceRoot":"","sources":["../src/merge.ts"],"names":[],"mappings":"AAyBA;;;;GAIG;AACH,SAAS,aAAa,CACpB,UAAuB,EACvB,WAA2B,EAC3B,SAA2B,EAC3B,MAA6C,EAC7C,aAAyE,EACzE,MAA0B;IAE1B,MAAM,IAAI,GAAG,EAAsB,CAAC;IACpC,MAAM,UAAU,GAAG,EAAuC,CAAC;IAC3D,MAAM,OAAO,GAAG,EAA4C,CAAC;IAC7D,MAAM,SAAS,GAAe,EAAE,CAAC;IACjC,MAAM,OAAO,GAAgB,EAAE,CAAC;IAChC,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;QAC/B,MAAM,YAAY,GAAG,KAAK,IAAI,WAAW,CAAC,MAAM,CAAC;QACjD,6EAA6E;QAC7E,mDAAmD;QACnD,MAAM,SAAS,GAA8B,YAAY;YACvD,CAAC,CAAC;gBACE,KAAK,EAAE,WAAW,CAAC,MAAM,CAAC,KAAK,CAAC;gBAChC,UAAU,EAAE,WAAW,CAAC,UAAU,CAAC,KAAK,CAAW;aACpD;YACH,CAAC,CAAC,IAAI,CAAC;QACT,IAAI,YAAY,EAAE,CAAC;YACjB,YAAY,IAAI,CAAC,CAAC;QACpB,CAAC;QAED,MAAM,QAAQ,GAAG,SAAS,EAAE,MAAM,CAAC,KAAe,CAAC,IAAI,IAAI,CAAC;QAE5D,MAAM,aAAa,GAAG,aAAa,EAAE,CAAC,KAAK,CAAC,CAAC;QAC7C,MAAM,cAAc,GAClB,aAAa,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,GAAG,MAAM,EAAE,GAAG,aAAa,EAAE,CAAC;QACzE,MAAM,MAAM,GAAG,WAAW,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,CAAC;QAE9C,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,KAAe,EAAE,SAAS,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,CAAC,CAAC;QAExF,IAAI,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,KAA0B,CAAC;QAC/C,UAAU,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,UAAU,CAAC;QACrC,OAAO,CAAC,KAAK,CAAC,GAAG,YAAY,CAAC,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,cAAc,EAAE,MAAM,CAAC,CAAC;QAClF,IAAI,KAAK,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;YACjC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACjC,CAAC;QACD,IAAI,KAAK,CAAC,KAAK,KAAK,IAAI,EAAE,CAAC;YACzB,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,OAAO,EAAE,SAAS,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC;AACzE,CAAC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,SAAS,YAAY,CACnB,KAAgC,EAChC,SAAoC,EACpC,QAAiB,EACjB,MAA6C,EAC7C,MAA0B;IAE1B,IAAI,KAAK,CAAC,KAAK,KAAK,IAAI,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,EAAE,GAAG,MAAM,IAAI,EAAE,CAAC;IACxB,IAAI,SAAS,KAAK,IAAI,EAAE,CAAC;QACvB,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;IACzB,CAAC;IACD,IAAI,QAAQ,KAAK,IAAI,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;QAChD,OAAO
,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;IACtC,CAAC;IACD,IAAI,KAAK,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACjC,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;IACtC,CAAC;IACD,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS,CAAC,KAAK,EAAE,CAAC;QACpC,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;IACzB,CAAC;IACD,MAAM,OAAO,GAAG,MAAM,EAAE,OAAO,IAAI,KAAK,CAAC,kBAAkB,CAAC,OAAO,CAAC;IACpE,OAAO,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,QAAQ,CAAC;QACvC,CAAC,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,EAAE,EAAE;QACnC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,EAAE,EAAE,CAAC;AACnC,CAAC;AAED;;;;;;GAMG;AACH,SAAS,cAAc,CACrB,IAAsB,EACtB,WAAkD,EAClD,OAAe,EACf,OAA6B;IAE7B,IAAI,OAAO,GAAG,IAAI,CAAC;IACnB,KAAK,MAAM,UAAU,IAAI,WAAW,IAAI,EAAE,EAAE,CAAC;QAC3C,OAAO,GAAG,UAAU,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IAClD,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;GAIG;AACH,SAAS,iBAAiB,CACxB,MAAkC,EAClC,UAA4B,EAC5B,OAAoB,EACpB,UAA8C;IAE9C,MAAM,UAAU,GAAgB,EAAE,CAAC;IACnC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAmB,CAAC,CAAC;IAChD,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IAC5C,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;YACxC,MAAM,CAAC,SAAS,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC;YAC/B,MAAM,KAAK,GAAG,OAAO,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC;YACpE,IAAI,KAAK,KAAK,SAAS,IAAI,UAAU,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,CAAC;gBACjD,SAAS;YACX,CAAC;YACD,UAAU,CAAC,IAAI,CAAC;gBACd,KAAK;gBACL,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,QAAQ,EAAE,OAAO;aAClB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IACD,KAAK,MAAM,SAAS,IAAI,UAAU,IAAI,EAAE,EAAE,CAAC;QACzC,UAAU,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC;IAC5C,CAAC;IACD,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,MAAM,KAAK,GAAG;IACnB;;;;;;OAMG;IACH,kBAAkB,EAAE;QAClB,6CAA6C;QAC7C,QAAQ,EAAE,MAAM;QAChB,yDAAyD;QACzD,oBAAoB,EAAE,GAAG;QACzB,sDAAsD;QACtD,iBAAiB,EAAE,GAAG;QACtB,wDAAwD;QACxD,mBAAmB,EAAE,GAAG;QACxB,qGAAqG;QACrG,OAAO,EAAE,CAAC,CAAU,EAAE,CAAU,EAAW,EAAE;YAC3C,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,OAAO,CAAC,KAAK,QAAQ,EAAE,CAAC
;gBACnD,OAAO,CAAC,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7C,CAAC;YACD,OAAO,CAAC,KAAK,CAAC,CAAC;QACjB,CAAC;KACyB;IAE5B;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,KAAK,CACH,KAAa,EACb,SAA8B,EAC9B,QAAiB,EACjB,MAAkC,EAClC,MAAe;QAEf,MAAM,UAAU,GAAqB,EAAE,GAAG,KAAK,CAAC,kBAAkB,EAAE,GAAG,MAAM,EAAE,CAAC;QAChF,MAAM,aAAa,GAAG,QAAQ,IAAI,IAAI,CAAC;QAEvC,IAAI,SAAS,KAAK,IAAI,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YACjD,OAAO;gBACL,KAAK,EAAE,SAAS,CAAC,KAAK;gBACtB,UAAU,EAAE,SAAS,CAAC,UAAU;gBAChC,QAAQ,EAAE,SAAS;aACpB,CAAC;QACJ,CAAC;QAED,IAAI,SAAS,KAAK,IAAI,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YACjD,OAAO;gBACL,KAAK,EAAE,aAAkB;gBACzB,UAAU,EAAE,UAAU,CAAC,oBAAoB;gBAC3C,QAAQ,EAAE,SAAS;aACpB,CAAC;QACJ,CAAC;QAED,IAAI,SAAS,KAAK,IAAI,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YACjD,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;QAChE,CAAC;QAED,IAAI,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,aAAa,CAAC,EAAE,CAAC;YACvD,OAAO;gBACL,KAAK,EAAE,SAAS,CAAC,KAAK;gBACtB,UAAU,EAAE,UAAU,CAAC,mBAAmB;gBAC1C,QAAQ,EAAE,SAAS;aACpB,CAAC;QACJ,CAAC;QAED,IAAI,UAAU,CAAC,QAAQ,KAAK,aAAa,EAAE,CAAC;YAC1C,OAAO;gBACL,KAAK,EAAE,SAAS,CAAC,KAAK;gBACtB,UAAU,EAAE,SAAS,CAAC,UAAU;gBAChC,QAAQ,EAAE,SAAS;aACpB,CAAC;QACJ,CAAC;QACD,IAAI,UAAU,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;YACzC,OAAO;gBACL,KAAK,EAAE,aAAkB;gBACzB,UAAU,EAAE,UAAU,CAAC,oBAAoB;gBAC3C,QAAQ,EAAE,SAAS;aACpB,CAAC;QACJ,CAAC;QACD,IAAI,UAAU,CAAC,QAAQ,KAAK,MAAM,EAAE,CAAC;YACnC,MAAM,EAAE,IAAI,CAAC,iDAAiD,EAAE;gBAC9D,QAAQ,EAAE,UAAU,CAAC,QAAQ;gBAC7B,KAAK;aACN,CAAC,CAAC;QACL,CAAC;QACD,OAAO;YACL,KAAK,EAAE,SAAS,CAAC,KAAK;YACtB,UAAU,EAAE,UAAU,CAAC,iBAAiB;YACxC,QAAQ,EAAE;gBACR,KAAK;gBACL,SAAS,EAAE,SAAS,CAAC,KAAK;gBAC1B,cAAc,EAAE,SAAS,CAAC,UAAU;gBACpC,QAAQ,EAAE,aAAa;aACxB;SACF,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;OAuBG;IACH,KAAK,CACH,MAAS,EACT,WAAoC,EACpC,SAA2B,EAC3B,OAAe,EACf,OAAiD,EACjD,OAAkB;QAGlB,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAmB,CAAC;QAE/D,MAAM,MAAM,GAAG,aAAa,CAC1B,UAAU,EACV,WAAW,EACX,SAAS,EACT,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,aAAa,EACtB,OAAO,EAAE,MAAM,CAChB,CAAC;QAEF,MAA
M,UAAU,GAAG,cAAc,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,EAAE,WAAW,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;QAEvF,MAAM,UAAU,GAAG,iBAAiB,CAClC,MAAM,EACN,UAAU,EACV,MAAM,CAAC,OAAO,EACd,OAAO,EAAE,UAAU,CACpB,CAAC;QACF,MAAM,KAAK,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC;QAE9D,OAAO;YACL,IAAI,EAAE,UAAU;YAChB,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,UAAU,EAAE,EAAE,KAAK,EAAE,UAAU,EAAE;YACjC,IAAI,EAAE;gBACJ,YAAY,EAAE,MAAM,CAAC,YAAY;gBACjC,SAAS,EAAE,SAAS,KAAK,IAAI;gBAC7B,UAAU,EAAE,CAAC;aACd;SACF,CAAC;IACJ,CAAC;CACF,CAAC"}
|
package/dist/prompt.d.ts
CHANGED
|
@@ -39,7 +39,7 @@ export declare const prompt: {
|
|
|
39
39
|
/**
|
|
40
40
|
* Parse a raw LLM response permissively. Accepts either an already-decoded
|
|
41
41
|
* object or a JSON-encoded string. Each field listed in `missing` is
|
|
42
|
-
* validated individually against its Zod schema
|
|
42
|
+
* validated individually against its Zod schema - valid fields flow into
|
|
43
43
|
* `values`, invalid ones are dropped and surfaced as warnings. Keys outside
|
|
44
44
|
* `missing` are dropped as well, with a single aggregated warning so the
|
|
45
45
|
* caller can spot a prompt/provider mismatch.
|
|
@@ -50,7 +50,7 @@ export declare const prompt: {
|
|
|
50
50
|
* @param schema - Zod object schema whose fields back the validation.
|
|
51
51
|
* @param missing - Fields the LLM was expected to produce (typically
|
|
52
52
|
* {@link ExtractionResult.missing}).
|
|
53
|
-
* @param raw - The provider response
|
|
53
|
+
* @param raw - The provider response - object or JSON string.
|
|
54
54
|
*/
|
|
55
55
|
parse<S extends z.ZodObject<z.ZodRawShape>>(schema: S, missing: readonly (keyof z.infer<S>)[], raw: unknown): LlmResult;
|
|
56
56
|
};
|
package/dist/prompt.js
CHANGED
|
@@ -89,7 +89,7 @@ function buildResponseSchema(schema, missing) {
|
|
|
89
89
|
return { type: 'object', properties, required, additionalProperties: false };
|
|
90
90
|
}
|
|
91
91
|
/**
|
|
92
|
-
* Pick the non-null, non-missing entries of the partial result
|
|
92
|
+
* Pick the non-null, non-missing entries of the partial result - the values
|
|
93
93
|
* the deterministic pass has already resolved.
|
|
94
94
|
*/
|
|
95
95
|
function collectKnownValues(data, missing) {
|
|
@@ -233,7 +233,7 @@ export const prompt = {
|
|
|
233
233
|
/**
|
|
234
234
|
* Parse a raw LLM response permissively. Accepts either an already-decoded
|
|
235
235
|
* object or a JSON-encoded string. Each field listed in `missing` is
|
|
236
|
-
* validated individually against its Zod schema
|
|
236
|
+
* validated individually against its Zod schema - valid fields flow into
|
|
237
237
|
* `values`, invalid ones are dropped and surfaced as warnings. Keys outside
|
|
238
238
|
* `missing` are dropped as well, with a single aggregated warning so the
|
|
239
239
|
* caller can spot a prompt/provider mismatch.
|
|
@@ -244,7 +244,7 @@ export const prompt = {
|
|
|
244
244
|
* @param schema - Zod object schema whose fields back the validation.
|
|
245
245
|
* @param missing - Fields the LLM was expected to produce (typically
|
|
246
246
|
* {@link ExtractionResult.missing}).
|
|
247
|
-
* @param raw - The provider response
|
|
247
|
+
* @param raw - The provider response - object or JSON string.
|
|
248
248
|
*/
|
|
249
249
|
parse(schema, missing, raw) {
|
|
250
250
|
const missingKeys = missing;
|
package/dist/rules.d.ts
CHANGED
|
@@ -10,18 +10,23 @@ export declare const rule: {
|
|
|
10
10
|
* Declare a deterministic extraction rule targeting a single schema field.
|
|
11
11
|
*
|
|
12
12
|
* The `extract` callback receives the raw content and must return either a
|
|
13
|
-
* {@link RuleMatch} or `null` when the rule does not apply.
|
|
13
|
+
* {@link RuleMatch} or `null` when the rule does not apply. It may also
|
|
14
|
+
* accept an optional caller-defined `context` forwarded verbatim by
|
|
15
|
+
* {@link rule.apply} / {@link Extractor.extract}.
|
|
14
16
|
*
|
|
17
|
+
* @typeParam TContext - Shape of the optional context forwarded to
|
|
18
|
+
* `extract`. Defaults to `unknown`.
|
|
15
19
|
* @param field - Name of the schema field the rule writes to.
|
|
16
|
-
* @param extract - Callback that inspects the content and
|
|
20
|
+
* @param extract - Callback that inspects the content (and optional context)
|
|
21
|
+
* and proposes a value.
|
|
17
22
|
* @param options - Optional rule metadata. `id` is surfaced in
|
|
18
23
|
* `ExtractionResult.sources` when this rule produces the kept value;
|
|
19
24
|
* defaults to `${field}#${declarationIndex}`.
|
|
20
25
|
* @returns An {@link ExtractionRule} ready to be passed to {@link rule.apply}.
|
|
21
26
|
*/
|
|
22
|
-
create(field: string, extract: (content: string) => RuleMatch<unknown> | null, options?: {
|
|
27
|
+
create<TContext = unknown>(field: string, extract: (content: string, context?: TContext) => RuleMatch<unknown> | null, options?: {
|
|
23
28
|
id?: string;
|
|
24
|
-
}): ExtractionRule
|
|
29
|
+
}): ExtractionRule<TContext>;
|
|
25
30
|
/**
|
|
26
31
|
* Shortcut to build a regex-based {@link ExtractionRule}. On match, the
|
|
27
32
|
* value is taken from capture group 1 (or the full match if none), then
|
|
@@ -68,13 +73,21 @@ export declare const rule: {
|
|
|
68
73
|
* - Values failing the per-field Zod `safeParse` are discarded and the
|
|
69
74
|
* field falls back to `missing`. An optional logger receives a warning.
|
|
70
75
|
*
|
|
76
|
+
* The optional `context` is forwarded verbatim to every rule's `extract`
|
|
77
|
+
* callback. Rules that declare a narrower `ExtractionRule<TContext>` than
|
|
78
|
+
* the one passed here still compile thanks to contextual parameter
|
|
79
|
+
* contravariance; rules that ignore `context` keep working unchanged.
|
|
80
|
+
*
|
|
71
81
|
* @typeParam S - A Zod object schema.
|
|
82
|
+
* @typeParam TContext - Shape of the optional context forwarded to rules.
|
|
72
83
|
* @param content - Raw content to extract from (typically markdown or text).
|
|
73
84
|
* @param rules - Deterministic rules to evaluate.
|
|
74
85
|
* @param schema - Zod object schema describing the target data shape.
|
|
75
86
|
* @param logger - Optional logger notified when a value is rejected.
|
|
87
|
+
* @param context - Optional caller-defined value forwarded to every rule's
|
|
88
|
+
* `extract` callback. Left `undefined` when omitted.
|
|
76
89
|
* @returns The deterministic extraction result (values, confidence, missing).
|
|
77
90
|
*/
|
|
78
|
-
apply<S extends z.ZodObject<z.ZodRawShape
|
|
91
|
+
apply<S extends z.ZodObject<z.ZodRawShape>, TContext = unknown>(content: string, rules: ExtractionRule<TContext>[], schema: S, logger?: Logger, context?: TContext): RulesResult<z.infer<S>>;
|
|
79
92
|
};
|
|
80
93
|
//# sourceMappingURL=rules.d.ts.map
|
package/dist/rules.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rules.d.ts","sourceRoot":"","sources":["../src/rules.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAC7B,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAEpF;;;GAGG;AACH,eAAO,MAAM,IAAI;IACf
|
|
1
|
+
{"version":3,"file":"rules.d.ts","sourceRoot":"","sources":["../src/rules.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAC7B,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAEpF;;;GAGG;AACH,eAAO,MAAM,IAAI;IACf;;;;;;;;;;;;;;;;;OAiBG;WACI,QAAQ,mBACN,MAAM,WACJ,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,QAAQ,KAAK,SAAS,CAAC,OAAO,CAAC,GAAG,IAAI,YACjE;QAAE,EAAE,CAAC,EAAE,MAAM,CAAA;KAAE,GACxB,cAAc,CAAC,QAAQ,CAAC;IAI3B;;;;;;;;;;;OAWG;UACG,CAAC,kBACE,MAAM,WACJ,MAAM,mBACE,MAAM,cACX,CAAC,KAAK,EAAE,gBAAgB,KAAK,CAAC,YAChC;QAAE,EAAE,CAAC,EAAE,MAAM,CAAA;KAAE,GACxB,cAAc;IAYjB;;;;;;;;;;;;;;;;;OAiBG;eACQ,CAAC,SAAS,CAAC,SAAS,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC;IAIpD;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;UACG,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,EAAE,QAAQ,qBACzC,MAAM,SACR,cAAc,CAAC,QAAQ,CAAC,EAAE,UACzB,CAAC,WACA,MAAM,YACL,QAAQ,GACjB,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;CAwC3B,CAAC"}
|
package/dist/rules.js
CHANGED
|
@@ -7,10 +7,15 @@ export const rule = {
|
|
|
7
7
|
* Declare a deterministic extraction rule targeting a single schema field.
|
|
8
8
|
*
|
|
9
9
|
* The `extract` callback receives the raw content and must return either a
|
|
10
|
-
* {@link RuleMatch} or `null` when the rule does not apply.
|
|
10
|
+
* {@link RuleMatch} or `null` when the rule does not apply. It may also
|
|
11
|
+
* accept an optional caller-defined `context` forwarded verbatim by
|
|
12
|
+
* {@link rule.apply} / {@link Extractor.extract}.
|
|
11
13
|
*
|
|
14
|
+
* @typeParam TContext - Shape of the optional context forwarded to
|
|
15
|
+
* `extract`. Defaults to `unknown`.
|
|
12
16
|
* @param field - Name of the schema field the rule writes to.
|
|
13
|
-
* @param extract - Callback that inspects the content and
|
|
17
|
+
* @param extract - Callback that inspects the content (and optional context)
|
|
18
|
+
* and proposes a value.
|
|
14
19
|
* @param options - Optional rule metadata. `id` is surfaced in
|
|
15
20
|
* `ExtractionResult.sources` when this rule produces the kept value;
|
|
16
21
|
* defaults to `${field}#${declarationIndex}`.
|
|
@@ -75,14 +80,22 @@ export const rule = {
|
|
|
75
80
|
* - Values failing the per-field Zod `safeParse` are discarded and the
|
|
76
81
|
* field falls back to `missing`. An optional logger receives a warning.
|
|
77
82
|
*
|
|
83
|
+
* The optional `context` is forwarded verbatim to every rule's `extract`
|
|
84
|
+
* callback. Rules that declare a narrower `ExtractionRule<TContext>` than
|
|
85
|
+
* the one passed here still compile thanks to contextual parameter
|
|
86
|
+
* contravariance; rules that ignore `context` keep working unchanged.
|
|
87
|
+
*
|
|
78
88
|
* @typeParam S - A Zod object schema.
|
|
89
|
+
* @typeParam TContext - Shape of the optional context forwarded to rules.
|
|
79
90
|
* @param content - Raw content to extract from (typically markdown or text).
|
|
80
91
|
* @param rules - Deterministic rules to evaluate.
|
|
81
92
|
* @param schema - Zod object schema describing the target data shape.
|
|
82
93
|
* @param logger - Optional logger notified when a value is rejected.
|
|
94
|
+
* @param context - Optional caller-defined value forwarded to every rule's
|
|
95
|
+
* `extract` callback. Left `undefined` when omitted.
|
|
83
96
|
* @returns The deterministic extraction result (values, confidence, missing).
|
|
84
97
|
*/
|
|
85
|
-
apply(content, rules, schema, logger) {
|
|
98
|
+
apply(content, rules, schema, logger, context) {
|
|
86
99
|
const schemaKeys = Object.keys(schema.shape);
|
|
87
100
|
const values = {};
|
|
88
101
|
const confidenceMap = {};
|
|
@@ -93,7 +106,7 @@ export const rule = {
|
|
|
93
106
|
if (!schemaKeys.includes(field)) {
|
|
94
107
|
continue;
|
|
95
108
|
}
|
|
96
|
-
const match = candidate.extract(content);
|
|
109
|
+
const match = candidate.extract(content, context);
|
|
97
110
|
if (match === null) {
|
|
98
111
|
continue;
|
|
99
112
|
}
|
package/dist/rules.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rules.js","sourceRoot":"","sources":["../src/rules.ts"],"names":[],"mappings":"AAIA;;;GAGG;AACH,MAAM,CAAC,MAAM,IAAI,GAAG;IAClB
|
|
1
|
+
{"version":3,"file":"rules.js","sourceRoot":"","sources":["../src/rules.ts"],"names":[],"mappings":"AAIA;;;GAGG;AACH,MAAM,CAAC,MAAM,IAAI,GAAG;IAClB;;;;;;;;;;;;;;;;;OAiBG;IACH,MAAM,CACJ,KAAa,EACb,OAA2E,EAC3E,OAAyB;QAEzB,OAAO,OAAO,EAAE,EAAE,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,OAAO,CAAC,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC;IAC7F,CAAC;IAED;;;;;;;;;;;OAWG;IACH,KAAK,CACH,KAAa,EACb,OAAe,EACf,eAAuB,EACvB,SAA0C,EAC1C,OAAyB;QAEzB,MAAM,OAAO,GAAG,CAAC,OAAe,EAA6B,EAAE;YAC7D,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACrC,IAAI,CAAC,KAAK,EAAE,CAAC;gBACX,OAAO,IAAI,CAAC;YACd,CAAC;YACD,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACpE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,eAAe,EAAE,CAAC;QAChD,CAAC,CAAC;QACF,OAAO,OAAO,EAAE,EAAE,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,OAAO,CAAC,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC;IAC7F,CAAC;IAED;;;;;;;;;;;;;;;;;OAiBG;IACH,UAAU,CAAI,KAAQ,EAAE,KAAa;QACnC,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;IACtC,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;;;;OA0BG;IACH,KAAK,CACH,OAAe,EACf,KAAiC,EACjC,MAAS,EACT,MAAe,EACf,OAAkB;QAGlB,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAmB,CAAC;QAC/D,MAAM,MAAM,GAAkB,EAAE,CAAC;QACjC,MAAM,aAAa,GAAwC,EAAE,CAAC;QAC9D,MAAM,SAAS,GAAwC,EAAE,CAAC;QAE1D,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;YACrD,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAE,CAAC;YAChC,MAAM,KAAK,GAAG,SAAS,CAAC,KAAmB,CAAC;YAC5C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBAChC,SAAS;YACX,CAAC;YACD,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YAClD,IAAI,KAAK,KAAK,IAAI,EAAE,CAAC;gBACnB,SAAS;YACX,CAAC;YACD,MAAM,WAAW,GAAG,MAAM,CAAC,KAAK,CAAC,SAAS,CAAC,KAAK,CAAiB,CAAC;YAClE,MAAM,MAAM,GAAG,WAAW,CAAC,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAClD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;gBACpB,MAAM,EAAE,IAAI,CAAC,+BAA+B,EAAE;oBAC5C,KAAK,EAAE,SAAS,CAAC,KAAK;oBACtB,KAAK,
EAAE,KAAK,CAAC,KAAK;oBAClB,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,MAAM;iBAC3B,CAAC,CAAC;gBACH,SAAS;YACX,CAAC;YACD,MAAM,kBAAkB,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;YAChD,IAAI,kBAAkB,KAAK,SAAS,IAAI,KAAK,CAAC,UAAU,IAAI,kBAAkB,EAAE,CAAC;gBAC/E,SAAS;YACX,CAAC;YACD,MAAM,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC,IAAwB,CAAC;YAChD,aAAa,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,UAAU,CAAC;YACxC,SAAS,CAAC,KAAK,CAAC,GAAG,SAAS,CAAC,EAAE,IAAI,GAAG,SAAS,CAAC,KAAK,IAAI,KAAK,EAAE,CAAC;QACnE,CAAC;QAED,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,IAAI,MAAM,CAAC,CAAC,CAAC;QAE7D,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,aAAa,EAAE,SAAS,EAAE,OAAO,EAAE,CAAC;IACnE,CAAC;CACF,CAAC"}
|
|
@@ -51,16 +51,24 @@ export type ExtractorLlmConfig = {
|
|
|
51
51
|
* produce values for each field before any LLM fallback kicks in.
|
|
52
52
|
*
|
|
53
53
|
* @typeParam S - A Zod object schema describing the target data shape.
|
|
54
|
+
* @typeParam TContext - Shape of the optional per-call context forwarded to
|
|
55
|
+
* every rule's `extract` callback. Defaults to `unknown`, which keeps
|
|
56
|
+
* context-unaware rules assignable with zero boilerplate.
|
|
54
57
|
*/
|
|
55
|
-
export type ExtractorConfig<S extends z.ZodObject<z.ZodRawShape
|
|
58
|
+
export type ExtractorConfig<S extends z.ZodObject<z.ZodRawShape>, TContext = unknown> = {
|
|
56
59
|
/** Zod object schema the extractor targets. Drives field enumeration and re-validation. */
|
|
57
60
|
schema: S;
|
|
58
61
|
/** Deterministic rules evaluated against the raw content before any LLM fallback. */
|
|
59
|
-
rules: ExtractionRule[];
|
|
62
|
+
rules: ExtractionRule<TContext>[];
|
|
60
63
|
/** Optional LLM fallback invoked for fields the rules could not produce. */
|
|
61
64
|
llm?: ExtractorLlmConfig;
|
|
62
|
-
/**
|
|
63
|
-
|
|
65
|
+
/**
|
|
66
|
+
* Post-merge transformations, forwarded to every `merge.apply` call.
|
|
67
|
+
* Normalizers share the extractor's `TContext`, so they can read the same
|
|
68
|
+
* per-call context as the rules (see
|
|
69
|
+
* {@link Extractor.extract} / {@link Extractor.extractSync} / {@link Extractor.merge}).
|
|
70
|
+
*/
|
|
71
|
+
normalizers?: Normalizer<z.infer<S>, TContext>[];
|
|
64
72
|
/** Invariants checked on the normalized data; populate `result.validation`. */
|
|
65
73
|
validators?: Validator<ExtractedData<z.infer<S>>>[];
|
|
66
74
|
/** Overrides for the per-field merge policy (conflict strategy, confidences, compare). */
|
|
@@ -81,21 +89,25 @@ export type ExtractorConfig<S extends z.ZodObject<z.ZodRawShape>> = {
|
|
|
81
89
|
* this interface as the matching slice introduces them.
|
|
82
90
|
*
|
|
83
91
|
* @typeParam T - Shape of the target data object inferred from a Zod schema.
|
|
92
|
+
* @typeParam TContext - Shape of the optional per-call context forwarded to
|
|
93
|
+
* every rule's `extract` callback. Defaults to `unknown`.
|
|
84
94
|
*/
|
|
85
|
-
export type Extractor<T> = {
|
|
95
|
+
export type Extractor<T, TContext = unknown> = {
|
|
86
96
|
/**
|
|
87
97
|
* Run the full extraction pipeline against `content`: deterministic rules,
|
|
88
98
|
* optionally followed by an LLM fallback for missing fields, then the merge
|
|
89
|
-
* + validation step.
|
|
99
|
+
* + validation step. An optional `context` is forwarded verbatim to every
|
|
100
|
+
* rule's `extract` callback.
|
|
90
101
|
*/
|
|
91
|
-
extract(content: string): Promise<ExtractionResult<T>>;
|
|
102
|
+
extract(content: string, context?: TContext): Promise<ExtractionResult<T>>;
|
|
92
103
|
/**
|
|
93
104
|
* Run the deterministic rules and merge them against a `null` LLM result.
|
|
94
105
|
* Synchronous counterpart to {@link Extractor.extract} for batch workflows
|
|
95
106
|
* where the LLM call is managed by the caller (queues, scheduled jobs,
|
|
96
|
-
* external batch APIs).
|
|
107
|
+
* external batch APIs). An optional `context` is forwarded verbatim to
|
|
108
|
+
* every rule's `extract` callback.
|
|
97
109
|
*/
|
|
98
|
-
extractSync(content: string): ExtractionResult<T>;
|
|
110
|
+
extractSync(content: string, context?: TContext): ExtractionResult<T>;
|
|
99
111
|
/**
|
|
100
112
|
* Build the LLM request for `partial`. The target field set depends on the
|
|
101
113
|
* configured `llm.mode`: `'fill-gaps'` (default) covers only
|
|
@@ -112,11 +124,12 @@ export type Extractor<T> = {
|
|
|
112
124
|
*/
|
|
113
125
|
parse(raw: unknown): LlmResult;
|
|
114
126
|
/**
|
|
115
|
-
* Merge a previously-obtained `partial` with an LLM result
|
|
116
|
-
*
|
|
117
|
-
*
|
|
118
|
-
*
|
|
127
|
+
* Merge a previously-obtained `partial` with an LLM result to produce the
|
|
128
|
+
* final {@link ExtractionResult}. Reuses the per-field values, confidence
|
|
129
|
+
* and provenance already carried by `partial` - the deterministic rules are
|
|
130
|
+
* not re-evaluated. Normalizers still run here, so an optional `context` is
|
|
131
|
+
* accepted and forwarded verbatim to every normalizer that declares one.
|
|
119
132
|
*/
|
|
120
|
-
merge(partial: ExtractionResult<T>, llmResult: LlmResult, content: string): ExtractionResult<T>;
|
|
133
|
+
merge(partial: ExtractionResult<T>, llmResult: LlmResult, content: string, context?: TContext): ExtractionResult<T>;
|
|
121
134
|
};
|
|
122
135
|
//# sourceMappingURL=extractor.types.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extractor.types.d.ts","sourceRoot":"","sources":["../../src/types/extractor.types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAC7B,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,KAAK,EACV,aAAa,EACb,gBAAgB,EAChB,gBAAgB,EAChB,SAAS,EACT,UAAU,EACX,MAAM,kBAAkB,CAAC;AAC1B,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AACvD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,KAAK,EAAE,eAAe,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAEtF;;;;GAIG;AACH,MAAM,MAAM,kBAAkB,GAAG;IAC/B,+DAA+D;IAC/D,QAAQ,EAAE,WAAW,CAAC;IACtB,4GAA4G;IAC5G,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;;;;OAKG;IACH,IAAI,CAAC,EAAE,eAAe,CAAC;IACvB;;;OAGG;IACH,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC;;;;;;;OAOG;IACH,gBAAgB,CAAC,EAAE,CACjB,OAAO,EAAE,UAAU,EACnB,OAAO,EAAE,MAAM,KACZ,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;IACtC;;;;;;OAMG;IACH,iBAAiB,CAAC,EAAE,CAClB,MAAM,EAAE,SAAS,EACjB,OAAO,EAAE,UAAU,KAChB,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC;CACrC,CAAC;AAEF
|
|
1
|
+
{"version":3,"file":"extractor.types.d.ts","sourceRoot":"","sources":["../../src/types/extractor.types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAC7B,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,KAAK,EACV,aAAa,EACb,gBAAgB,EAChB,gBAAgB,EAChB,SAAS,EACT,UAAU,EACX,MAAM,kBAAkB,CAAC;AAC1B,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,qBAAqB,CAAC;AACvD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAChD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,KAAK,EAAE,eAAe,EAAE,UAAU,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAEtF;;;;GAIG;AACH,MAAM,MAAM,kBAAkB,GAAG;IAC/B,+DAA+D;IAC/D,QAAQ,EAAE,WAAW,CAAC;IACtB,4GAA4G;IAC5G,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;;;;OAKG;IACH,IAAI,CAAC,EAAE,eAAe,CAAC;IACvB;;;OAGG;IACH,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC;;;;;;;OAOG;IACH,gBAAgB,CAAC,EAAE,CACjB,OAAO,EAAE,UAAU,EACnB,OAAO,EAAE,MAAM,KACZ,UAAU,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;IACtC;;;;;;OAMG;IACH,iBAAiB,CAAC,EAAE,CAClB,MAAM,EAAE,SAAS,EACjB,OAAO,EAAE,UAAU,KAChB,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC;CACrC,CAAC;AAEF;;;;;;;;;GASG;AACH,MAAM,MAAM,eAAe,CACzB,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,EACpC,QAAQ,GAAG,OAAO,IAChB;IACF,2FAA2F;IAC3F,MAAM,EAAE,CAAC,CAAC;IACV,qFAAqF;IACrF,KAAK,EAAE,cAAc,CAAC,QAAQ,CAAC,EAAE,CAAC;IAClC,4EAA4E;IAC5E,GAAG,CAAC,EAAE,kBAAkB,CAAC;IACzB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,UAAU,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,QAAQ,CAAC,EAAE,CAAC;IACjD,+EAA+E;IAC/E,UAAU,CAAC,EAAE,SAAS,CAAC,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACpD,0FAA0F;IAC1F,MAAM,CAAC,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACnC;;;;OAIG;IACH,aAAa,CAAC,EAAE;SAAG,CAAC,IAAI,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,gBAAgB,CAAC;KAAE,CAAC;IACxE,+EAA+E;IAC/E,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF;;;;;;;GAOG;AACH,MAAM,MAAM,SAAS,CAAC,CAAC,EAAE,QAAQ,GAAG,OAAO,IAAI;IAC7C;;;;;OAKG;IACH,OAAO,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,QAAQ,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAAC,CAAC,CAAC,CAAC;IAC3E;;;;;;OAMG;IACH,WAAW,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,QAAQ,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC;
IACtE;;;;;;OAMG;IACH,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,gBAAgB,CAAC,CAAC,CAAC,GAAG,UAAU,CAAC;IAClE;;;;;OAKG;IACH,KAAK,CAAC,GAAG,EAAE,OAAO,GAAG,SAAS,CAAC;IAC/B;;;;;;OAMG;IACH,KAAK,CACH,OAAO,EAAE,gBAAgB,CAAC,CAAC,CAAC,EAC5B,SAAS,EAAE,SAAS,EACpB,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,QAAQ,GACjB,gBAAgB,CAAC,CAAC,CAAC,CAAC;CACxB,CAAC"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Minimal logger interface consumed by llmbic internals. Any logger that
|
|
3
|
-
* exposes a `warn` method (and optionally `info`) can be plugged in
|
|
3
|
+
* exposes a `warn` method (and optionally `info`) can be plugged in -
|
|
4
4
|
* pino, winston, `console`, or a test double.
|
|
5
5
|
*/
|
|
6
6
|
export type Logger = {
|
|
@@ -17,17 +17,29 @@ export type ExtractedData<T> = {
|
|
|
17
17
|
* and return the updated data. They are allowed to mutate their input; the
|
|
18
18
|
* merge pipeline shallow-copies once before invoking them.
|
|
19
19
|
*
|
|
20
|
+
* Normalizers can optionally accept a `context` argument: an opaque,
|
|
21
|
+
* caller-defined value forwarded verbatim by `merge.apply` /
|
|
22
|
+
* `Extractor.extract` - the same per-call context the extractor's rules
|
|
23
|
+
* receive. Typical use is to gate cross-field fix-ups on tenant-specific
|
|
24
|
+
* configuration (e.g. a source URL, a feature flag). `TContext` defaults to
|
|
25
|
+
* `unknown` so context-unaware normalizers stay assignable to arrays typed
|
|
26
|
+
* with any context.
|
|
27
|
+
*
|
|
20
28
|
* @typeParam T - Non-null target shape the extraction is aiming for.
|
|
29
|
+
* @typeParam TContext - Shape of the optional per-call context forwarded to
|
|
30
|
+
* the normalizer. Defaults to `unknown`.
|
|
21
31
|
*/
|
|
22
|
-
export type Normalizer<T> = (data: ExtractedData<T>, content: string) => ExtractedData<T>;
|
|
32
|
+
export type Normalizer<T, TContext = unknown> = (data: ExtractedData<T>, content: string, context?: TContext) => ExtractedData<T>;
|
|
23
33
|
/**
|
|
24
34
|
* Behavior overrides accepted by the top-level merge. Every field is
|
|
25
35
|
* optional; defaults match rules-only mode with no normalization and no
|
|
26
36
|
* validators.
|
|
27
37
|
*
|
|
28
38
|
* @typeParam T - Non-null target shape (`z.infer<Schema>`).
|
|
39
|
+
* @typeParam TContext - Shape of the optional per-call context forwarded to
|
|
40
|
+
* every normalizer. Defaults to `unknown`.
|
|
29
41
|
*/
|
|
30
|
-
export type MergeApplyOptions<T> = {
|
|
42
|
+
export type MergeApplyOptions<T, TContext = unknown> = {
|
|
31
43
|
/** Overrides forwarded to every field-level fusion call. */
|
|
32
44
|
policy?: Partial<FieldMergePolicy>;
|
|
33
45
|
/**
|
|
@@ -39,7 +51,7 @@ export type MergeApplyOptions<T> = {
|
|
|
39
51
|
[K in keyof T]?: Partial<FieldMergePolicy>;
|
|
40
52
|
};
|
|
41
53
|
/** Transformations run in declared order after the per-field fusion. */
|
|
42
|
-
normalizers?: Normalizer<T>[];
|
|
54
|
+
normalizers?: Normalizer<T, TContext>[];
|
|
43
55
|
/** Invariants run on the normalized data; their violations populate `validation`. */
|
|
44
56
|
validators?: Validator<ExtractedData<T>>[];
|
|
45
57
|
/** Logger propagated through the pipeline for warnings and fallbacks. */
|
|
@@ -48,10 +60,10 @@ export type MergeApplyOptions<T> = {
|
|
|
48
60
|
/**
|
|
49
61
|
* Strategy applied when the rule and the LLM disagree on a field value.
|
|
50
62
|
*
|
|
51
|
-
* - `'flag'`
|
|
63
|
+
* - `'flag'` - keep the rule value, lower its confidence, and record a
|
|
52
64
|
* {@link Conflict} so the caller can review the disagreement.
|
|
53
|
-
* - `'prefer-rule'`
|
|
54
|
-
* - `'prefer-llm'`
|
|
65
|
+
* - `'prefer-rule'` - silently keep the rule value and its confidence.
|
|
66
|
+
* - `'prefer-llm'` - silently keep the LLM value and the default LLM
|
|
55
67
|
* confidence.
|
|
56
68
|
*/
|
|
57
69
|
export type ConflictStrategy = 'flag' | 'prefer-rule' | 'prefer-llm';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"merge.types.d.ts","sourceRoot":"","sources":["../../src/types/merge.types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAChE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAEhD;;;;;;GAMG;AACH,MAAM,MAAM,aAAa,CAAC,CAAC,IAAI;KAAG,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI;CAAE,CAAC;AAE/D
|
|
1
|
+
{"version":3,"file":"merge.types.d.ts","sourceRoot":"","sources":["../../src/types/merge.types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,qBAAqB,CAAC;AAChE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAEhD;;;;;;GAMG;AACH,MAAM,MAAM,aAAa,CAAC,CAAC,IAAI;KAAG,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI;CAAE,CAAC;AAE/D;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,MAAM,UAAU,CAAC,CAAC,EAAE,QAAQ,GAAG,OAAO,IAAI,CAC9C,IAAI,EAAE,aAAa,CAAC,CAAC,CAAC,EACtB,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE,QAAQ,KACf,aAAa,CAAC,CAAC,CAAC,CAAC;AAEtB;;;;;;;;GAQG;AACH,MAAM,MAAM,iBAAiB,CAAC,CAAC,EAAE,QAAQ,GAAG,OAAO,IAAI;IACrD,4DAA4D;IAC5D,MAAM,CAAC,EAAE,OAAO,CAAC,gBAAgB,CAAC,CAAC;IACnC;;;;OAIG;IACH,aAAa,CAAC,EAAE;SAAG,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,gBAAgB,CAAC;KAAE,CAAC;IAC/D,wEAAwE;IACxE,WAAW,CAAC,EAAE,UAAU,CAAC,CAAC,EAAE,QAAQ,CAAC,EAAE,CAAC;IACxC,qFAAqF;IACrF,UAAU,CAAC,EAAE,SAAS,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC3C,yEAAyE;IACzE,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB,CAAC;AAEF;;;;;;;;GAQG;AACH,MAAM,MAAM,gBAAgB,GAAG,MAAM,GAAG,aAAa,GAAG,YAAY,CAAC;AAErE;;;;;;;;;;;;;GAaG;AACH,MAAM,MAAM,WAAW,GACnB;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GAChC;IAAE,IAAI,EAAE,KAAK,CAAA;CAAE,GACf;IAAE,IAAI,EAAE,WAAW,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,GACrC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,CAAA;CAAE,CAAC;AAErC;;;GAGG;AACH,MAAM,MAAM,QAAQ,GAAG;IACrB,gEAAgE;IAChE,KAAK,EAAE,MAAM,CAAC;IACd,kCAAkC;IAClC,SAAS,EAAE,OAAO,CAAC;IACnB,kEAAkE;IAClE,cAAc,EAAE,MAAM,CAAC;IACvB,iCAAiC;IACjC,QAAQ,EAAE,OAAO,CAAC;CACnB,CAAC;AAEF;;;;GAIG;AACH,MAAM,MAAM,gBAAgB,CAAC,CAAC,IAAI;IAChC,8EAA8E;IAC9E,KAAK,EAAE,CAAC,GAAG,IAAI,CAAC;IAChB,4EAA4E;IAC5E,UAAU,EAAE,MAAM,GAAG,IAAI,CAAC;IAC1B,uFAAuF;IACvF,QAAQ,EAAE,QAAQ,GAAG,SAAS,CAAC;CAChC,CAAC;AAEF;;;;GAIG;AACH,MAAM,MAAM,YAAY,GAAG,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,OAAO,KAAK,OAAO,CAAC;AAE/D;;;GAGG;AACH,MAAM,MAAM,gBAAgB,GAAG;IAC7B,kEAAkE;IAClE,QAAQ,EAAE,gBAAgB,CAAC;IAC3B,gEAAgE;IAChE,oBAAoB,EAAE,M
AAM,CAAC;IAC7B,+EAA+E;IAC/E,iBAAiB,EAAE,MAAM,CAAC;IAC1B,qFAAqF;IACrF,mBAAmB,EAAE,MAAM,CAAC;IAC5B,4EAA4E;IAC5E,OAAO,EAAE,YAAY,CAAC;CACvB,CAAC;AAEF;;;;GAIG;AACH,YAAY,EAAE,SAAS,EAAE,CAAC;AAE1B;;;;GAIG;AACH,MAAM,MAAM,SAAS,GAAG;IACtB,4GAA4G;IAC5G,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,qGAAqG;IACrG,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;CACrB,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,gBAAgB,GAAG;IAC7B,+DAA+D;IAC/D,KAAK,EAAE,OAAO,CAAC;IACf,6EAA6E;IAC7E,UAAU,EAAE,SAAS,EAAE,CAAC;CACzB,CAAC;AAEF;;GAEG;AACH,MAAM,MAAM,cAAc,GAAG;IAC3B,gFAAgF;IAChF,YAAY,EAAE,MAAM,CAAC;IACrB,6DAA6D;IAC7D,SAAS,EAAE,OAAO,CAAC;IACnB,8DAA8D;IAC9D,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,MAAM,gBAAgB,CAAC,CAAC,IAAI;IAChC,2FAA2F;IAC3F,IAAI,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC;IACvB,6EAA6E;IAC7E,UAAU,EAAE;SAAG,CAAC,IAAI,MAAM,CAAC,GAAG,MAAM,GAAG,IAAI;KAAE,CAAC;IAC9C;;;;OAIG;IACH,OAAO,EAAE;SAAG,CAAC,IAAI,MAAM,CAAC,GAAG,WAAW,GAAG,IAAI;KAAE,CAAC;IAChD,+EAA+E;IAC/E,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,mDAAmD;IACnD,OAAO,EAAE,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC;IACrB,0CAA0C;IAC1C,UAAU,EAAE,gBAAgB,CAAC;IAC7B,8CAA8C;IAC9C,IAAI,EAAE,cAAc,CAAC;CACtB,CAAC"}
|
|
@@ -8,7 +8,7 @@ export type LlmProvider = {
|
|
|
8
8
|
/**
|
|
9
9
|
* Send `request` to the underlying model and return the structured values
|
|
10
10
|
* it produced. Observability concerns (token counters, latency, cost) are
|
|
11
|
-
* the caller's responsibility
|
|
11
|
+
* the caller's responsibility - they live outside the llmbic contract so
|
|
12
12
|
* the library stays free of vendor-specific metering.
|
|
13
13
|
*
|
|
14
14
|
* @param request - Prompt, user content, and JSON Schema built by {@link prompt.build}.
|
|
@@ -13,8 +13,18 @@ export type RuleMatch<T> = {
|
|
|
13
13
|
/**
|
|
14
14
|
* A deterministic rule that tries to extract a single schema field from raw
|
|
15
15
|
* content. `extract` returns `null` when the rule does not apply.
|
|
16
|
+
*
|
|
17
|
+
* Rules can optionally accept a `context` argument: an opaque, caller-defined
|
|
18
|
+
* value forwarded verbatim by `rule.apply` / `Extractor.extract`. Typical use
|
|
19
|
+
* is to expose per-call metadata (locale, tenant-specific configuration,
|
|
20
|
+
* feature flags) rules need to decide whether they apply. `TContext`
|
|
21
|
+
* defaults to `unknown` so context-unaware rules stay assignable to arrays
|
|
22
|
+
* typed with any context.
|
|
23
|
+
*
|
|
24
|
+
* @typeParam TContext - Shape of the optional per-call context forwarded to
|
|
25
|
+
* `extract`. Defaults to `unknown`.
|
|
16
26
|
*/
|
|
17
|
-
export type ExtractionRule = {
|
|
27
|
+
export type ExtractionRule<TContext = unknown> = {
|
|
18
28
|
/**
|
|
19
29
|
* Stable identifier surfaced in `ExtractionResult.sources` when this rule
|
|
20
30
|
* produces the kept value. Optional: when omitted, `rule.apply` assigns
|
|
@@ -24,8 +34,13 @@ export type ExtractionRule = {
|
|
|
24
34
|
id?: string;
|
|
25
35
|
/** Name of the schema field this rule targets. */
|
|
26
36
|
field: string;
|
|
27
|
-
/**
|
|
28
|
-
|
|
37
|
+
/**
|
|
38
|
+
* Inspects `content` - and optionally a caller-provided `context` -
|
|
39
|
+
* and returns a match, or `null` if nothing was found. `context` is
|
|
40
|
+
* forwarded verbatim by `rule.apply` / `Extractor.extract` and left
|
|
41
|
+
* `undefined` when the caller passes no context.
|
|
42
|
+
*/
|
|
43
|
+
extract: (content: string, context?: TContext) => RuleMatch<unknown> | null;
|
|
29
44
|
};
|
|
30
45
|
/**
|
|
31
46
|
* The output of the rules pass. Contains the values that deterministic rules
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"rule.types.d.ts","sourceRoot":"","sources":["../../src/types/rule.types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,MAAM,MAAM,SAAS,CAAC,CAAC,IAAI;IACzB,gDAAgD;IAChD,KAAK,EAAE,CAAC,CAAC;IACT,4EAA4E;IAC5E,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF
|
|
1
|
+
{"version":3,"file":"rule.types.d.ts","sourceRoot":"","sources":["../../src/types/rule.types.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,MAAM,MAAM,SAAS,CAAC,CAAC,IAAI;IACzB,gDAAgD;IAChD,KAAK,EAAE,CAAC,CAAC;IACT,4EAA4E;IAC5E,UAAU,EAAE,MAAM,CAAC;CACpB,CAAC;AAEF;;;;;;;;;;;;;GAaG;AACH,MAAM,MAAM,cAAc,CAAC,QAAQ,GAAG,OAAO,IAAI;IAC/C;;;;;OAKG;IACH,EAAE,CAAC,EAAE,MAAM,CAAC;IACZ,kDAAkD;IAClD,KAAK,EAAE,MAAM,CAAC;IACd;;;;;OAKG;IACH,OAAO,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,QAAQ,KAAK,SAAS,CAAC,OAAO,CAAC,GAAG,IAAI,CAAC;CAC7E,CAAC;AAEF;;;;;;GAMG;AACH,MAAM,MAAM,WAAW,CAAC,CAAC,IAAI;IAC3B,mEAAmE;IACnE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC;IACnB,yDAAyD;IACzD,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC;IAC7C;;;;;;;OAOG;IACH,SAAS,CAAC,EAAE,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC;IAC7C,8DAA8D;IAC9D,OAAO,EAAE,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC;CACtB,CAAC"}
|