llmbic 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +22 -0
- package/LICENSE +21 -0
- package/README.md +351 -0
- package/dist/extractor.d.ts +19 -0
- package/dist/extractor.d.ts.map +1 -0
- package/dist/extractor.js +96 -0
- package/dist/extractor.js.map +1 -0
- package/dist/index.d.ts +24 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +17 -0
- package/dist/index.js.map +1 -0
- package/dist/merge.d.ts +76 -0
- package/dist/merge.d.ts.map +1 -0
- package/dist/merge.js +230 -0
- package/dist/merge.js.map +1 -0
- package/dist/prompt.d.ts +50 -0
- package/dist/prompt.d.ts.map +1 -0
- package/dist/prompt.js +205 -0
- package/dist/prompt.js.map +1 -0
- package/dist/rules.d.ts +73 -0
- package/dist/rules.d.ts.map +1 -0
- package/dist/rules.js +118 -0
- package/dist/rules.js.map +1 -0
- package/dist/types/extractor.types.d.ts +72 -0
- package/dist/types/extractor.types.d.ts.map +1 -0
- package/dist/types/extractor.types.js +2 -0
- package/dist/types/extractor.types.js.map +1 -0
- package/dist/types/logger.types.d.ts +12 -0
- package/dist/types/logger.types.d.ts.map +1 -0
- package/dist/types/logger.types.js +2 -0
- package/dist/types/logger.types.js.map +1 -0
- package/dist/types/merge.types.d.ts +159 -0
- package/dist/types/merge.types.d.ts.map +1 -0
- package/dist/types/merge.types.js +2 -0
- package/dist/types/merge.types.js.map +1 -0
- package/dist/types/prompt.types.d.ts +22 -0
- package/dist/types/prompt.types.d.ts.map +1 -0
- package/dist/types/prompt.types.js +2 -0
- package/dist/types/prompt.types.js.map +1 -0
- package/dist/types/provider.types.d.ts +21 -0
- package/dist/types/provider.types.d.ts.map +1 -0
- package/dist/types/provider.types.js +2 -0
- package/dist/types/provider.types.js.map +1 -0
- package/dist/types/rule.types.d.ts +38 -0
- package/dist/types/rule.types.d.ts.map +1 -0
- package/dist/types/rule.types.js +2 -0
- package/dist/types/rule.types.js.map +1 -0
- package/dist/types/validate.types.d.ts +25 -0
- package/dist/types/validate.types.d.ts.map +1 -0
- package/dist/types/validate.types.js +2 -0
- package/dist/types/validate.types.js.map +1 -0
- package/dist/validate.d.ts +57 -0
- package/dist/validate.d.ts.map +1 -0
- package/dist/validate.js +46 -0
- package/dist/validate.js.map +1 -0
- package/package.json +59 -0
package/dist/merge.js
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
 * Walk every schema field, build the {@link RuleMatch} if rules produced a
 * value, fuse it with the LLM candidate via {@link merge.field}, and collect
 * per-field outcomes. Invoked once at the top of {@link merge.apply}.
 *
 * Presence of a rule value or an LLM value is decided by own-property lookup
 * only, so schema keys that collide with `Object.prototype` members
 * (`constructor`, `toString`, ...) are never mistaken for produced values.
 *
 * @param schemaKeys - Field names to walk, in iteration order.
 * @param rulesResult - Rules pass output; `values` and `confidence` are read per field.
 * @param llmResult - LLM output whose `values` are read per field, or `null`/`undefined` when there is none.
 * @param policy - Forwarded as-is to {@link merge.field}.
 * @param logger - Forwarded as-is to {@link merge.field}.
 * @returns `data` and `confidence` keyed by field, the collected `conflicts`,
 * the fields whose fused value is `null` (`missing`), and the number of fields
 * for which the rules pass had an entry (`rulesMatched`).
 */
function fuseAllFields(schemaKeys, rulesResult, llmResult, policy, logger) {
    // `in` and plain indexing walk the prototype chain; only own entries count.
    const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target, key);
    const data = {};
    const confidence = {};
    const conflicts = [];
    const missing = [];
    let rulesMatched = 0;
    for (const field of schemaKeys) {
        const hasRuleValue = hasOwn(rulesResult.values, field);
        // Original author's note: rule.apply only writes to `confidence` when it
        // also writes to `values`, so a present value is expected to come with a
        // confidence. That contract lives outside this file.
        const ruleMatch = hasRuleValue
            ? {
                value: rulesResult.values[field],
                confidence: rulesResult.confidence[field],
            }
            : null;
        if (hasRuleValue) {
            rulesMatched += 1;
        }
        const llmValue = llmResult != null && hasOwn(llmResult.values, field)
            ? (llmResult.values[field] ?? null)
            : null;
        const fused = merge.field(field, ruleMatch, llmValue, policy, logger);
        data[field] = fused.value;
        confidence[field] = fused.confidence;
        if (fused.conflict !== undefined) {
            conflicts.push(fused.conflict);
        }
        // A null fused value means neither source produced anything usable.
        if (fused.value === null) {
            missing.push(field);
        }
    }
    return { data, confidence, conflicts, missing, rulesMatched };
}
|
|
38
|
+
/**
|
|
39
|
+
* Apply every configured {@link Normalizer} to the merged data in declared
|
|
40
|
+
* order. Normalizers may mutate their argument; the returned reference is
|
|
41
|
+
* what the rest of the pipeline observes.
|
|
42
|
+
*/
|
|
43
|
+
function runNormalizers(data, normalizers, content) {
|
|
44
|
+
let current = data;
|
|
45
|
+
for (const normalizer of normalizers ?? []) {
|
|
46
|
+
current = normalizer(current, content);
|
|
47
|
+
}
|
|
48
|
+
return current;
|
|
49
|
+
}
|
|
50
|
+
/**
|
|
51
|
+
* Produce the violation list for the normalized data: first the Zod schema
|
|
52
|
+
* re-validation (skipping fields already tracked in `missing`), then every
|
|
53
|
+
* configured validator.
|
|
54
|
+
*/
|
|
55
|
+
function collectViolations(schema, normalized, missing, validators) {
|
|
56
|
+
const violations = [];
|
|
57
|
+
const missingSet = new Set(missing);
|
|
58
|
+
const parsed = schema.safeParse(normalized);
|
|
59
|
+
if (!parsed.success) {
|
|
60
|
+
for (const issue of parsed.error.issues) {
|
|
61
|
+
const [firstPath] = issue.path;
|
|
62
|
+
const field = typeof firstPath === 'string' ? firstPath : undefined;
|
|
63
|
+
if (field !== undefined && missingSet.has(field)) {
|
|
64
|
+
continue;
|
|
65
|
+
}
|
|
66
|
+
violations.push({
|
|
67
|
+
field,
|
|
68
|
+
rule: 'schema',
|
|
69
|
+
message: issue.message,
|
|
70
|
+
severity: 'error',
|
|
71
|
+
});
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
for (const validator of validators ?? []) {
|
|
75
|
+
violations.push(...validator(normalized));
|
|
76
|
+
}
|
|
77
|
+
return violations;
|
|
78
|
+
}
|
|
79
|
+
/**
|
|
80
|
+
* Field-level and object-level merge primitives.
|
|
81
|
+
*
|
|
82
|
+
* For now, only {@link merge.field} is exposed; the top-level object merge
|
|
83
|
+
* will be added in a later slice.
|
|
84
|
+
*/
|
|
85
|
+
export const merge = {
|
|
86
|
+
/**
|
|
87
|
+
* Library defaults applied by {@link merge.field} when the caller omits
|
|
88
|
+
* one or more policy fields. Exposed so consumers can reference or spread
|
|
89
|
+
* them (e.g. `{ ...merge.defaultFieldPolicy, strategy: 'prefer-llm' }`).
|
|
90
|
+
*
|
|
91
|
+
* See {@link FieldMergePolicy} for the meaning of each field.
|
|
92
|
+
*/
|
|
93
|
+
defaultFieldPolicy: {
|
|
94
|
+
/** See {@link FieldMergePolicy.strategy}. */
|
|
95
|
+
strategy: 'flag',
|
|
96
|
+
/** See {@link FieldMergePolicy.defaultLlmConfidence}. */
|
|
97
|
+
defaultLlmConfidence: 0.7,
|
|
98
|
+
/** See {@link FieldMergePolicy.flaggedConfidence}. */
|
|
99
|
+
flaggedConfidence: 0.3,
|
|
100
|
+
/** See {@link FieldMergePolicy.agreementConfidence}. */
|
|
101
|
+
agreementConfidence: 1.0,
|
|
102
|
+
/** See {@link FieldMergePolicy.compare}. Case-insensitive for strings, strict equality otherwise. */
|
|
103
|
+
compare: (a, b) => {
|
|
104
|
+
if (typeof a === 'string' && typeof b === 'string') {
|
|
105
|
+
return a.toLowerCase() === b.toLowerCase();
|
|
106
|
+
}
|
|
107
|
+
return a === b;
|
|
108
|
+
},
|
|
109
|
+
},
|
|
110
|
+
/**
|
|
111
|
+
* Fuse a rule match and an LLM value for a single field, following the
|
|
112
|
+
* provided policy. Returns the kept value, its confidence, and a conflict
|
|
113
|
+
* record if the strategy flagged a disagreement.
|
|
114
|
+
*
|
|
115
|
+
* Any policy field omitted from `policy` falls back to
|
|
116
|
+
* {@link merge.defaultFieldPolicy}.
|
|
117
|
+
*
|
|
118
|
+
* Decision table (in order): rule-only, llm-only, both-null, agree,
|
|
119
|
+
* prefer-rule, prefer-llm, flag (default fallback).
|
|
120
|
+
*
|
|
121
|
+
* @typeParam T - Type of the rule value.
|
|
122
|
+
* @param field - Name of the field being merged.
|
|
123
|
+
* @param ruleMatch - Value proposed by a deterministic rule, or `null` if none.
|
|
124
|
+
* @param llmValue - Value proposed by the LLM, or `null` if none. Cast to `T`
|
|
125
|
+
* without runtime type-check — callers that expose `merge.field` via
|
|
126
|
+
* `merge.apply` rely on the final Zod re-validation to reject invalid LLM values.
|
|
127
|
+
* @param policy - Optional strategy and confidence overrides.
|
|
128
|
+
* @param logger - Optional logger notified of unexpected runtime situations
|
|
129
|
+
* (e.g. an unknown strategy slipped past the type system).
|
|
130
|
+
*/
|
|
131
|
+
field(field, ruleMatch, llmValue, policy, logger) {
|
|
132
|
+
const fullPolicy = { ...merge.defaultFieldPolicy, ...policy };
|
|
133
|
+
const normalizedLlm = llmValue ?? null;
|
|
134
|
+
if (ruleMatch !== null && normalizedLlm === null) {
|
|
135
|
+
return {
|
|
136
|
+
value: ruleMatch.value,
|
|
137
|
+
confidence: ruleMatch.confidence,
|
|
138
|
+
conflict: undefined,
|
|
139
|
+
};
|
|
140
|
+
}
|
|
141
|
+
if (ruleMatch === null && normalizedLlm !== null) {
|
|
142
|
+
return {
|
|
143
|
+
value: normalizedLlm,
|
|
144
|
+
confidence: fullPolicy.defaultLlmConfidence,
|
|
145
|
+
conflict: undefined,
|
|
146
|
+
};
|
|
147
|
+
}
|
|
148
|
+
if (ruleMatch === null || normalizedLlm === null) {
|
|
149
|
+
return { value: null, confidence: null, conflict: undefined };
|
|
150
|
+
}
|
|
151
|
+
if (fullPolicy.compare(ruleMatch.value, normalizedLlm)) {
|
|
152
|
+
return {
|
|
153
|
+
value: ruleMatch.value,
|
|
154
|
+
confidence: fullPolicy.agreementConfidence,
|
|
155
|
+
conflict: undefined,
|
|
156
|
+
};
|
|
157
|
+
}
|
|
158
|
+
if (fullPolicy.strategy === 'prefer-rule') {
|
|
159
|
+
return {
|
|
160
|
+
value: ruleMatch.value,
|
|
161
|
+
confidence: ruleMatch.confidence,
|
|
162
|
+
conflict: undefined,
|
|
163
|
+
};
|
|
164
|
+
}
|
|
165
|
+
if (fullPolicy.strategy === 'prefer-llm') {
|
|
166
|
+
return {
|
|
167
|
+
value: normalizedLlm,
|
|
168
|
+
confidence: fullPolicy.defaultLlmConfidence,
|
|
169
|
+
conflict: undefined,
|
|
170
|
+
};
|
|
171
|
+
}
|
|
172
|
+
if (fullPolicy.strategy !== 'flag') {
|
|
173
|
+
logger?.warn('unknown conflict strategy, falling back to flag', {
|
|
174
|
+
strategy: fullPolicy.strategy,
|
|
175
|
+
field,
|
|
176
|
+
});
|
|
177
|
+
}
|
|
178
|
+
return {
|
|
179
|
+
value: ruleMatch.value,
|
|
180
|
+
confidence: fullPolicy.flaggedConfidence,
|
|
181
|
+
conflict: {
|
|
182
|
+
field,
|
|
183
|
+
ruleValue: ruleMatch.value,
|
|
184
|
+
ruleConfidence: ruleMatch.confidence,
|
|
185
|
+
llmValue: normalizedLlm,
|
|
186
|
+
},
|
|
187
|
+
};
|
|
188
|
+
},
|
|
189
|
+
/**
|
|
190
|
+
* Walk every field of `schema`, fuse the rules pass result with the LLM
|
|
191
|
+
* result via {@link merge.field}, and produce a typed
|
|
192
|
+
* {@link ExtractionResult}.
|
|
193
|
+
*
|
|
194
|
+
* Passing `llmResult = null` runs in rules-only mode: every field keeps
|
|
195
|
+
* whatever the rules produced and `meta.llmCalled` is `false`.
|
|
196
|
+
*
|
|
197
|
+
* Orchestration only — the three phases (fusion, normalization, validation)
|
|
198
|
+
* each live in their own private helper above.
|
|
199
|
+
*
|
|
200
|
+
* Runtime fields of `meta` (`durationMs`, `tokensUsed`) are populated by
|
|
201
|
+
* later slices; for now `durationMs` is `0`.
|
|
202
|
+
*
|
|
203
|
+
* @typeParam S - A Zod object schema.
|
|
204
|
+
* @param schema - Zod object schema describing the target data shape.
|
|
205
|
+
* @param rulesResult - Output of {@link rule.apply} for the same schema.
|
|
206
|
+
* @param llmResult - Parsed LLM response, or `null` for rules-only mode.
|
|
207
|
+
* @param content - Original text the rules and LLM were derived from; forwarded to normalizers so they can cross-reference the source.
|
|
208
|
+
* @param options - Optional behavior overrides (policy, normalizers, validators, logger).
|
|
209
|
+
*/
|
|
210
|
+
apply(schema, rulesResult, llmResult, content, options) {
|
|
211
|
+
const schemaKeys = Object.keys(schema.shape);
|
|
212
|
+
const fusion = fuseAllFields(schemaKeys, rulesResult, llmResult, options?.policy, options?.logger);
|
|
213
|
+
const normalized = runNormalizers(fusion.data, options?.normalizers, content);
|
|
214
|
+
const violations = collectViolations(schema, normalized, fusion.missing, options?.validators);
|
|
215
|
+
const valid = !violations.some((v) => v.severity === 'error');
|
|
216
|
+
return {
|
|
217
|
+
data: normalized,
|
|
218
|
+
confidence: fusion.confidence,
|
|
219
|
+
conflicts: fusion.conflicts,
|
|
220
|
+
missing: fusion.missing,
|
|
221
|
+
validation: { valid, violations },
|
|
222
|
+
meta: {
|
|
223
|
+
rulesMatched: fusion.rulesMatched,
|
|
224
|
+
llmCalled: llmResult !== null,
|
|
225
|
+
durationMs: 0,
|
|
226
|
+
},
|
|
227
|
+
};
|
|
228
|
+
},
|
|
229
|
+
};
|
|
230
|
+
//# sourceMappingURL=merge.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"merge.js","sourceRoot":"","sources":["../src/merge.ts"],"names":[],"mappings":"AAuBA;;;;GAIG;AACH,SAAS,aAAa,CACpB,UAAuB,EACvB,WAA2B,EAC3B,SAA2B,EAC3B,MAA6C,EAC7C,MAA0B;IAE1B,MAAM,IAAI,GAAG,EAAsB,CAAC;IACpC,MAAM,UAAU,GAAG,EAAuC,CAAC;IAC3D,MAAM,SAAS,GAAe,EAAE,CAAC;IACjC,MAAM,OAAO,GAAgB,EAAE,CAAC;IAChC,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;QAC/B,MAAM,YAAY,GAAG,KAAK,IAAI,WAAW,CAAC,MAAM,CAAC;QACjD,6EAA6E;QAC7E,mDAAmD;QACnD,MAAM,SAAS,GAA8B,YAAY;YACvD,CAAC,CAAC;gBACE,KAAK,EAAE,WAAW,CAAC,MAAM,CAAC,KAAK,CAAC;gBAChC,UAAU,EAAE,WAAW,CAAC,UAAU,CAAC,KAAK,CAAW;aACpD;YACH,CAAC,CAAC,IAAI,CAAC;QACT,IAAI,YAAY,EAAE,CAAC;YACjB,YAAY,IAAI,CAAC,CAAC;QACpB,CAAC;QAED,MAAM,QAAQ,GAAG,SAAS,EAAE,MAAM,CAAC,KAAe,CAAC,IAAI,IAAI,CAAC;QAE5D,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,KAAe,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;QAEhF,IAAI,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,KAA0B,CAAC;QAC/C,UAAU,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,UAAU,CAAC;QACrC,IAAI,KAAK,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;YACjC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACjC,CAAC;QACD,IAAI,KAAK,CAAC,KAAK,KAAK,IAAI,EAAE,CAAC;YACzB,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,SAAS,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC;AAChE,CAAC;AAED;;;;GAIG;AACH,SAAS,cAAc,CACrB,IAAsB,EACtB,WAAwC,EACxC,OAAe;IAEf,IAAI,OAAO,GAAG,IAAI,CAAC;IACnB,KAAK,MAAM,UAAU,IAAI,WAAW,IAAI,EAAE,EAAE,CAAC;QAC3C,OAAO,GAAG,UAAU,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IACzC,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;GAIG;AACH,SAAS,iBAAiB,CACxB,MAAkC,EAClC,UAA4B,EAC5B,OAAoB,EACpB,UAA8C;IAE9C,MAAM,UAAU,GAAgB,EAAE,CAAC;IACnC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAmB,CAAC,CAAC;IAChD,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IAC5C,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;YACxC,MAAM,CAAC,SAAS,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC;YAC/B,MAAM,KAAK,GAAG,OAAO,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC;YACpE,IAAI,KAAK,KAAK,SAAS,IAAI,UAAU,CAAC,GAAG,CAAC,K
AAK,CAAC,EAAE,CAAC;gBACjD,SAAS;YACX,CAAC;YACD,UAAU,CAAC,IAAI,CAAC;gBACd,KAAK;gBACL,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,QAAQ,EAAE,OAAO;aAClB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IACD,KAAK,MAAM,SAAS,IAAI,UAAU,IAAI,EAAE,EAAE,CAAC;QACzC,UAAU,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC;IAC5C,CAAC;IACD,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,MAAM,KAAK,GAAG;IACnB;;;;;;OAMG;IACH,kBAAkB,EAAE;QAClB,6CAA6C;QAC7C,QAAQ,EAAE,MAAM;QAChB,yDAAyD;QACzD,oBAAoB,EAAE,GAAG;QACzB,sDAAsD;QACtD,iBAAiB,EAAE,GAAG;QACtB,wDAAwD;QACxD,mBAAmB,EAAE,GAAG;QACxB,qGAAqG;QACrG,OAAO,EAAE,CAAC,CAAU,EAAE,CAAU,EAAW,EAAE;YAC3C,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,OAAO,CAAC,KAAK,QAAQ,EAAE,CAAC;gBACnD,OAAO,CAAC,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7C,CAAC;YACD,OAAO,CAAC,KAAK,CAAC,CAAC;QACjB,CAAC;KACyB;IAE5B;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,KAAK,CACH,KAAa,EACb,SAA8B,EAC9B,QAAiB,EACjB,MAAkC,EAClC,MAAe;QAEf,MAAM,UAAU,GAAqB,EAAE,GAAG,KAAK,CAAC,kBAAkB,EAAE,GAAG,MAAM,EAAE,CAAC;QAChF,MAAM,aAAa,GAAG,QAAQ,IAAI,IAAI,CAAC;QAEvC,IAAI,SAAS,KAAK,IAAI,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YACjD,OAAO;gBACL,KAAK,EAAE,SAAS,CAAC,KAAK;gBACtB,UAAU,EAAE,SAAS,CAAC,UAAU;gBAChC,QAAQ,EAAE,SAAS;aACpB,CAAC;QACJ,CAAC;QAED,IAAI,SAAS,KAAK,IAAI,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YACjD,OAAO;gBACL,KAAK,EAAE,aAAkB;gBACzB,UAAU,EAAE,UAAU,CAAC,oBAAoB;gBAC3C,QAAQ,EAAE,SAAS;aACpB,CAAC;QACJ,CAAC;QAED,IAAI,SAAS,KAAK,IAAI,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YACjD,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;QAChE,CAAC;QAED,IAAI,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,aAAa,CAAC,EAAE,CAAC;YACvD,OAAO;gBACL,KAAK,EAAE,SAAS,CAAC,KAAK;gBACtB,UAAU,EAAE,UAAU,CAAC,mBAAmB;gBAC1C,QAAQ,EAAE,SAAS;aACpB,CAAC;QACJ,CAAC;QAED,IAAI,UAAU,CAAC,QAAQ,KAAK,aAAa,EAAE,CAAC;YAC1C,OAAO;gBACL,KAAK,EAAE,SAAS,CAAC,KAAK;gBACtB,UAAU,EAAE,SAAS,CAAC,UAAU;gBAChC,QAAQ,EAAE,SAAS;aACpB,CAAC;QACJ,CAAC;QACD,IAAI,UAAU,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;YACzC,OAAO;gBACL,KAAK,EAAE,aAAkB;gBACzB,UAAU,EAAE,UAAU,CAAC,oBAAoB;gBAC3C,QAAQ,EAAE,SAAS;aACpB,CAAC;QACJ
,CAAC;QACD,IAAI,UAAU,CAAC,QAAQ,KAAK,MAAM,EAAE,CAAC;YACnC,MAAM,EAAE,IAAI,CAAC,iDAAiD,EAAE;gBAC9D,QAAQ,EAAE,UAAU,CAAC,QAAQ;gBAC7B,KAAK;aACN,CAAC,CAAC;QACL,CAAC;QACD,OAAO;YACL,KAAK,EAAE,SAAS,CAAC,KAAK;YACtB,UAAU,EAAE,UAAU,CAAC,iBAAiB;YACxC,QAAQ,EAAE;gBACR,KAAK;gBACL,SAAS,EAAE,SAAS,CAAC,KAAK;gBAC1B,cAAc,EAAE,SAAS,CAAC,UAAU;gBACpC,QAAQ,EAAE,aAAa;aACxB;SACF,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,KAAK,CACH,MAAS,EACT,WAAoC,EACpC,SAA2B,EAC3B,OAAe,EACf,OAAuC;QAGvC,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAmB,CAAC;QAE/D,MAAM,MAAM,GAAG,aAAa,CAC1B,UAAU,EACV,WAAW,EACX,SAAS,EACT,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,MAAM,CAChB,CAAC;QAEF,MAAM,UAAU,GAAG,cAAc,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;QAE9E,MAAM,UAAU,GAAG,iBAAiB,CAClC,MAAM,EACN,UAAU,EACV,MAAM,CAAC,OAAO,EACd,OAAO,EAAE,UAAU,CACpB,CAAC;QACF,MAAM,KAAK,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC;QAE9D,OAAO;YACL,IAAI,EAAE,UAAU;YAChB,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,UAAU,EAAE,EAAE,KAAK,EAAE,UAAU,EAAE;YACjC,IAAI,EAAE;gBACJ,YAAY,EAAE,MAAM,CAAC,YAAY;gBACjC,SAAS,EAAE,SAAS,KAAK,IAAI;gBAC7B,UAAU,EAAE,CAAC;aACd;SACF,CAAC;IACJ,CAAC;CACF,CAAC"}
|
package/dist/prompt.d.ts
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import type { z } from 'zod';
|
|
2
|
+
import type { ExtractionResult, LlmResult } from './types/merge.types.js';
|
|
3
|
+
import type { LlmRequest } from './types/prompt.types.js';
|
|
4
|
+
/**
|
|
5
|
+
* Prompt-building primitives that turn a partial extraction result into an
|
|
6
|
+
* {@link LlmRequest} targeted at the fields the deterministic pass could not
|
|
7
|
+
* produce, and that parse the provider's answer back into an {@link LlmResult}.
|
|
8
|
+
*/
|
|
9
|
+
export declare const prompt: {
|
|
10
|
+
/**
|
|
11
|
+
* Build an LLM request restricted to `partial.missing`. The response schema
|
|
12
|
+
* is a JSON Schema covering only those fields, and values already produced
|
|
13
|
+
* by the deterministic pass are surfaced both as `knownValues` and as a
|
|
14
|
+
* hint block prepended to `userContent`.
|
|
15
|
+
*
|
|
16
|
+
* Orchestration only — the four phases (response-schema build, known-values
|
|
17
|
+
* collection, user-content formatting, request assembly) each live in their
|
|
18
|
+
* own private helper inside the implementation module (`prompt.js`).
|
|
19
|
+
*
|
|
20
|
+
* @typeParam S - A Zod object schema describing the full target shape.
|
|
21
|
+
* @param schema - Zod object schema that drives the field selection.
|
|
22
|
+
* @param partial - Output of {@link merge.apply} (or any equivalent partial)
|
|
23
|
+
* — only `data` and `missing` are read.
|
|
24
|
+
* @param content - Original text the request will refer to.
|
|
25
|
+
* @param options - Optional behavior overrides (custom system prompt).
|
|
26
|
+
* @throws When a missing field uses a Zod kind outside the supported
|
|
27
|
+
* whitelist; the error message names the offending field.
* @returns An {@link LlmRequest} carrying `systemPrompt`, `userContent`,
* `responseSchema` and `knownValues`.
|
|
28
|
+
*/
|
|
29
|
+
build<S extends z.ZodObject<z.ZodRawShape>>(schema: S, partial: Pick<ExtractionResult<z.infer<S>>, "data" | "missing">, content: string, options?: {
|
|
30
|
+
systemPrompt?: string;
|
|
31
|
+
}): LlmRequest;
|
|
32
|
+
/**
|
|
33
|
+
* Parse a raw LLM response permissively. Accepts either an already-decoded
|
|
34
|
+
* object or a JSON-encoded string. Each field listed in `missing` is
|
|
35
|
+
* validated individually against its Zod schema — valid fields flow into
|
|
36
|
+
* `values`, invalid ones are dropped and surfaced as warnings. Keys outside
|
|
37
|
+
* `missing` are dropped as well, with a single aggregated warning so the
|
|
38
|
+
* caller can spot a prompt/provider mismatch.
|
|
39
|
+
*
|
|
40
|
+
* Best-effort by design: never throws, always returns an {@link LlmResult}.
|
|
41
|
+
*
|
|
42
|
+
* @typeParam S - A Zod object schema describing the full target shape.
|
|
43
|
+
* @param schema - Zod object schema whose fields back the validation.
|
|
44
|
+
* @param missing - Fields the LLM was expected to produce (typically
|
|
45
|
+
* {@link ExtractionResult.missing}).
|
|
46
|
+
* @param raw - The provider response — object or JSON string.
* @returns An {@link LlmResult} whose `values` hold the fields that validated;
* `warnings` is only present when at least one warning was raised.
|
|
47
|
+
*/
|
|
48
|
+
parse<S extends z.ZodObject<z.ZodRawShape>>(schema: S, missing: readonly (keyof z.infer<S>)[], raw: unknown): LlmResult;
|
|
49
|
+
};
|
|
50
|
+
//# sourceMappingURL=prompt.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAC7B,OAAO,KAAK,EAEV,gBAAgB,EAChB,SAAS,EACV,MAAM,wBAAwB,CAAC;AAChC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAuK1D;;;;GAIG;AACH,eAAO,MAAM,MAAM;IACjB;;;;;;;;;;;;;;;;;;OAkBG;UACG,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,UAChC,CAAC,WACA,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC,WACtD,MAAM,YACL;QAAE,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,GAClC,UAAU;IAcb;;;;;;;;;;;;;;;OAeG;UACG,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,UAChC,CAAC,WACA,SAAS,CAAC,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,OACjC,OAAO,GACX,SAAS;CAiBb,CAAC"}
|
package/dist/prompt.js
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
const DEFAULT_SYSTEM_PROMPT = 'Extract the listed fields from the content as a JSON object.';
|
|
2
|
+
/**
|
|
3
|
+
* Convert a single Zod field schema to JSON Schema. Throws on any Zod kind
|
|
4
|
+
* outside the documented whitelist (`string`, `number`, `boolean`, `enum`,
|
|
5
|
+
* `nullable`), naming the offending field so the caller can restructure
|
|
6
|
+
* their schema.
|
|
7
|
+
*/
|
|
8
|
+
function zodFieldToJsonSchema(zodType, field) {
|
|
9
|
+
const def = zodType.def;
|
|
10
|
+
const kind = def.type;
|
|
11
|
+
if (kind === 'string') {
|
|
12
|
+
return { type: 'string' };
|
|
13
|
+
}
|
|
14
|
+
if (kind === 'number') {
|
|
15
|
+
return { type: 'number' };
|
|
16
|
+
}
|
|
17
|
+
if (kind === 'boolean') {
|
|
18
|
+
return { type: 'boolean' };
|
|
19
|
+
}
|
|
20
|
+
if (kind === 'enum') {
|
|
21
|
+
const entries = def.entries;
|
|
22
|
+
return { type: 'string', enum: Object.values(entries) };
|
|
23
|
+
}
|
|
24
|
+
if (kind === 'nullable') {
|
|
25
|
+
const inner = zodFieldToJsonSchema(def.innerType, field);
|
|
26
|
+
if (typeof inner.type !== 'string') {
|
|
27
|
+
throw new Error(`Unsupported nested nullable on field "${field}"`);
|
|
28
|
+
}
|
|
29
|
+
return { ...inner, type: [inner.type, 'null'] };
|
|
30
|
+
}
|
|
31
|
+
throw new Error(`Unsupported Zod type "${kind}" on field "${field}"`);
|
|
32
|
+
}
|
|
33
|
+
/**
 * Build the JSON Schema handed to the LLM, restricted to the fields the
 * deterministic pass could not produce.
 *
 * Fields listed in `missing` that are not own keys of `schema.shape` are
 * skipped. `required` is derived from the properties actually emitted, so it
 * never names a field the schema does not describe and never contains
 * duplicates, even when `missing` does.
 *
 * @param schema - Object schema; only its `shape` is read.
 * @param missing - Names of the fields to request from the LLM.
 * @returns `{ type: 'object', properties, required }`.
 * @throws Propagates the error raised by `zodFieldToJsonSchema` for unsupported field kinds.
 */
function buildResponseSchema(schema, missing) {
    const properties = {};
    const shape = schema.shape;
    for (const field of missing) {
        // Own-property check: `shape['toString']` would otherwise resolve to an inherited function.
        if (!Object.prototype.hasOwnProperty.call(shape, field)) {
            continue;
        }
        const zodField = shape[field];
        if (zodField === undefined) {
            continue;
        }
        properties[field] = zodFieldToJsonSchema(zodField, field);
    }
    return { type: 'object', properties, required: Object.keys(properties) };
}
|
|
49
|
+
/**
 * Extract the values the deterministic pass has already resolved: every entry
 * of `data` whose key is not listed in `missing` and whose value is neither
 * `null` nor `undefined`.
 *
 * @param data - Partial extraction data.
 * @param missing - Field names still to be produced; they are excluded.
 * @returns A new object holding only the resolved entries.
 */
function collectKnownValues(data, missing) {
    const pending = new Set(missing);
    return Object.entries(data).reduce((resolved, [name, value]) => {
        const isResolved = !pending.has(name) && value !== null && value !== undefined;
        if (isResolved) {
            resolved[name] = value;
        }
        return resolved;
    }, {});
}
|
|
67
|
+
/**
 * Put a short "Already extracted" hint block in front of the content, with one
 * `- key = <JSON value>` line per known value, so the LLM can ground its
 * extraction in the deterministic pass. When there are no known values the
 * content is returned untouched.
 *
 * @param content - Original text.
 * @param knownValues - Values already resolved, keyed by field name.
 * @returns The content, optionally prefixed by the hint block and a blank line.
 */
function formatUserContent(content, knownValues) {
    const hints = Object.entries(knownValues).map(([name, value]) => `- ${name} = ${JSON.stringify(value)}`);
    if (hints.length > 0) {
        const hintBlock = hints.join('\n');
        return `Already extracted:\n${hintBlock}\n\n${content}`;
    }
    return content;
}
|
|
80
|
+
/**
|
|
81
|
+
* Decode a raw LLM response into a plain object. Accepts either an already
|
|
82
|
+
* parsed object or a JSON-encoded string. Returns a warning message instead
|
|
83
|
+
* of throwing when the payload cannot be used.
|
|
84
|
+
*/
|
|
85
|
+
function decodeRaw(raw) {
|
|
86
|
+
let candidate = raw;
|
|
87
|
+
if (typeof raw === 'string') {
|
|
88
|
+
try {
|
|
89
|
+
candidate = JSON.parse(raw);
|
|
90
|
+
}
|
|
91
|
+
catch {
|
|
92
|
+
return { warning: 'response is not valid JSON' };
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
if (candidate === null ||
|
|
96
|
+
typeof candidate !== 'object' ||
|
|
97
|
+
Array.isArray(candidate)) {
|
|
98
|
+
return { warning: 'response is not valid JSON' };
|
|
99
|
+
}
|
|
100
|
+
return { object: candidate };
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Validate the fields the LLM was asked to produce, keeping those that match
|
|
104
|
+
* their Zod schema and collecting a warning per field that fails validation.
|
|
105
|
+
*/
|
|
106
|
+
function validateMissingFields(schema, missing, object) {
|
|
107
|
+
const shape = schema.shape;
|
|
108
|
+
const values = {};
|
|
109
|
+
const warnings = [];
|
|
110
|
+
for (const field of missing) {
|
|
111
|
+
if (!(field in object)) {
|
|
112
|
+
continue;
|
|
113
|
+
}
|
|
114
|
+
const fieldSchema = shape[field];
|
|
115
|
+
if (fieldSchema === undefined) {
|
|
116
|
+
continue;
|
|
117
|
+
}
|
|
118
|
+
const parsed = fieldSchema.safeParse(object[field]);
|
|
119
|
+
if (parsed.success) {
|
|
120
|
+
values[field] = parsed.data;
|
|
121
|
+
}
|
|
122
|
+
else {
|
|
123
|
+
const reason = parsed.error.issues[0]?.message ?? 'invalid value';
|
|
124
|
+
warnings.push(`field ${field}: ${reason}`);
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
return { values, warnings };
|
|
128
|
+
}
|
|
129
|
+
/**
 * List the keys of the response that the LLM was not asked for, i.e. those
 * absent from `missing`, in the response's own key order. The caller drops
 * them and reports them in one aggregated warning, which keeps prompt
 * problems (an LLM ignoring the restricted schema) visible.
 *
 * @param object - Decoded LLM response.
 * @param missing - Field names that were requested.
 * @returns The unrequested keys.
 */
function collectUnexpectedKeys(object, missing) {
    const requested = new Set(missing);
    const extras = [];
    for (const name of Object.keys(object)) {
        if (!requested.has(name)) {
            extras.push(name);
        }
    }
    return extras;
}
|
|
138
|
+
/**
 * Prompt helpers: `build` turns a partial extraction result into an
 * {@link LlmRequest} aimed at the fields the deterministic pass left open,
 * `parse` turns the provider's answer back into an {@link LlmResult}.
 */
export const prompt = {
    /**
     * Assemble an LLM request limited to `partial.missing`.
     *
     * The response schema is a JSON Schema covering only the missing fields.
     * Values the deterministic pass already resolved are exposed as
     * `knownValues` and also rendered as a hint block in front of
     * `userContent`. The work is delegated to `buildResponseSchema`,
     * `collectKnownValues` and `formatUserContent`, in that order.
     *
     * @typeParam S - A Zod object schema describing the full target shape.
     * @param schema - Zod object schema that drives the field selection.
     * @param partial - Output of {@link merge.apply} (or any equivalent
     * partial); only `data` and `missing` are read.
     * @param content - Original text the request refers to.
     * @param options - Optional overrides; `systemPrompt` replaces the default system prompt.
     * @returns `{ systemPrompt, userContent, responseSchema, knownValues }`.
     * @throws When a missing field uses a Zod kind outside the supported
     * whitelist; the error message names the offending field.
     */
    build(schema, partial, content, options) {
        const responseSchema = buildResponseSchema(schema, partial.missing);
        const knownValues = collectKnownValues(partial.data, partial.missing);
        const systemPrompt = options?.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
        return {
            systemPrompt,
            userContent: formatUserContent(content, knownValues),
            responseSchema,
            knownValues,
        };
    },
    /**
     * Leniently parse a raw LLM response, given either as a decoded object
     * or as a JSON string.
     *
     * An undecodable payload yields empty `values` and a single warning.
     * Otherwise each field listed in `missing` is validated on its own: valid
     * fields end up in `values`, invalid ones are dropped with a warning.
     * Keys that were not requested are dropped too and reported together in
     * one `unexpected fields dropped: ...` warning, so a prompt/provider
     * mismatch stays visible. `warnings` is omitted from the result when
     * there are none.
     *
     * @typeParam S - A Zod object schema describing the full target shape.
     * @param schema - Zod object schema whose fields back the validation.
     * @param missing - Fields the LLM was expected to produce (typically
     * {@link ExtractionResult.missing}).
     * @param raw - The provider response, object or JSON string.
     * @returns An {@link LlmResult}.
     */
    parse(schema, missing, raw) {
        const decoded = decodeRaw(raw);
        if ('warning' in decoded) {
            return { values: {}, warnings: [decoded.warning] };
        }
        const payload = decoded.object;
        const checked = validateMissingFields(schema, missing, payload);
        const stray = collectUnexpectedKeys(payload, missing);
        const warnings = stray.length > 0
            ? [...checked.warnings, `unexpected fields dropped: ${stray.join(', ')}`]
            : checked.warnings;
        if (warnings.length === 0) {
            return { values: checked.values };
        }
        return { values: checked.values, warnings };
    },
};
|
|
205
|
+
//# sourceMappingURL=prompt.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"prompt.js","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"AAQA,MAAM,qBAAqB,GACzB,8DAA8D,CAAC;AAKjE;;;;;GAKG;AACH,SAAS,oBAAoB,CAAC,OAAgB,EAAE,KAAa;IAC3D,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC;IACxB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IAEtB,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtB,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;IAC5B,CAAC;IACD,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtB,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;IAC5B,CAAC;IACD,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;QACvB,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IAC7B,CAAC;IACD,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,GAAG,CAAC,OAA0C,CAAC;QAC/D,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;IAC1D,CAAC;IACD,IAAI,IAAI,KAAK,UAAU,EAAE,CAAC;QACxB,MAAM,KAAK,GAAG,oBAAoB,CAAC,GAAG,CAAC,SAAoB,EAAE,KAAK,CAAC,CAAC;QACpE,IAAI,OAAO,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACnC,MAAM,IAAI,KAAK,CAAC,yCAAyC,KAAK,GAAG,CAAC,CAAC;QACrE,CAAC;QACD,OAAO,EAAE,GAAG,KAAK,EAAE,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC;IAClD,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,yBAAyB,IAAI,eAAe,KAAK,GAAG,CAAC,CAAC;AACxE,CAAC;AAED;;;GAGG;AACH,SAAS,mBAAmB,CAC1B,MAAkC,EAClC,OAA0B;IAE1B,MAAM,UAAU,GAA4C,EAAE,CAAC;IAC/D,MAAM,KAAK,GAAG,MAAM,CAAC,KAA2C,CAAC;IACjE,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC;QAC9B,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,SAAS;QACX,CAAC;QACD,UAAU,CAAC,KAAK,CAAC,GAAG,oBAAoB,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;IAC5D,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC,GAAG,OAAO,CAAC,EAAE,CAAC;AAChE,CAAC;AAED;;;GAGG;AACH,SAAS,kBAAkB,CACzB,IAAsB,EACtB,OAA6B;IAE7B,MAAM,UAAU,GAAG,IAAI,GAAG,CAAS,OAA4B,CAAC,CAAC;IACjE,MAAM,KAAK,GAA4B,EAAE,CAAC;IAC1C,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;QAChD,IAAI,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,SAAS;QACX,CAAC;QACD,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YAC1C,SAAS;QACX,CAAC;QACD,KAAK,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;IACrB,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;;
;GAIG;AACH,SAAS,iBAAiB,CAAC,OAAe,EAAE,WAAoC;IAC9E,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACtC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,OAAO,CAAC;IACjB,CAAC;IACD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,KAAK,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;IAClF,OAAO,uBAAuB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,OAAO,EAAE,CAAC;AACjE,CAAC;AAED;;;;GAIG;AACH,SAAS,SAAS,CAChB,GAAY;IAEZ,IAAI,SAAS,GAAY,GAAG,CAAC;IAC7B,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QAC5B,IAAI,CAAC;YACH,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC9B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,OAAO,EAAE,4BAA4B,EAAE,CAAC;QACnD,CAAC;IACH,CAAC;IACD,IACE,SAAS,KAAK,IAAI;QAClB,OAAO,SAAS,KAAK,QAAQ;QAC7B,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,EACxB,CAAC;QACD,OAAO,EAAE,OAAO,EAAE,4BAA4B,EAAE,CAAC;IACnD,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,SAAoC,EAAE,CAAC;AAC1D,CAAC;AAED;;;GAGG;AACH,SAAS,qBAAqB,CAC5B,MAAkC,EAClC,OAA0B,EAC1B,MAA+B;IAE/B,MAAM,KAAK,GAAG,MAAM,CAAC,KAAkC,CAAC;IACxD,MAAM,MAAM,GAA4B,EAAE,CAAC;IAC3C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,CAAC,CAAC,KAAK,IAAI,MAAM,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QACD,MAAM,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC;QACjC,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;YAC9B,SAAS;QACX,CAAC;QACD,MAAM,MAAM,GAAG,WAAW,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QACpD,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC;QAC9B,CAAC;aAAM,CAAC;YACN,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,eAAe,CAAC;YAClE,QAAQ,CAAC,IAAI,CAAC,SAAS,KAAK,KAAK,MAAM,EAAE,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;AAC9B,CAAC;AAED;;;;GAIG;AACH,SAAS,qBAAqB,CAC5B,MAA+B,EAC/B,OAA0B;IAE1B,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IACpC,OAAO,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;AACnE,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,MAAM,GAAG;IACpB;;;;;;;;;;;;;;;;;;OAkBG;IACH,KAAK,CACH,MAAS,EACT,OAA+D,EAC/D,OAA
e,EACf,OAAmC;QAGnC,MAAM,OAAO,GAAG,OAAO,CAAC,OAA4B,CAAC;QACrD,MAAM,cAAc,GAAG,mBAAmB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC5D,MAAM,WAAW,GAAG,kBAAkB,CAAO,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;QAC5E,MAAM,WAAW,GAAG,iBAAiB,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;QAC5D,OAAO;YACL,YAAY,EAAE,OAAO,EAAE,YAAY,IAAI,qBAAqB;YAC5D,WAAW;YACX,cAAc;YACd,WAAW;SACZ,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACH,KAAK,CACH,MAAS,EACT,OAAsC,EACtC,GAAY;QAEZ,MAAM,WAAW,GAAG,OAA4B,CAAC;QACjD,MAAM,OAAO,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;QAC/B,IAAI,SAAS,IAAI,OAAO,EAAE,CAAC;YACzB,OAAO,EAAE,MAAM,EAAE,EAAE,EAAE,QAAQ,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QACrD,CAAC;QACD,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,qBAAqB,CAChD,MAAM,EACN,WAAW,EACX,OAAO,CAAC,MAAM,CACf,CAAC;QACF,MAAM,UAAU,GAAG,qBAAqB,CAAC,OAAO,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;QACtE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,QAAQ,CAAC,IAAI,CAAC,8BAA8B,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACvE,CAAC;QACD,OAAO,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC;IACjE,CAAC;CACF,CAAC"}
|
package/dist/rules.d.ts
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import type { z } from 'zod';
|
|
2
|
+
import type { Logger } from './types/logger.types.js';
|
|
3
|
+
import type { ExtractionRule, RuleMatch, RulesResult } from './types/rule.types.js';
|
|
4
|
+
/**
|
|
5
|
+
* Namespace bundling every primitive used to declare and run deterministic
|
|
6
|
+
* extraction rules.
|
|
7
|
+
*/
|
|
8
|
+
export declare const rule: {
|
|
9
|
+
/**
|
|
10
|
+
* Declare a deterministic extraction rule targeting a single schema field.
|
|
11
|
+
*
|
|
12
|
+
* The `extract` callback receives the raw content and must return a
|
|
13
|
+
* {@link RuleMatch}, or `null` when the rule does not apply.
|
|
14
|
+
*
|
|
15
|
+
* @param field - Name of the schema field the rule writes to.
|
|
16
|
+
* @param extract - Callback that inspects the content and proposes a value.
|
|
17
|
+
* @returns An {@link ExtractionRule} ready to be passed to {@link rule.apply}.
|
|
18
|
+
*/
|
|
19
|
+
create(field: string, extract: (content: string) => RuleMatch<unknown> | null): ExtractionRule;
|
|
20
|
+
/**
|
|
21
|
+
* Shortcut to build a regex-based {@link ExtractionRule}. On match, the
|
|
22
|
+
* value is capture group 1 (or the full match if there is no group), unless
|
|
23
|
+
* a `transform` callback is supplied to derive it from the match array.
|
|
24
|
+
*
|
|
25
|
+
* @typeParam T - Type produced by `transform`, defaults to `string`.
|
|
26
|
+
* @param field - Name of the schema field the rule writes to.
|
|
27
|
+
* @param pattern - Regular expression to evaluate against the content.
|
|
28
|
+
* @param confidenceScore - Confidence score in `[0, 1]` assigned on a successful match.
|
|
29
|
+
* @param transform - Optional mapper from the raw `RegExpMatchArray` to a value.
|
|
30
|
+
* @returns An {@link ExtractionRule} ready to be passed to {@link rule.apply}.
|
|
31
|
+
*/
|
|
32
|
+
regex<T = string>(field: string, pattern: RegExp, confidenceScore: number, transform?: (match: RegExpMatchArray) => T): ExtractionRule;
|
|
33
|
+
/**
|
|
34
|
+
* Build a {@link RuleMatch} from a value and a confidence score. Syntactic
|
|
35
|
+
* sugar used inside custom rule callbacks to avoid writing the object literal
|
|
36
|
+
* by hand.
|
|
37
|
+
*
|
|
38
|
+
* @typeParam T - Type of the extracted value.
|
|
39
|
+
* @param value - The extracted value.
|
|
40
|
+
* @param score - Confidence score in `[0, 1]`.
|
|
41
|
+
* @returns A {@link RuleMatch} wrapping `value` and `score`.
|
|
42
|
+
*
|
|
43
|
+
* @example
|
|
44
|
+
* ```ts
|
|
45
|
+
* const ageRule = rule.create('age', (text) => {
|
|
46
|
+
* const match = text.match(/(\d+)\s*years/);
|
|
47
|
+
* return match ? rule.confidence(Number(match[1]), 0.9) : null;
|
|
48
|
+
* });
|
|
49
|
+
* ```
|
|
50
|
+
*/
|
|
51
|
+
confidence<T>(value: T, score: number): RuleMatch<T>;
|
|
52
|
+
/**
|
|
53
|
+
* Run every deterministic rule against `content`, collect their matches,
|
|
54
|
+
* resolve collisions by confidence, and type-check each candidate against
|
|
55
|
+
* the Zod schema before accepting it.
|
|
56
|
+
*
|
|
57
|
+
* Behavior:
|
|
58
|
+
* - Rules targeting a field absent from the schema are silently skipped.
|
|
59
|
+
* - On field collisions, the highest-confidence match wins; ties favor the
|
|
60
|
+
* first-declared rule.
|
|
61
|
+
* - Values failing the per-field Zod `safeParse` are discarded and the
|
|
62
|
+
* field falls back to `missing`. An optional logger receives a warning.
|
|
63
|
+
*
|
|
64
|
+
* @typeParam S - A Zod object schema.
|
|
65
|
+
* @param content - Raw content to extract from (typically markdown or text).
|
|
66
|
+
* @param rules - Deterministic rules to evaluate.
|
|
67
|
+
* @param schema - Zod object schema describing the target data shape.
|
|
68
|
+
* @param logger - Optional logger notified when a value is rejected.
|
|
69
|
+
* @returns The deterministic extraction result (values, confidence, missing).
|
|
70
|
+
*/
|
|
71
|
+
apply<S extends z.ZodObject<z.ZodRawShape>>(content: string, rules: ExtractionRule[], schema: S, logger?: Logger): RulesResult<z.infer<S>>;
|
|
72
|
+
};
|
|
73
|
+
//# sourceMappingURL=rules.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rules.d.ts","sourceRoot":"","sources":["../src/rules.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAC7B,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAEpF;;;GAGG;AACH,eAAO,MAAM,IAAI;IACf;;;;;;;;;OASG;kBAEM,MAAM,WACJ,CAAC,OAAO,EAAE,MAAM,KAAK,SAAS,CAAC,OAAO,CAAC,GAAG,IAAI,GACtD,cAAc;IAIjB;;;;;;;;;;;OAWG;UACG,CAAC,kBACE,MAAM,WACJ,MAAM,mBACE,MAAM,cACX,CAAC,KAAK,EAAE,gBAAgB,KAAK,CAAC,GACzC,cAAc;IAcjB;;;;;;;;;;;;;;;;;OAiBG;eACQ,CAAC,SAAS,CAAC,SAAS,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC;IAIpD;;;;;;;;;;;;;;;;;;OAkBG;UACG,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,WAC/B,MAAM,SACR,cAAc,EAAE,UACf,CAAC,WACA,MAAM,GACd,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;CAqC3B,CAAC"}
|