llmbic 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/CHANGELOG.md +22 -0
  2. package/LICENSE +21 -0
  3. package/README.md +351 -0
  4. package/dist/extractor.d.ts +19 -0
  5. package/dist/extractor.d.ts.map +1 -0
  6. package/dist/extractor.js +96 -0
  7. package/dist/extractor.js.map +1 -0
  8. package/dist/index.d.ts +24 -0
  9. package/dist/index.d.ts.map +1 -0
  10. package/dist/index.js +17 -0
  11. package/dist/index.js.map +1 -0
  12. package/dist/merge.d.ts +76 -0
  13. package/dist/merge.d.ts.map +1 -0
  14. package/dist/merge.js +230 -0
  15. package/dist/merge.js.map +1 -0
  16. package/dist/prompt.d.ts +50 -0
  17. package/dist/prompt.d.ts.map +1 -0
  18. package/dist/prompt.js +205 -0
  19. package/dist/prompt.js.map +1 -0
  20. package/dist/rules.d.ts +73 -0
  21. package/dist/rules.d.ts.map +1 -0
  22. package/dist/rules.js +118 -0
  23. package/dist/rules.js.map +1 -0
  24. package/dist/types/extractor.types.d.ts +72 -0
  25. package/dist/types/extractor.types.d.ts.map +1 -0
  26. package/dist/types/extractor.types.js +2 -0
  27. package/dist/types/extractor.types.js.map +1 -0
  28. package/dist/types/logger.types.d.ts +12 -0
  29. package/dist/types/logger.types.d.ts.map +1 -0
  30. package/dist/types/logger.types.js +2 -0
  31. package/dist/types/logger.types.js.map +1 -0
  32. package/dist/types/merge.types.d.ts +159 -0
  33. package/dist/types/merge.types.d.ts.map +1 -0
  34. package/dist/types/merge.types.js +2 -0
  35. package/dist/types/merge.types.js.map +1 -0
  36. package/dist/types/prompt.types.d.ts +22 -0
  37. package/dist/types/prompt.types.d.ts.map +1 -0
  38. package/dist/types/prompt.types.js +2 -0
  39. package/dist/types/prompt.types.js.map +1 -0
  40. package/dist/types/provider.types.d.ts +21 -0
  41. package/dist/types/provider.types.d.ts.map +1 -0
  42. package/dist/types/provider.types.js +2 -0
  43. package/dist/types/provider.types.js.map +1 -0
  44. package/dist/types/rule.types.d.ts +38 -0
  45. package/dist/types/rule.types.d.ts.map +1 -0
  46. package/dist/types/rule.types.js +2 -0
  47. package/dist/types/rule.types.js.map +1 -0
  48. package/dist/types/validate.types.d.ts +25 -0
  49. package/dist/types/validate.types.d.ts.map +1 -0
  50. package/dist/types/validate.types.js +2 -0
  51. package/dist/types/validate.types.js.map +1 -0
  52. package/dist/validate.d.ts +57 -0
  53. package/dist/validate.d.ts.map +1 -0
  54. package/dist/validate.js +46 -0
  55. package/dist/validate.js.map +1 -0
  56. package/package.json +59 -0
package/dist/merge.js ADDED
@@ -0,0 +1,230 @@
1
/**
 * Fuse the rules pass and the LLM pass for every schema field.
 *
 * For each key in `schemaKeys`, a rule match is built when the rules pass
 * produced a value for that key, the LLM candidate is looked up, and both are
 * handed to {@link merge.field}. The per-field outcomes are collected into the
 * aggregate returned to {@link merge.apply}.
 *
 * @param schemaKeys - Field names to walk.
 * @param rulesResult - Rules pass output; its `values` and `confidence` maps are read per field.
 * @param llmResult - LLM pass output, or `null`/`undefined` when no LLM result is available.
 * @param policy - Optional merge policy forwarded unchanged to {@link merge.field}.
 * @param logger - Optional logger forwarded unchanged to {@link merge.field}.
 * @returns `{ data, confidence, conflicts, missing, rulesMatched }` where
 *   `missing` lists the fields whose fused value is `null` and `rulesMatched`
 *   counts the fields the rules pass produced a value for.
 */
function fuseAllFields(schemaKeys, rulesResult, llmResult, policy, logger) {
    // Own-property lookups only: the `in` operator and plain indexing also see
    // inherited members, so a schema field named `toString` or `constructor`
    // would otherwise be reported as produced by the rules or the LLM.
    const hasOwn = (target, key) => target !== null &&
        target !== undefined &&
        Object.prototype.hasOwnProperty.call(target, key);
    const data = {};
    const confidence = {};
    const conflicts = [];
    const missing = [];
    let rulesMatched = 0;
    for (const field of schemaKeys) {
        const hasRuleValue = hasOwn(rulesResult.values, field);
        // hasRuleValue implies confidence[field] is defined — rule.apply only writes
        // to `confidence` when it also writes to `values`.
        const ruleMatch = hasRuleValue
            ? {
                value: rulesResult.values[field],
                confidence: rulesResult.confidence[field],
            }
            : null;
        if (hasRuleValue) {
            rulesMatched += 1;
        }
        const llmValues = llmResult?.values;
        const llmValue = hasOwn(llmValues, field) ? llmValues[field] ?? null : null;
        const fused = merge.field(field, ruleMatch, llmValue, policy, logger);
        data[field] = fused.value;
        confidence[field] = fused.confidence;
        if (fused.conflict !== undefined) {
            conflicts.push(fused.conflict);
        }
        // A null fused value means neither pass produced anything usable.
        if (fused.value === null) {
            missing.push(field);
        }
    }
    return { data, confidence, conflicts, missing, rulesMatched };
}
38
/**
 * Thread the merged data through each configured normalizer, in declaration
 * order. Every normalizer receives the value returned by the previous one
 * together with the original content, and whatever the last one returns is
 * what the caller gets back. With no normalizers the input is returned as-is.
 */
function runNormalizers(data, normalizers, content) {
    return [...(normalizers ?? [])].reduce((accumulated, normalize) => normalize(accumulated, content), data);
}
50
/**
 * Build the violation list for the normalized data.
 *
 * Schema issues come first: each issue reported by `schema.safeParse` becomes
 * an `error`-severity violation tagged with rule `'schema'`, except issues
 * whose first path segment names a field already listed in `missing`. The
 * violations returned by every configured validator are appended afterwards.
 */
function collectViolations(schema, normalized, missing, validators) {
    const alreadyMissing = new Set(missing);
    const outcome = schema.safeParse(normalized);
    const schemaIssues = outcome.success ? [] : outcome.error.issues;
    const collected = [];
    for (const issue of schemaIssues) {
        const head = issue.path[0];
        // Only string path heads identify a schema field.
        const field = typeof head === 'string' ? head : undefined;
        const tracked = field !== undefined && alreadyMissing.has(field);
        if (!tracked) {
            collected.push({
                field,
                rule: 'schema',
                message: issue.message,
                severity: 'error',
            });
        }
    }
    for (const validate of validators ?? []) {
        for (const violation of validate(normalized)) {
            collected.push(violation);
        }
    }
    return collected;
}
79
/**
 * Field-level and object-level merge primitives.
 *
 * Exposes {@link merge.defaultFieldPolicy} (library defaults),
 * {@link merge.field} (fusion of a single field) and {@link merge.apply}
 * (fusion of a whole object followed by normalization and validation).
 */
export const merge = {
    /**
     * Library defaults applied by {@link merge.field} when the caller omits
     * one or more policy fields. Exposed so consumers can reference or spread
     * them (e.g. `{ ...merge.defaultFieldPolicy, strategy: 'prefer-llm' }`).
     *
     * See {@link FieldMergePolicy} for the meaning of each field.
     */
    defaultFieldPolicy: {
        /** See {@link FieldMergePolicy.strategy}. */
        strategy: 'flag',
        /** See {@link FieldMergePolicy.defaultLlmConfidence}. */
        defaultLlmConfidence: 0.7,
        /** See {@link FieldMergePolicy.flaggedConfidence}. */
        flaggedConfidence: 0.3,
        /** See {@link FieldMergePolicy.agreementConfidence}. */
        agreementConfidence: 1.0,
        /** See {@link FieldMergePolicy.compare}. Case-insensitive for strings, strict equality otherwise. */
        compare: (a, b) => {
            if (typeof a === 'string' && typeof b === 'string') {
                return a.toLowerCase() === b.toLowerCase();
            }
            return a === b;
        },
    },
    /**
     * Fuse a rule match and an LLM value for a single field, following the
     * provided policy. Returns the kept value, its confidence, and a conflict
     * record if the strategy flagged a disagreement.
     *
     * Any policy field omitted from `policy` — or explicitly set to
     * `undefined` — falls back to {@link merge.defaultFieldPolicy}.
     *
     * Decision table (in order): rule-only, llm-only, both-null, agree,
     * prefer-rule, prefer-llm, flag (default fallback).
     *
     * @typeParam T - Type of the rule value.
     * @param field - Name of the field being merged.
     * @param ruleMatch - Value proposed by a deterministic rule, or `null` if none.
     * @param llmValue - Value proposed by the LLM, or `null`/`undefined` if none.
     *   Used as-is without runtime type-check — callers that expose
     *   `merge.field` via `merge.apply` rely on the final schema re-validation
     *   to reject invalid LLM values.
     * @param policy - Optional strategy and confidence overrides.
     * @param logger - Optional logger; `warn` is called when the resolved
     *   strategy is none of `prefer-rule`, `prefer-llm` or `flag`.
     * @returns `{ value, confidence, conflict }`; `conflict` is `undefined`
     *   unless the disagreement was flagged.
     */
    field(field, ruleMatch, llmValue, policy, logger) {
        // Drop explicitly-undefined overrides before spreading: a plain spread
        // would let `{ compare: undefined }` erase the default comparator and
        // crash on the agreement check below.
        const overrides = Object.fromEntries(Object.entries(policy ?? {}).filter(([, value]) => value !== undefined));
        const fullPolicy = { ...merge.defaultFieldPolicy, ...overrides };
        const normalizedLlm = llmValue ?? null;
        // Rule-only: keep the rule value with its own confidence.
        if (ruleMatch !== null && normalizedLlm === null) {
            return {
                value: ruleMatch.value,
                confidence: ruleMatch.confidence,
                conflict: undefined,
            };
        }
        // LLM-only: keep the LLM value with the policy's default LLM confidence.
        if (ruleMatch === null && normalizedLlm !== null) {
            return {
                value: normalizedLlm,
                confidence: fullPolicy.defaultLlmConfidence,
                conflict: undefined,
            };
        }
        // Only the both-null case can reach this guard with a null operand.
        if (ruleMatch === null || normalizedLlm === null) {
            return { value: null, confidence: null, conflict: undefined };
        }
        // Both sides produced a value: agreement short-circuits every strategy.
        if (fullPolicy.compare(ruleMatch.value, normalizedLlm)) {
            return {
                value: ruleMatch.value,
                confidence: fullPolicy.agreementConfidence,
                conflict: undefined,
            };
        }
        if (fullPolicy.strategy === 'prefer-rule') {
            return {
                value: ruleMatch.value,
                confidence: ruleMatch.confidence,
                conflict: undefined,
            };
        }
        if (fullPolicy.strategy === 'prefer-llm') {
            return {
                value: normalizedLlm,
                confidence: fullPolicy.defaultLlmConfidence,
                conflict: undefined,
            };
        }
        if (fullPolicy.strategy !== 'flag') {
            logger?.warn('unknown conflict strategy, falling back to flag', {
                strategy: fullPolicy.strategy,
                field,
            });
        }
        // Flag: keep the rule value at reduced confidence and record both sides.
        return {
            value: ruleMatch.value,
            confidence: fullPolicy.flaggedConfidence,
            conflict: {
                field,
                ruleValue: ruleMatch.value,
                ruleConfidence: ruleMatch.confidence,
                llmValue: normalizedLlm,
            },
        };
    },
    /**
     * Walk every field of `schema`, fuse the rules pass result with the LLM
     * result via {@link merge.field}, and produce a typed
     * {@link ExtractionResult}.
     *
     * Passing `llmResult = null` runs in rules-only mode: every field keeps
     * whatever the rules produced and `meta.llmCalled` is `false`.
     *
     * Orchestration only — the three phases (fusion, normalization, validation)
     * are delegated to the private helpers `fuseAllFields`, `runNormalizers`
     * and `collectViolations`.
     *
     * `validation.valid` is `true` when no violation has severity `'error'`.
     * `meta.durationMs` is always `0` here.
     *
     * @typeParam S - A Zod object schema.
     * @param schema - Zod object schema describing the target data shape.
     * @param rulesResult - Output of {@link rule.apply} for the same schema.
     * @param llmResult - Parsed LLM response, or `null` for rules-only mode.
     * @param content - Original text the rules and LLM were derived from; forwarded to normalizers so they can cross-reference the source.
     * @param options - Optional behavior overrides (policy, normalizers, validators, logger).
     * @returns The normalized data together with per-field confidence,
     *   conflicts, missing fields, validation outcome and run metadata.
     */
    apply(schema, rulesResult, llmResult, content, options) {
        const schemaKeys = Object.keys(schema.shape);
        const fusion = fuseAllFields(schemaKeys, rulesResult, llmResult, options?.policy, options?.logger);
        const normalized = runNormalizers(fusion.data, options?.normalizers, content);
        const violations = collectViolations(schema, normalized, fusion.missing, options?.validators);
        const valid = !violations.some((v) => v.severity === 'error');
        return {
            data: normalized,
            confidence: fusion.confidence,
            conflicts: fusion.conflicts,
            missing: fusion.missing,
            validation: { valid, violations },
            meta: {
                rulesMatched: fusion.rulesMatched,
                llmCalled: llmResult !== null,
                durationMs: 0,
            },
        };
    },
};
230
+ //# sourceMappingURL=merge.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"merge.js","sourceRoot":"","sources":["../src/merge.ts"],"names":[],"mappings":"AAuBA;;;;GAIG;AACH,SAAS,aAAa,CACpB,UAAuB,EACvB,WAA2B,EAC3B,SAA2B,EAC3B,MAA6C,EAC7C,MAA0B;IAE1B,MAAM,IAAI,GAAG,EAAsB,CAAC;IACpC,MAAM,UAAU,GAAG,EAAuC,CAAC;IAC3D,MAAM,SAAS,GAAe,EAAE,CAAC;IACjC,MAAM,OAAO,GAAgB,EAAE,CAAC;IAChC,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;QAC/B,MAAM,YAAY,GAAG,KAAK,IAAI,WAAW,CAAC,MAAM,CAAC;QACjD,6EAA6E;QAC7E,mDAAmD;QACnD,MAAM,SAAS,GAA8B,YAAY;YACvD,CAAC,CAAC;gBACE,KAAK,EAAE,WAAW,CAAC,MAAM,CAAC,KAAK,CAAC;gBAChC,UAAU,EAAE,WAAW,CAAC,UAAU,CAAC,KAAK,CAAW;aACpD;YACH,CAAC,CAAC,IAAI,CAAC;QACT,IAAI,YAAY,EAAE,CAAC;YACjB,YAAY,IAAI,CAAC,CAAC;QACpB,CAAC;QAED,MAAM,QAAQ,GAAG,SAAS,EAAE,MAAM,CAAC,KAAe,CAAC,IAAI,IAAI,CAAC;QAE5D,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,KAAe,EAAE,SAAS,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;QAEhF,IAAI,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,KAA0B,CAAC;QAC/C,UAAU,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,UAAU,CAAC;QACrC,IAAI,KAAK,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;YACjC,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACjC,CAAC;QACD,IAAI,KAAK,CAAC,KAAK,KAAK,IAAI,EAAE,CAAC;YACzB,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACtB,CAAC;IACH,CAAC;IAED,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,SAAS,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC;AAChE,CAAC;AAED;;;;GAIG;AACH,SAAS,cAAc,CACrB,IAAsB,EACtB,WAAwC,EACxC,OAAe;IAEf,IAAI,OAAO,GAAG,IAAI,CAAC;IACnB,KAAK,MAAM,UAAU,IAAI,WAAW,IAAI,EAAE,EAAE,CAAC;QAC3C,OAAO,GAAG,UAAU,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;IACzC,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;GAIG;AACH,SAAS,iBAAiB,CACxB,MAAkC,EAClC,UAA4B,EAC5B,OAAoB,EACpB,UAA8C;IAE9C,MAAM,UAAU,GAAgB,EAAE,CAAC;IACnC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAmB,CAAC,CAAC;IAChD,MAAM,MAAM,GAAG,MAAM,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;IAC5C,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;YACxC,MAAM,CAAC,SAAS,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC;YAC/B,MAAM,KAAK,GAAG,OAAO,SAAS,KAAK,QAAQ,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC;YACpE,IAAI,KAAK,KAAK,SAAS,IAAI,UAAU,CAAC,GAAG,CAAC
,KAAK,CAAC,EAAE,CAAC;gBACjD,SAAS;YACX,CAAC;YACD,UAAU,CAAC,IAAI,CAAC;gBACd,KAAK;gBACL,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,QAAQ,EAAE,OAAO;aAClB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IACD,KAAK,MAAM,SAAS,IAAI,UAAU,IAAI,EAAE,EAAE,CAAC;QACzC,UAAU,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC;IAC5C,CAAC;IACD,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,MAAM,KAAK,GAAG;IACnB;;;;;;OAMG;IACH,kBAAkB,EAAE;QAClB,6CAA6C;QAC7C,QAAQ,EAAE,MAAM;QAChB,yDAAyD;QACzD,oBAAoB,EAAE,GAAG;QACzB,sDAAsD;QACtD,iBAAiB,EAAE,GAAG;QACtB,wDAAwD;QACxD,mBAAmB,EAAE,GAAG;QACxB,qGAAqG;QACrG,OAAO,EAAE,CAAC,CAAU,EAAE,CAAU,EAAW,EAAE;YAC3C,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,OAAO,CAAC,KAAK,QAAQ,EAAE,CAAC;gBACnD,OAAO,CAAC,CAAC,WAAW,EAAE,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC;YAC7C,CAAC;YACD,OAAO,CAAC,KAAK,CAAC,CAAC;QACjB,CAAC;KACyB;IAE5B;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,KAAK,CACH,KAAa,EACb,SAA8B,EAC9B,QAAiB,EACjB,MAAkC,EAClC,MAAe;QAEf,MAAM,UAAU,GAAqB,EAAE,GAAG,KAAK,CAAC,kBAAkB,EAAE,GAAG,MAAM,EAAE,CAAC;QAChF,MAAM,aAAa,GAAG,QAAQ,IAAI,IAAI,CAAC;QAEvC,IAAI,SAAS,KAAK,IAAI,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YACjD,OAAO;gBACL,KAAK,EAAE,SAAS,CAAC,KAAK;gBACtB,UAAU,EAAE,SAAS,CAAC,UAAU;gBAChC,QAAQ,EAAE,SAAS;aACpB,CAAC;QACJ,CAAC;QAED,IAAI,SAAS,KAAK,IAAI,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YACjD,OAAO;gBACL,KAAK,EAAE,aAAkB;gBACzB,UAAU,EAAE,UAAU,CAAC,oBAAoB;gBAC3C,QAAQ,EAAE,SAAS;aACpB,CAAC;QACJ,CAAC;QAED,IAAI,SAAS,KAAK,IAAI,IAAI,aAAa,KAAK,IAAI,EAAE,CAAC;YACjD,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,CAAC;QAChE,CAAC;QAED,IAAI,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,aAAa,CAAC,EAAE,CAAC;YACvD,OAAO;gBACL,KAAK,EAAE,SAAS,CAAC,KAAK;gBACtB,UAAU,EAAE,UAAU,CAAC,mBAAmB;gBAC1C,QAAQ,EAAE,SAAS;aACpB,CAAC;QACJ,CAAC;QAED,IAAI,UAAU,CAAC,QAAQ,KAAK,aAAa,EAAE,CAAC;YAC1C,OAAO;gBACL,KAAK,EAAE,SAAS,CAAC,KAAK;gBACtB,UAAU,EAAE,SAAS,CAAC,UAAU;gBAChC,QAAQ,EAAE,SAAS;aACpB,CAAC;QACJ,CAAC;QACD,IAAI,UAAU,CAAC,QAAQ,KAAK,YAAY,EAAE,CAAC;YACzC,OAAO;gBACL,KAAK,EAAE,aAAkB;gBACzB,UAAU,EAAE,UAAU,CAAC,oBAAoB;gBAC3C,QAAQ,EAAE,SAAS;aACpB,CAAC;QA
CJ,CAAC;QACD,IAAI,UAAU,CAAC,QAAQ,KAAK,MAAM,EAAE,CAAC;YACnC,MAAM,EAAE,IAAI,CAAC,iDAAiD,EAAE;gBAC9D,QAAQ,EAAE,UAAU,CAAC,QAAQ;gBAC7B,KAAK;aACN,CAAC,CAAC;QACL,CAAC;QACD,OAAO;YACL,KAAK,EAAE,SAAS,CAAC,KAAK;YACtB,UAAU,EAAE,UAAU,CAAC,iBAAiB;YACxC,QAAQ,EAAE;gBACR,KAAK;gBACL,SAAS,EAAE,SAAS,CAAC,KAAK;gBAC1B,cAAc,EAAE,SAAS,CAAC,UAAU;gBACpC,QAAQ,EAAE,aAAa;aACxB;SACF,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;;;;;;OAoBG;IACH,KAAK,CACH,MAAS,EACT,WAAoC,EACpC,SAA2B,EAC3B,OAAe,EACf,OAAuC;QAGvC,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,KAAK,CAAmB,CAAC;QAE/D,MAAM,MAAM,GAAG,aAAa,CAC1B,UAAU,EACV,WAAW,EACX,SAAS,EACT,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,MAAM,CAChB,CAAC;QAEF,MAAM,UAAU,GAAG,cAAc,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;QAE9E,MAAM,UAAU,GAAG,iBAAiB,CAClC,MAAM,EACN,UAAU,EACV,MAAM,CAAC,OAAO,EACd,OAAO,EAAE,UAAU,CACpB,CAAC;QACF,MAAM,KAAK,GAAG,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC;QAE9D,OAAO;YACL,IAAI,EAAE,UAAU;YAChB,UAAU,EAAE,MAAM,CAAC,UAAU;YAC7B,SAAS,EAAE,MAAM,CAAC,SAAS;YAC3B,OAAO,EAAE,MAAM,CAAC,OAAO;YACvB,UAAU,EAAE,EAAE,KAAK,EAAE,UAAU,EAAE;YACjC,IAAI,EAAE;gBACJ,YAAY,EAAE,MAAM,CAAC,YAAY;gBACjC,SAAS,EAAE,SAAS,KAAK,IAAI;gBAC7B,UAAU,EAAE,CAAC;aACd;SACF,CAAC;IACJ,CAAC;CACF,CAAC"}
@@ -0,0 +1,50 @@
1
+ import type { z } from 'zod';
2
+ import type { ExtractionResult, LlmResult } from './types/merge.types.js';
3
+ import type { LlmRequest } from './types/prompt.types.js';
4
+ /**
5
+ * Prompt-building primitives that turn a partial extraction result into an
6
+ * {@link LlmRequest} targeted at the fields the deterministic pass could not
7
+ * produce, and parse the provider response back into an {@link LlmResult}.
8
+ */
9
+ export declare const prompt: {
10
+ /**
11
+ * Build an LLM request restricted to `partial.missing`. The response schema
12
+ * is a JSON Schema covering only those fields, and values already produced
13
+ * by the deterministic pass are surfaced both as `knownValues` and as a
14
+ * hint block prepended to `userContent`.
15
+ *
16
+ * Orchestration only — the phases are response-schema build, known-values
17
+ * collection, user-content formatting and request assembly; the first three
18
+ * are delegated to private helpers in the implementation module.
19
+ *
20
+ * @typeParam S - A Zod object schema describing the full target shape.
21
+ * @param schema - Zod object schema that drives the field selection.
22
+ * @param partial - Output of {@link merge.apply} (or any equivalent partial)
23
+ * — only `data` and `missing` are read.
24
+ * @param content - Original text the request will refer to.
25
+ * @param options - Optional behavior overrides (custom system prompt).
+ * @returns An {@link LlmRequest} carrying `systemPrompt`, `userContent`,
+ * `responseSchema` and `knownValues`.
26
+ * @throws When a missing field uses a Zod kind outside the supported
27
+ * whitelist; the error message names the offending field.
28
+ */
29
+ build<S extends z.ZodObject<z.ZodRawShape>>(schema: S, partial: Pick<ExtractionResult<z.infer<S>>, "data" | "missing">, content: string, options?: {
30
+ systemPrompt?: string;
31
+ }): LlmRequest;
32
+ /**
33
+ * Parse a raw LLM response permissively. Accepts either an already-decoded
34
+ * object or a JSON-encoded string. Each field listed in `missing` is
35
+ * validated individually against its Zod schema — valid fields flow into
36
+ * `values`, invalid ones are dropped and surfaced as warnings. Keys outside
37
+ * `missing` are dropped as well, with a single aggregated warning so the
38
+ * caller can spot a prompt/provider mismatch.
39
+ *
40
+ * Best-effort by design: never throws, always returns an {@link LlmResult}.
41
+ *
42
+ * @typeParam S - A Zod object schema describing the full target shape.
43
+ * @param schema - Zod object schema whose fields back the validation.
44
+ * @param missing - Fields the LLM was expected to produce (typically
45
+ * {@link ExtractionResult.missing}).
46
+ * @param raw - The provider response — object or JSON string.
+ * @returns An {@link LlmResult} with the validated `values`, plus a
+ * `warnings` list when at least one warning was raised.
47
+ */
48
+ parse<S extends z.ZodObject<z.ZodRawShape>>(schema: S, missing: readonly (keyof z.infer<S>)[], raw: unknown): LlmResult;
49
+ };
50
+ //# sourceMappingURL=prompt.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAC7B,OAAO,KAAK,EAEV,gBAAgB,EAChB,SAAS,EACV,MAAM,wBAAwB,CAAC;AAChC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAuK1D;;;;GAIG;AACH,eAAO,MAAM,MAAM;IACjB;;;;;;;;;;;;;;;;;;OAkBG;UACG,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,UAChC,CAAC,WACA,IAAI,CAAC,gBAAgB,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC,WACtD,MAAM,YACL;QAAE,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE,GAClC,UAAU;IAcb;;;;;;;;;;;;;;;OAeG;UACG,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,UAChC,CAAC,WACA,SAAS,CAAC,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,OACjC,OAAO,GACX,SAAS;CAiBb,CAAC"}
package/dist/prompt.js ADDED
@@ -0,0 +1,205 @@
1
+ const DEFAULT_SYSTEM_PROMPT = 'Extract the listed fields from the content as a JSON object.'; // Fallback system prompt used by prompt.build when options.systemPrompt is null or undefined.
2
/**
 * Convert a single Zod field schema to JSON Schema.
 *
 * Supported kinds (read from `zodType.def.type`): `string`, `number`,
 * `boolean`, `enum` and `nullable` wrapping one of the former. A nullable
 * field widens `type` to `[innerType, 'null']`; when the inner schema is an
 * enum, `null` is also appended to the `enum` list, because JSON Schema
 * evaluates `enum` independently of `type` and would otherwise reject the
 * `null` the schema claims to allow.
 *
 * @param zodType - Zod schema of the field; only its `def` is inspected.
 * @param field - Field name, used solely in error messages.
 * @returns The JSON Schema fragment describing the field.
 * @throws Error when the kind is outside the whitelist, or when a nullable
 *   wraps another nullable; the message names the offending field.
 */
function zodFieldToJsonSchema(zodType, field) {
    const def = zodType.def;
    const kind = def.type;
    switch (kind) {
        case 'string':
        case 'number':
        case 'boolean':
            return { type: kind };
        case 'enum':
            return { type: 'string', enum: Object.values(def.entries) };
        case 'nullable': {
            const inner = zodFieldToJsonSchema(def.innerType, field);
            // A nested nullable already carries an array `type`.
            if (typeof inner.type !== 'string') {
                throw new Error(`Unsupported nested nullable on field "${field}"`);
            }
            const nullableEnum = Array.isArray(inner.enum)
                ? { enum: [...inner.enum, null] }
                : {};
            return { ...inner, type: [inner.type, 'null'], ...nullableEnum };
        }
        default:
            throw new Error(`Unsupported Zod type "${kind}" on field "${field}"`);
    }
}
33
/**
 * Build the JSON Schema handed to the LLM, restricted to the fields the
 * deterministic pass could not produce.
 *
 * Entries of `missing` that do not exist in `schema.shape` are skipped, and
 * are left out of both `properties` and `required` so the two lists always
 * describe the same set of fields.
 *
 * @param schema - Object schema whose `shape` maps field names to Zod types.
 * @param missing - Names of the fields to request from the LLM.
 * @returns `{ type: 'object', properties, required }`.
 * @throws Error when a requested field uses an unsupported Zod kind.
 */
function buildResponseSchema(schema, missing) {
    const properties = {};
    const required = [];
    const shape = schema.shape;
    for (const field of missing) {
        const zodField = shape[field];
        if (zodField === undefined) {
            // Unknown field: nothing to describe, so it cannot be required either.
            continue;
        }
        properties[field] = zodFieldToJsonSchema(zodField, field);
        required.push(field);
    }
    return { type: 'object', properties, required };
}
49
/**
 * Extract the values the deterministic pass has already resolved: every own
 * entry of `data` whose key is not listed in `missing` and whose value is
 * neither `null` nor `undefined`.
 */
function collectKnownValues(data, missing) {
    const unresolved = new Set(missing);
    const resolved = {};
    Object.entries(data).forEach(([name, value]) => {
        const isKnown = !unresolved.has(name) && value !== null && value !== undefined;
        if (isKnown) {
            resolved[name] = value;
        }
    });
    return resolved;
}
67
/**
 * Produce the user message sent to the LLM. When known values exist they are
 * listed, one `- key = <JSON value>` line each, under an "Already extracted:"
 * header placed before the content; otherwise the content is returned as-is.
 */
function formatUserContent(content, knownValues) {
    const hints = Object.entries(knownValues).map(([name, value]) => `- ${name} = ${JSON.stringify(value)}`);
    return hints.length === 0
        ? content
        : `Already extracted:\n${hints.join('\n')}\n\n${content}`;
}
80
/**
 * Decode a raw LLM response into a plain object. Accepts either an already
 * parsed object or a JSON-encoded string. Never throws: when the payload
 * cannot be used, a warning message is returned instead.
 *
 * Two failure modes are reported separately so the caller can tell a
 * malformed response from a well-formed one of the wrong shape:
 * - a string that `JSON.parse` rejects → `'response is not valid JSON'`;
 * - a decoded value that is `null`, an array, or not an object
 *   → `'response is not a JSON object'`.
 *
 * @param raw - The provider response — object or JSON string.
 * @returns `{ object }` on success, `{ warning }` otherwise.
 */
function decodeRaw(raw) {
    let candidate = raw;
    if (typeof raw === 'string') {
        try {
            candidate = JSON.parse(raw);
        }
        catch {
            return { warning: 'response is not valid JSON' };
        }
    }
    const isPlainObject = candidate !== null &&
        typeof candidate === 'object' &&
        !Array.isArray(candidate);
    if (!isPlainObject) {
        // Valid JSON (or an already-decoded value), just not an object.
        return { warning: 'response is not a JSON object' };
    }
    return { object: candidate };
}
102
/**
 * Validate the fields the LLM was asked to produce, keeping those that match
 * their Zod schema and collecting a warning per field that fails validation.
 *
 * A field is considered only when it is an own property of the response:
 * inherited members (e.g. `toString` on a plain object) are not treated as
 * LLM output. Fields absent from the response, or unknown to the schema,
 * are skipped silently.
 *
 * @param schema - Object schema whose `shape` maps field names to Zod types.
 * @param missing - Names of the fields the LLM was expected to produce.
 * @param object - Decoded LLM response.
 * @returns `{ values, warnings }` — validated values keyed by field, and one
 *   `field <name>: <reason>` warning per rejected field.
 */
function validateMissingFields(schema, missing, object) {
    const shape = schema.shape;
    const values = {};
    const warnings = [];
    for (const field of missing) {
        // Own-property check: `in` would also match prototype members.
        if (!Object.prototype.hasOwnProperty.call(object, field)) {
            continue;
        }
        const fieldSchema = shape[field];
        if (fieldSchema === undefined) {
            continue;
        }
        const parsed = fieldSchema.safeParse(object[field]);
        if (parsed.success) {
            values[field] = parsed.data;
        }
        else {
            // Surface only the first issue; fall back to a generic reason.
            const reason = parsed.error.issues[0]?.message ?? 'invalid value';
            warnings.push(`field ${field}: ${reason}`);
        }
    }
    return { values, warnings };
}
129
/**
 * List the keys of the response that the LLM was not asked for, i.e. those
 * absent from `missing`, in the order they appear in the response. The caller
 * drops them and reports them in one aggregated warning.
 */
function collectUnexpectedKeys(object, missing) {
    const requested = new Set(missing);
    const extras = [];
    for (const key of Object.keys(object)) {
        if (!requested.has(key)) {
            extras.push(key);
        }
    }
    return extras;
}
138
/**
 * Prompt primitives: build an {@link LlmRequest} for the fields the
 * deterministic pass left unresolved, and parse the provider's answer back
 * into an {@link LlmResult}.
 */
export const prompt = {
    /**
     * Assemble the LLM request for `partial.missing`.
     *
     * The response schema covers only the missing fields; values the
     * deterministic pass already resolved are returned as `knownValues` and
     * also rendered as a hint block in front of `content` inside `userContent`.
     *
     * @typeParam S - A Zod object schema describing the full target shape.
     * @param schema - Zod object schema that drives the field selection.
     * @param partial - Output of {@link merge.apply} (or any equivalent partial)
     *   — only `data` and `missing` are read.
     * @param content - Original text the request will refer to.
     * @param options - Optional behavior overrides (custom system prompt).
     * @returns `{ systemPrompt, userContent, responseSchema, knownValues }`.
     * @throws When a missing field uses a Zod kind outside the supported
     *   whitelist; the error message names the offending field.
     */
    build(schema, partial, content, options) {
        const { data, missing } = partial;
        // Schema first, so an unsupported field fails before anything else runs.
        const responseSchema = buildResponseSchema(schema, missing);
        const knownValues = collectKnownValues(data, missing);
        const systemPrompt = options?.systemPrompt ?? DEFAULT_SYSTEM_PROMPT;
        return {
            systemPrompt,
            userContent: formatUserContent(content, knownValues),
            responseSchema,
            knownValues,
        };
    },
    /**
     * Permissively parse a raw LLM response, given either as a decoded object
     * or as a JSON string.
     *
     * Every field of `missing` found in the response is checked against its
     * own Zod schema: passing fields end up in `values`, failing ones are
     * dropped with a warning. Response keys outside `missing` are dropped too
     * and reported together in a single `unexpected fields dropped: …`
     * warning. An undecodable response yields empty `values` and one warning.
     *
     * Best-effort by design: never throws, always returns an {@link LlmResult}.
     * The `warnings` key is present only when at least one warning exists.
     *
     * @typeParam S - A Zod object schema describing the full target shape.
     * @param schema - Zod object schema whose fields back the validation.
     * @param missing - Fields the LLM was expected to produce (typically
     *   {@link ExtractionResult.missing}).
     * @param raw - The provider response — object or JSON string.
     */
    parse(schema, missing, raw) {
        const decoded = decodeRaw(raw);
        if ('warning' in decoded) {
            return { values: {}, warnings: [decoded.warning] };
        }
        const payload = decoded.object;
        const validated = validateMissingFields(schema, missing, payload);
        const extras = collectUnexpectedKeys(payload, missing);
        const warnings = extras.length > 0
            ? [...validated.warnings, `unexpected fields dropped: ${extras.join(', ')}`]
            : validated.warnings;
        if (warnings.length === 0) {
            return { values: validated.values };
        }
        return { values: validated.values, warnings };
    },
};
205
+ //# sourceMappingURL=prompt.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"prompt.js","sourceRoot":"","sources":["../src/prompt.ts"],"names":[],"mappings":"AAQA,MAAM,qBAAqB,GACzB,8DAA8D,CAAC;AAKjE;;;;;GAKG;AACH,SAAS,oBAAoB,CAAC,OAAgB,EAAE,KAAa;IAC3D,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC;IACxB,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,CAAC;IAEtB,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtB,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;IAC5B,CAAC;IACD,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtB,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;IAC5B,CAAC;IACD,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;QACvB,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;IAC7B,CAAC;IACD,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;QACpB,MAAM,OAAO,GAAG,GAAG,CAAC,OAA0C,CAAC;QAC/D,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC;IAC1D,CAAC;IACD,IAAI,IAAI,KAAK,UAAU,EAAE,CAAC;QACxB,MAAM,KAAK,GAAG,oBAAoB,CAAC,GAAG,CAAC,SAAoB,EAAE,KAAK,CAAC,CAAC;QACpE,IAAI,OAAO,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YACnC,MAAM,IAAI,KAAK,CAAC,yCAAyC,KAAK,GAAG,CAAC,CAAC;QACrE,CAAC;QACD,OAAO,EAAE,GAAG,KAAK,EAAE,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,EAAE,CAAC;IAClD,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,yBAAyB,IAAI,eAAe,KAAK,GAAG,CAAC,CAAC;AACxE,CAAC;AAED;;;GAGG;AACH,SAAS,mBAAmB,CAC1B,MAAkC,EAClC,OAA0B;IAE1B,MAAM,UAAU,GAA4C,EAAE,CAAC;IAC/D,MAAM,KAAK,GAAG,MAAM,CAAC,KAA2C,CAAC;IACjE,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC;QAC9B,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YAC3B,SAAS;QACX,CAAC;QACD,UAAU,CAAC,KAAK,CAAC,GAAG,oBAAoB,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;IAC5D,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,UAAU,EAAE,QAAQ,EAAE,CAAC,GAAG,OAAO,CAAC,EAAE,CAAC;AAChE,CAAC;AAED;;;GAGG;AACH,SAAS,kBAAkB,CACzB,IAAsB,EACtB,OAA6B;IAE7B,MAAM,UAAU,GAAG,IAAI,GAAG,CAAS,OAA4B,CAAC,CAAC;IACjE,MAAM,KAAK,GAA4B,EAAE,CAAC;IAC1C,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;QAChD,IAAI,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,SAAS;QACX,CAAC;QACD,IAAI,KAAK,KAAK,IAAI,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;YAC1C,SAAS;QACX,CAAC;QACD,KAAK,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;IACrB,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED;
;;;GAIG;AACH,SAAS,iBAAiB,CAAC,OAAe,EAAE,WAAoC;IAC9E,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACtC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,OAAO,CAAC;IACjB,CAAC;IACD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,KAAK,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;IAClF,OAAO,uBAAuB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,OAAO,EAAE,CAAC;AACjE,CAAC;AAED;;;;GAIG;AACH,SAAS,SAAS,CAChB,GAAY;IAEZ,IAAI,SAAS,GAAY,GAAG,CAAC;IAC7B,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;QAC5B,IAAI,CAAC;YACH,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC9B,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,OAAO,EAAE,4BAA4B,EAAE,CAAC;QACnD,CAAC;IACH,CAAC;IACD,IACE,SAAS,KAAK,IAAI;QAClB,OAAO,SAAS,KAAK,QAAQ;QAC7B,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,EACxB,CAAC;QACD,OAAO,EAAE,OAAO,EAAE,4BAA4B,EAAE,CAAC;IACnD,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,SAAoC,EAAE,CAAC;AAC1D,CAAC;AAED;;;GAGG;AACH,SAAS,qBAAqB,CAC5B,MAAkC,EAClC,OAA0B,EAC1B,MAA+B;IAE/B,MAAM,KAAK,GAAG,MAAM,CAAC,KAAkC,CAAC;IACxD,MAAM,MAAM,GAA4B,EAAE,CAAC;IAC3C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,IAAI,CAAC,CAAC,KAAK,IAAI,MAAM,CAAC,EAAE,CAAC;YACvB,SAAS;QACX,CAAC;QACD,MAAM,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC;QACjC,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;YAC9B,SAAS;QACX,CAAC;QACD,MAAM,MAAM,GAAG,WAAW,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;QACpD,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YACnB,MAAM,CAAC,KAAK,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC;QAC9B,CAAC;aAAM,CAAC;YACN,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,eAAe,CAAC;YAClE,QAAQ,CAAC,IAAI,CAAC,SAAS,KAAK,KAAK,MAAM,EAAE,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;AAC9B,CAAC;AAED;;;;GAIG;AACH,SAAS,qBAAqB,CAC5B,MAA+B,EAC/B,OAA0B;IAE1B,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IACpC,OAAO,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;AACnE,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,MAAM,MAAM,GAAG;IACpB;;;;;;;;;;;;;;;;;;OAkBG;IACH,KAAK,CACH,MAAS,EACT,OAA+D,EAC/D,O
AAe,EACf,OAAmC;QAGnC,MAAM,OAAO,GAAG,OAAO,CAAC,OAA4B,CAAC;QACrD,MAAM,cAAc,GAAG,mBAAmB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC5D,MAAM,WAAW,GAAG,kBAAkB,CAAO,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;QAC5E,MAAM,WAAW,GAAG,iBAAiB,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;QAC5D,OAAO;YACL,YAAY,EAAE,OAAO,EAAE,YAAY,IAAI,qBAAqB;YAC5D,WAAW;YACX,cAAc;YACd,WAAW;SACZ,CAAC;IACJ,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACH,KAAK,CACH,MAAS,EACT,OAAsC,EACtC,GAAY;QAEZ,MAAM,WAAW,GAAG,OAA4B,CAAC;QACjD,MAAM,OAAO,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC;QAC/B,IAAI,SAAS,IAAI,OAAO,EAAE,CAAC;YACzB,OAAO,EAAE,MAAM,EAAE,EAAE,EAAE,QAAQ,EAAE,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QACrD,CAAC;QACD,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,qBAAqB,CAChD,MAAM,EACN,WAAW,EACX,OAAO,CAAC,MAAM,CACf,CAAC;QACF,MAAM,UAAU,GAAG,qBAAqB,CAAC,OAAO,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;QACtE,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,QAAQ,CAAC,IAAI,CAAC,8BAA8B,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACvE,CAAC;QACD,OAAO,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC;IACjE,CAAC;CACF,CAAC"}
@@ -0,0 +1,73 @@
1
+ import type { z } from 'zod';
2
+ import type { Logger } from './types/logger.types.js';
3
+ import type { ExtractionRule, RuleMatch, RulesResult } from './types/rule.types.js';
4
+ /**
5
+ * Namespace bundling every primitive used to declare and run deterministic
6
+ * extraction rules.
7
+ */
8
+ export declare const rule: {
9
+ /**
10
+ * Declare a deterministic extraction rule targeting a single schema field.
11
+ *
12
+ * The `extract` callback receives the raw content and must return either a
13
+ * {@link RuleMatch} or `null` when the rule does not apply.
14
+ *
15
+ * @param field - Name of the schema field the rule writes to.
16
+ * @param extract - Callback that inspects the content and proposes a value.
17
+ * @returns An {@link ExtractionRule} ready to be passed to {@link rule.apply}.
18
+ */
19
+ create(field: string, extract: (content: string) => RuleMatch<unknown> | null): ExtractionRule;
20
+ /**
21
+ * Shortcut to build a regex-based {@link ExtractionRule}. On match, the
22
+ * value defaults to capture group 1 (or the full match if there is none);
23
+ * when `transform` is given, it receives the raw match and returns the value.
24
+ *
25
+ * @typeParam T - Type produced by `transform`, defaults to `string`.
26
+ * @param field - Name of the schema field the rule writes to.
27
+ * @param pattern - Regular expression to evaluate against the content.
28
+ * @param confidenceScore - Confidence score assigned on a successful match.
29
+ * @param transform - Optional mapper from the raw `RegExpMatchArray` to a value.
30
+ * @returns An {@link ExtractionRule} ready to be passed to {@link rule.apply}.
31
+ */
32
+ regex<T = string>(field: string, pattern: RegExp, confidenceScore: number, transform?: (match: RegExpMatchArray) => T): ExtractionRule;
33
+ /**
34
+ * Build a {@link RuleMatch} from a value and a confidence score. Syntactic
35
+ * sugar used inside custom rule callbacks to avoid writing the object literal
36
+ * by hand.
37
+ *
38
+ * @typeParam T - Type of the extracted value.
39
+ * @param value - The extracted value.
40
+ * @param score - Confidence score in `[0, 1]`.
41
+ * @returns A {@link RuleMatch} wrapping `value` and `score`.
42
+ *
43
+ * @example
44
+ * ```ts
45
+ * const ageRule = rule.create('age', (text) => {
46
+ * const match = text.match(/(\d+)\s*years/);
47
+ * return match ? rule.confidence(Number(match[1]), 0.9) : null;
48
+ * });
49
+ * ```
50
+ */
51
+ confidence<T>(value: T, score: number): RuleMatch<T>;
52
+ /**
53
+ * Run every deterministic rule against `content`, collect their matches,
54
+ * resolve collisions by confidence, and type-check each candidate against
55
+ * the Zod schema before accepting it.
56
+ *
57
+ * Behavior:
58
+ * - Rules targeting a field absent from the schema are silently skipped.
59
+ * - On field collisions, the highest-confidence match wins; ties favor the
60
+ * first-declared rule.
61
+ * - Values failing the per-field Zod `safeParse` are discarded and the
62
+ * field falls back to `missing`. An optional logger receives a warning.
63
+ *
64
+ * @typeParam S - A Zod object schema.
65
+ * @param content - Raw content to extract from (typically markdown or text).
66
+ * @param rules - Deterministic rules to evaluate.
67
+ * @param schema - Zod object schema describing the target data shape.
68
+ * @param logger - Optional logger notified when a value is rejected.
69
+ * @returns The deterministic extraction result (values, confidence, missing).
70
+ */
71
+ apply<S extends z.ZodObject<z.ZodRawShape>>(content: string, rules: ExtractionRule[], schema: S, logger?: Logger): RulesResult<z.infer<S>>;
72
+ };
73
+ //# sourceMappingURL=rules.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"rules.d.ts","sourceRoot":"","sources":["../src/rules.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAC7B,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,yBAAyB,CAAC;AACtD,OAAO,KAAK,EAAE,cAAc,EAAE,SAAS,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAC;AAEpF;;;GAGG;AACH,eAAO,MAAM,IAAI;IACf;;;;;;;;;OASG;kBAEM,MAAM,WACJ,CAAC,OAAO,EAAE,MAAM,KAAK,SAAS,CAAC,OAAO,CAAC,GAAG,IAAI,GACtD,cAAc;IAIjB;;;;;;;;;;;OAWG;UACG,CAAC,kBACE,MAAM,WACJ,MAAM,mBACE,MAAM,cACX,CAAC,KAAK,EAAE,gBAAgB,KAAK,CAAC,GACzC,cAAc;IAcjB;;;;;;;;;;;;;;;;;OAiBG;eACQ,CAAC,SAAS,CAAC,SAAS,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC;IAIpD;;;;;;;;;;;;;;;;;;OAkBG;UACG,CAAC,SAAS,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,WAC/B,MAAM,SACR,cAAc,EAAE,UACf,CAAC,WACA,MAAM,GACd,WAAW,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;CAqC3B,CAAC"}