@metaobjectsdev/render 0.8.1-rc.1 → 0.9.0-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/dist/email-document.d.ts +7 -0
  2. package/dist/email-document.d.ts.map +1 -0
  3. package/dist/email-document.js +2 -0
  4. package/dist/email-document.js.map +1 -0
  5. package/dist/extract/coerce.d.ts +15 -0
  6. package/dist/extract/coerce.d.ts.map +1 -0
  7. package/dist/{recover → extract}/coerce.js +87 -13
  8. package/dist/extract/coerce.js.map +1 -0
  9. package/dist/{recover/recover-map.d.ts → extract/extract-map.d.ts} +1 -1
  10. package/dist/{recover/recover-map.d.ts.map → extract/extract-map.d.ts.map} +1 -1
  11. package/dist/{recover/recover-map.js → extract/extract-map.js} +3 -3
  12. package/dist/{recover/recover-map.js.map → extract/extract-map.js.map} +1 -1
  13. package/dist/extract/extract.d.ts +4 -0
  14. package/dist/extract/extract.d.ts.map +1 -0
  15. package/dist/extract/extract.js +157 -0
  16. package/dist/extract/extract.js.map +1 -0
  17. package/dist/{recover → extract}/json-forgiving-reader.d.ts.map +1 -1
  18. package/dist/{recover → extract}/json-forgiving-reader.js +1 -1
  19. package/dist/{recover → extract}/json-forgiving-reader.js.map +1 -1
  20. package/dist/{recover → extract}/locate.d.ts.map +1 -1
  21. package/dist/{recover → extract}/locate.js.map +1 -1
  22. package/dist/extract/normalize.d.ts +4 -0
  23. package/dist/extract/normalize.d.ts.map +1 -0
  24. package/dist/extract/normalize.js +22 -0
  25. package/dist/extract/normalize.js.map +1 -0
  26. package/dist/extract/strip.d.ts.map +1 -0
  27. package/dist/{recover → extract}/strip.js.map +1 -1
  28. package/dist/extract/types.d.ts +160 -0
  29. package/dist/extract/types.d.ts.map +1 -0
  30. package/dist/extract/types.js +221 -0
  31. package/dist/extract/types.js.map +1 -0
  32. package/dist/{recover → extract}/xml-forgiving-reader.d.ts.map +1 -1
  33. package/dist/{recover → extract}/xml-forgiving-reader.js +1 -1
  34. package/dist/{recover → extract}/xml-forgiving-reader.js.map +1 -1
  35. package/dist/index.d.ts +4 -3
  36. package/dist/index.d.ts.map +1 -1
  37. package/dist/index.js +4 -4
  38. package/dist/index.js.map +1 -1
  39. package/dist/prompt/output-format-renderer.d.ts.map +1 -1
  40. package/dist/prompt/output-format-renderer.js +113 -59
  41. package/dist/prompt/output-format-renderer.js.map +1 -1
  42. package/dist/prompt/output-format-spec.d.ts +1 -1
  43. package/dist/prompt/prompt-field.d.ts +1 -1
  44. package/package.json +1 -1
  45. package/src/email-document.ts +6 -0
  46. package/src/extract/KNOWN_GAPS.md +59 -0
  47. package/src/extract/coerce.ts +224 -0
  48. package/src/{recover/recover-map.ts → extract/extract-map.ts} +2 -2
  49. package/src/extract/extract.ts +187 -0
  50. package/src/{recover → extract}/json-forgiving-reader.ts +1 -1
  51. package/src/extract/normalize.ts +23 -0
  52. package/src/extract/types.ts +346 -0
  53. package/src/{recover → extract}/xml-forgiving-reader.ts +1 -1
  54. package/src/index.ts +17 -11
  55. package/src/prompt/output-format-renderer.ts +140 -61
  56. package/src/prompt/output-format-spec.ts +1 -1
  57. package/src/prompt/prompt-field.ts +1 -1
  58. package/dist/recover/coerce.d.ts +0 -5
  59. package/dist/recover/coerce.d.ts.map +0 -1
  60. package/dist/recover/coerce.js.map +0 -1
  61. package/dist/recover/recover.d.ts +0 -4
  62. package/dist/recover/recover.d.ts.map +0 -1
  63. package/dist/recover/recover.js +0 -115
  64. package/dist/recover/recover.js.map +0 -1
  65. package/dist/recover/strip.d.ts.map +0 -1
  66. package/dist/recover/types.d.ts +0 -117
  67. package/dist/recover/types.d.ts.map +0 -1
  68. package/dist/recover/types.js +0 -124
  69. package/dist/recover/types.js.map +0 -1
  70. package/src/recover/KNOWN_GAPS.md +0 -35
  71. package/src/recover/coerce.ts +0 -141
  72. package/src/recover/recover.ts +0 -146
  73. package/src/recover/types.ts +0 -217
  74. /package/dist/{recover → extract}/json-forgiving-reader.d.ts +0 -0
  75. /package/dist/{recover → extract}/locate.d.ts +0 -0
  76. /package/dist/{recover → extract}/locate.js +0 -0
  77. /package/dist/{recover → extract}/strip.d.ts +0 -0
  78. /package/dist/{recover → extract}/strip.js +0 -0
  79. /package/dist/{recover → extract}/xml-forgiving-reader.d.ts +0 -0
  80. /package/src/{recover → extract}/locate.ts +0 -0
  81. /package/src/{recover → extract}/strip.ts +0 -0
@@ -0,0 +1,187 @@
1
+ // Public entry point. Runs the staged pipeline; NEVER throws. Mirrors Java Extract.
2
+
3
+ import {
4
+ Format,
5
+ FieldKind,
6
+ FieldExtraction,
7
+ Tolerance,
8
+ normalizeOptions,
9
+ } from "./types.js";
10
+ import type { FieldSpec, ExtractOptions, ExtractionOutcome, ExtractSchema } from "./types.js";
11
+ import { ExtractionReport } from "./types.js";
12
+ import { strip } from "./strip.js";
13
+ import { locateJson, locateXml } from "./locate.js";
14
+ import { readJson, TRUNCATED } from "./json-forgiving-reader.js";
15
+ import { readXml } from "./xml-forgiving-reader.js";
16
+ import { coerceValue, scalarCoerce, MALFORMED } from "./coerce.js";
17
+
18
/**
 * Forgiving entry point: extracts the fields declared by `schema` from dirty `text`.
 *
 * Stages, in order: complete the options, strip the text, locate the JSON/XML span, read it
 * with the matching forgiving reader, then coerce and classify every declared field.
 * Contract (stated by the module header): never throws.
 *
 * @param text   Raw text to extract from; handed to `strip` as-is (may be null/undefined).
 * @param schema Format, XML root name and field specs to extract against.
 * @param opts   Optional partial overrides, completed by `normalizeOptions`.
 * @returns The extracted `data` record together with the per-field `report`.
 */
export function extract(
  text: string | null | undefined,
  schema: ExtractSchema,
  opts?: Partial<ExtractOptions> | null,
): ExtractionOutcome {
  const options = normalizeOptions(opts);
  const report = new ExtractionReport();
  const data: Record<string, unknown> = {};

  const stripped = strip(text);
  // Every tolerance except STRICT matches keys/tags case-insensitively.
  const caseInsensitive = options.tolerance !== Tolerance.STRICT;
  const isJson = schema.format === Format.JSON;

  const span = isJson
    ? locateJson(stripped)
    : locateXml(stripped, schema.rootName, caseInsensitive);

  // No located span means there is nothing to read: fall back to an empty record.
  let raw: Record<string, unknown> = {};
  if (span != null) {
    raw = isJson ? readJson(span) : readXml(span, caseInsensitive);
  }

  // Flag the degenerate response: nothing was read AND there was no text or no span at all.
  const nothingLocated = stripped.length === 0 || span == null;
  if (nothingLocated && isEmptyRecord(raw)) {
    report.markEmpty();
  }

  extractFields(schema.fields, raw, "", data, report, options, caseInsensitive);
  return { data, report };
}
50
+
51
/**
 * Coerces and classifies every field of one schema level.
 *
 * For each spec in `fields` the raw value is looked up in `raw` and exactly one state is
 * recorded in `report` under the field's dotted path:
 *  - absent: filled from `defaultValue` when it coerces (DEFAULTED), otherwise
 *    LOST_REQUIRED / LOST_OPTIONAL;
 *  - `TRUNCATED` sentinel: MALFORMED;
 *  - array field: each element is coerced on its own; the coerced elements are always written
 *    to `data`, and the field is MALFORMED if any element failed, EXTRACTED otherwise;
 *  - a list where a single value was expected: MALFORMED;
 *  - anything else: coerced via `extractValue`, then EXTRACTED / DEFAULTED, or MALFORMED.
 *
 * @param fields Field specs of this level.
 * @param raw    Forgiving-reader output for this level.
 * @param prefix Dotted path of the enclosing object ("" at the root).
 * @param data   Output record for this level; mutated in place, keyed by `FieldSpec.name`.
 * @param report Accumulator for states and coercion notes; mutated in place.
 * @param o      Completed extract options, forwarded to coercion.
 * @param ci     Whether key lookup is case-insensitive.
 */
function extractFields(
  fields: readonly FieldSpec[],
  raw: Record<string, unknown>,
  prefix: string,
  data: Record<string, unknown>,
  report: ExtractionReport,
  o: ExtractOptions,
  ci: boolean,
): void {
  for (const f of fields) {
    const path = prefix.length === 0 ? f.name : `${prefix}.${f.name}`;
    const present = lookup(raw, f.name, ci);
    if (present === undefined) {
      // FR-011 / Phase B: an absent field with a declared @default fills the value → DEFAULTED
      // (which satisfies a @required field). Generalized to all field kinds: an enum default is
      // its member string as-is; a non-enum default is coerced to the field's kind via the pure
      // scalar coerce (so @default "0" on field.int yields integer 0). A non-coercible non-enum
      // default is treated as no default.
      if (f.defaultValue != null) {
        const coerced =
          f.kind === FieldKind.ENUM ? f.defaultValue : scalarCoerce(f.defaultValue, f);
        if (coerced !== MALFORMED) {
          data[f.name] = coerced;
          report.addCoercion({ fieldPath: path, from: "", to: f.defaultValue, kind: "default" });
          report.set(path, FieldExtraction.DEFAULTED);
          continue;
        }
      }
      report.set(path, f.required ? FieldExtraction.LOST_REQUIRED : FieldExtraction.LOST_OPTIONAL);
      continue;
    }
    if (present === TRUNCATED) {
      // present-but-garbled (empty/cut-off value)
      report.set(path, FieldExtraction.MALFORMED);
      continue;
    }
    if (f.array) {
      // An array field: a single non-list value is treated as a one-element array
      // (e.g. a single repeated-XML tag). Each element is coerced/recursed independently.
      const elements: unknown[] = Array.isArray(present) ? present : [present];
      const out: unknown[] = [];
      let anyMalformed = false;
      // Phase B (array-of-enum): an enum element flows through the SAME enum coercion pipeline a
      // scalar enum uses (extractValue → coerceValue → coerceEnum), and is CLASSIFIED per element
      // by indexed path (tags[0], tags[1], …) exactly as a scalar enum: EXTRACTED / DEFAULTED (via
      // @coerceDefault) / MALFORMED. Non-enum scalar arrays keep their existing behavior (raw
      // element list, no per-element states).
      const enumElements = f.kind === FieldKind.ENUM;
      for (let idx = 0; idx < elements.length; idx++) {
        const elemPath = `${path}[${idx}]`;
        const element = elements[idx];
        // A TRUNCATED sentinel element is present-but-garbled, exactly like a truncated scalar:
        // classify it MALFORMED instead of stringifying the symbol and coercing it as text.
        const v =
          element === TRUNCATED
            ? MALFORMED
            : extractValue(f, element, elemPath, report, o, ci);
        if (v === MALFORMED) {
          anyMalformed = true;
          if (enumElements) report.set(elemPath, FieldExtraction.MALFORMED);
        } else {
          out.push(v);
          if (enumElements) report.set(elemPath, classifyCoerced(elemPath, report));
        }
      }
      // Cross-port contract: a MALFORMED array still places its successfully-coerced
      // elements into data (partial extraction), UNLIKE a MALFORMED scalar which is absent.
      data[f.name] = out;
      report.set(path, anyMalformed ? FieldExtraction.MALFORMED : FieldExtraction.EXTRACTED);
      continue;
    }
    if (Array.isArray(present)) {
      // a list where a singular value was expected
      report.set(path, FieldExtraction.MALFORMED);
      continue;
    }
    const v = extractValue(f, present, path, report, o, ci);
    if (v === MALFORMED) {
      report.set(path, FieldExtraction.MALFORMED);
    } else {
      data[f.name] = v;
      // FR-011: a value reached via @coerceDefault (or @default) is DEFAULTED, not EXTRACTED.
      report.set(path, classifyCoerced(path, report));
    }
  }
}
131
+
132
/**
 * FR-011: decides the state of a field whose coercion succeeded.
 *
 * Looks at the coercions logged for exactly `path`; the last one is the terminal coercion.
 * When its kind is "coerceDefault" or "default" the field is DEFAULTED, in every other case
 * (including no logged coercion at all, e.g. nested objects) it is EXTRACTED.
 *
 * @param path   Field path whose coercion log is inspected.
 * @param report Report holding the coercion log.
 * @returns DEFAULTED or EXTRACTED.
 */
function classifyCoerced(path: string, report: ExtractionReport): FieldExtraction {
  const kindsForPath = report
    .coercions()
    .filter((entry) => entry.fieldPath === path)
    .map((entry) => entry.kind);
  // undefined when nothing was logged for this path.
  const terminalKind = kindsForPath[kindsForPath.length - 1];
  if (terminalKind === "coerceDefault" || terminalKind === "default") {
    return FieldExtraction.DEFAULTED;
  }
  return FieldExtraction.EXTRACTED;
}
144
+
145
/**
 * Coerces a single (non-array) value for field `f`.
 *
 * OBJECT fields recurse into `extractFields` with the nested schema and return the nested
 * record; they yield MALFORMED when the spec has no nested schema or the value is not an
 * object. Every other kind is turned into a string (non-strings via `stringifyScalar`) and
 * passed to `coerceValue`.
 *
 * @returns The coerced value, or the `MALFORMED` sentinel on failure.
 */
function extractValue(
  f: FieldSpec,
  present: unknown,
  path: string,
  report: ExtractionReport,
  o: ExtractOptions,
  ci: boolean,
): unknown | typeof MALFORMED {
  if (f.kind !== FieldKind.OBJECT) {
    const rawStr = typeof present === "string" ? present : stringifyScalar(present);
    return coerceValue(rawStr, f, o, path, report);
  }

  // Object expected: bail out when there is no nested schema or a scalar/list is present.
  if (f.nested == null || !isPlainObject(present)) {
    return MALFORMED;
  }

  const nestedData: Record<string, unknown> = {};
  extractFields(f.nested.fields, present as Record<string, unknown>, path, nestedData, report, o, ci);
  return nestedData;
}
165
+
166
/**
 * Finds the value stored under `name` in `raw`.
 *
 * An exact own-property match always wins. When `ci` is true and there is no exact match, the
 * first own key (in key order) whose lower-cased form equals the lower-cased `name` is used.
 *
 * @returns The stored value, or `undefined` when the key is absent (mirrors Java null).
 */
function lookup(raw: Record<string, unknown>, name: string, ci: boolean): unknown {
  const hasExact = Object.prototype.hasOwnProperty.call(raw, name);
  if (hasExact) {
    return raw[name];
  }
  if (!ci) {
    return undefined;
  }
  const wanted = name.toLowerCase();
  const matchingKey = Object.keys(raw).find((key) => key.toLowerCase() === wanted);
  return matchingKey === undefined ? undefined : raw[matchingKey];
}
175
+
176
/** True for any non-null, non-array value whose `typeof` is "object"; false otherwise. */
function isPlainObject(o: unknown): boolean {
  if (o === null || Array.isArray(o)) {
    return false;
  }
  return typeof o === "object";
}
179
+
180
/** True when `o` has no own enumerable string keys. */
function isEmptyRecord(o: Record<string, unknown>): boolean {
  const ownKeys = Object.keys(o);
  return ownKeys.length < 1;
}
183
+
184
/**
 * Mirror Java String.valueOf for non-string forgiving-reader scalars.
 *
 * Uses the `String(...)` conversion, which never throws: numbers and booleans become their
 * literal text, `null`/`undefined` become "null"/"undefined", and a symbol becomes its
 * description form (a template literal would throw on a symbol instead). A non-array object
 * without its own `toString` stringifies as "[object Object]".
 */
function stringifyScalar(v: unknown): string {
  return String(v);
}
@@ -2,7 +2,7 @@
2
2
  // Mirrors Java JsonForgivingReader. The no-hang + TRUNCATED contracts are load-bearing.
3
3
 
4
4
  /** Sentinel: a key appeared in the text but its value was empty/cut-off (present-but-garbled). */
5
- export const TRUNCATED: unique symbol = Symbol("recover.json.TRUNCATED");
5
+ export const TRUNCATED: unique symbol = Symbol("extract.json.TRUNCATED");
6
6
 
7
7
  /** A character is JSON-insignificant whitespace. Mirrors Java Character.isWhitespace closely enough for the corpus. */
8
8
  function isWhitespace(c: string): boolean {
@@ -0,0 +1,23 @@
1
+ // FR-011: enum-variant normalization for the Coerce stage.
2
+ // ASCII-only by design: enum members are ASCII identifiers, so a pure [A-Za-z0-9]
3
+ // transform is byte-identical across ports and sidesteps locale case-folding (Turkish-İ).
4
+ // Mode comes from the @normalize attr (none|collapse|strip; default strip).
5
+
6
+ export type NormalizeMode = "none" | "collapse" | "strip";
7
+
8
/**
 * Enum-variant normalization with ASCII-only case folding.
 *
 * - "none": returns `s` untouched (no trim, no case change).
 * - "collapse": trims, upper-cases a-z, then replaces each run of whitespace, `_` or `-`
 *   with a single `_`.
 * - any other mode ("strip"): trims, upper-cases a-z, then drops every character outside
 *   `[A-Z0-9]`.
 *
 * NOTE(review): `trim()` and the `\s` class are Unicode-aware in JavaScript (they also match
 * e.g. U+00A0), so "collapse" is not strictly ASCII-only for non-ASCII whitespace — confirm
 * the other ports agree on such input.
 */
export function normalizeEnum(s: string, mode: NormalizeMode): string {
  switch (mode) {
    case "none":
      return s;
    case "collapse":
      return asciiUpper(s.trim()).replace(/[\s_-]+/g, "_");
    default:
      // strip
      return asciiUpper(s.trim()).replace(/[^A-Z0-9]/g, "");
  }
}

/** Upper-cases only the ASCII letters a-z; every other UTF-16 code unit is kept as-is. */
function asciiUpper(s: string): string {
  // Without the `i`/`u` flags, [a-z] matches exactly the code units 97..122.
  return s.replace(/[a-z]/g, (lower) => String.fromCharCode(lower.charCodeAt(0) - 32));
}
@@ -0,0 +1,346 @@
1
+ import type { NormalizeMode } from "./normalize.js";
2
+
3
+ // FR-010 extract engine — types & model (Tier-2 idiomatic TS port).
4
+ //
5
+ // Cross-port REFERENCE is the Java engine
6
+ // (server/java/render/.../extract/). This file ports the Java records/enums to
7
+ // idiomatic TS: enums become string-union `as const` objects (values match the
8
+ // corpus / Java enum names exactly), records become readonly interfaces +
9
+ // factory functions, and the mutable ExtractionReport is a class.
10
+
11
/**
 * Output format the dirty text claims to be. Corpus schema.json uses "JSON"/"XML".
 *
 * Declared as an `as const` object plus a same-named type, so `Format.JSON` is usable as a
 * value and `Format` as the union type of the object's values.
 */
export const Format = {
  JSON: "JSON",
  XML: "XML",
} as const;
export type Format = (typeof Format)[keyof typeof Format];
17
+
18
/**
 * The coercion target kinds the engine understands. OBJECT = nested ExtractSchema.
 *
 * Value object and same-named union type; each value equals its key.
 */
export const FieldKind = {
  STRING: "STRING",
  INT: "INT",
  LONG: "LONG",
  DOUBLE: "DOUBLE",
  BOOLEAN: "BOOLEAN",
  ENUM: "ENUM",
  OBJECT: "OBJECT",
} as const;
export type FieldKind = (typeof FieldKind)[keyof typeof FieldKind];
29
+
30
/**
 * FROZEN cross-port per-field extraction classification. Do not reorder or add
 * without an ADR. These string values are SERIALIZED in the conformance corpus.
 *
 * - EXTRACTED: the value was read and coerced from the text.
 * - DEFAULTED: the value came from a `@default` / `@coerceDefault` fallback.
 * - LOST_OPTIONAL / LOST_REQUIRED: the field was absent and no default applied.
 * - MALFORMED: the field was present but could not be coerced.
 */
export const FieldExtraction = {
  EXTRACTED: "EXTRACTED",
  // A `@default`/`@coerceDefault`-backed value (absent-fill or present-but-uncoercible fallback).
  DEFAULTED: "DEFAULTED",
  LOST_OPTIONAL: "LOST_OPTIONAL",
  LOST_REQUIRED: "LOST_REQUIRED",
  MALFORMED: "MALFORMED",
} as const;
export type FieldExtraction = (typeof FieldExtraction)[keyof typeof FieldExtraction];
43
+
44
/**
 * How forgiving extraction is.
 *
 * STRICT: case-sensitive, minimal repair. NORMAL: case-insensitive keys/tags
 * (default). LOOSE: maximal repair (currently identical to NORMAL — reserved).
 */
export const Tolerance = {
  STRICT: "STRICT",
  NORMAL: "NORMAL",
  LOOSE: "LOOSE",
} as const;
export type Tolerance = (typeof Tolerance)[keyof typeof Tolerance];
54
+
55
/**
 * A recorded normalization/coercion.
 *
 * `kind` is a free-form tag, e.g. "normalize", "alias", "runtime-alias-override", "clamp",
 * "coerceDefault", "default".
 */
export interface Coercion {
  /** Path of the field the coercion applied to. */
  readonly fieldPath: string;
  /** Text before the coercion. */
  readonly from: string;
  /** Text after the coercion. */
  readonly to: string;
  /** Tag naming what kind of coercion happened. */
  readonly kind: string;
}
62
+
63
/**
 * One field's extract descriptor. enumValues/enumAlias non-null only for ENUM;
 * min/max non-null only for numeric range constraints; nested non-null only for OBJECT.
 */
export interface FieldSpec {
  /** Field name; also the key the extracted value is stored under. */
  readonly name: string;
  /** Coercion target kind. */
  readonly kind: FieldKind;
  /** Whether the field is required. */
  readonly required: boolean;
  /** True when the field is a list; each element is coerced on its own. */
  readonly array: boolean;
  /** Allowed enum members (ENUM only). */
  readonly enumValues: readonly string[] | null;
  /** Enum alias table (ENUM only). */
  readonly enumAlias: Readonly<Record<string, string>> | null;
  /** Lower bound of a numeric range constraint; null = none. */
  readonly min: number | null;
  /** Upper bound of a numeric range constraint; null = none. */
  readonly max: number | null;
  /** Schema of the nested object (OBJECT only). */
  readonly nested: ExtractSchema | null;
  /** FR-011: present-but-uncoercible fallback member (from `@coerceDefault`). ENUM-only; null = none. */
  readonly coerceDefault: string | null;
  /**
   * Absent-fill default (from `@default`). When the field is ABSENT, extract fills this value
   * → DEFAULTED (which satisfies `@required`). Generalized to ALL field kinds (Phase B): for an
   * enum it is the member string verbatim; for a non-enum it is coerced to `kind` via the pure
   * scalar coerce (so `@default "0"` on `field.int` yields integer 0). null = no default.
   */
  readonly defaultValue: string | null;
  /** FR-011: resolved enum normalization mode (from `@normalize`; default `"strip"`). */
  readonly normalize: NormalizeMode;
}
89
+
90
/**
 * Builds the spec of a plain (non-array) scalar field of the given `kind`.
 *
 * `defaultValue` is the optional absent-fill `@default` (Phase B — generalized `@default`);
 * omitting it or passing null/undefined stores `null`, the no-default case. All enum, range
 * and nested members are `null`, and `normalize` is "strip".
 *
 * @param name         Field name.
 * @param kind         Coercion target kind.
 * @param required     Whether the field is required.
 * @param defaultValue Optional `@default` text; null/undefined = none.
 */
export function scalar(
  name: string,
  kind: FieldKind,
  required: boolean,
  defaultValue?: string | null,
): FieldSpec {
  const absentFill = defaultValue === undefined ? null : defaultValue;
  const spec: FieldSpec = {
    name,
    kind,
    required,
    array: false,
    enumValues: null,
    enumAlias: null,
    min: null,
    max: null,
    nested: null,
    coerceDefault: null,
    defaultValue: absentFill,
    normalize: "strip",
  };
  return spec;
}
117
+
118
/**
 * Builds the spec of a single-valued (non-array) ENUM field.
 *
 * `values` and `aliases` are shallow-copied; a null `values` stays null, a null `aliases`
 * becomes an empty table. Omitted/null `coerceDefault` and `defaultValue` are stored as null.
 *
 * @param name          Field name.
 * @param required      Whether the field is required.
 * @param values        Allowed enum members, or null.
 * @param aliases       Alias table, or null.
 * @param coerceDefault Optional `@coerceDefault` fallback member.
 * @param normalize     Enum normalization mode (default "strip").
 * @param defaultValue  Optional absent-fill `@default` member.
 */
export function enumField(
  name: string,
  required: boolean,
  values: readonly string[] | null,
  aliases: Readonly<Record<string, string>> | null,
  coerceDefault?: string | null,
  normalize: NormalizeMode = "strip",
  defaultValue?: string | null,
): FieldSpec {
  const enumValues = values == null ? null : [...values];
  const enumAlias = aliases == null ? {} : { ...aliases };
  return {
    name,
    kind: FieldKind.ENUM,
    required,
    array: false,
    enumValues,
    enumAlias,
    min: null,
    max: null,
    nested: null,
    coerceDefault: coerceDefault ?? null,
    defaultValue: defaultValue ?? null,
    normalize,
  };
}
142
+
143
/**
 * Phase B (array-of-enum): builds the spec of an ENUM field that is a list (`array === true`).
 *
 * The spec is the same as the one {@link enumField} builds, except for `array`. `values` and
 * `aliases` are shallow-copied; a null `values` stays null, a null `aliases` becomes an empty
 * table. Omitted/null `coerceDefault` and `defaultValue` are stored as null.
 *
 * @param name          Field name.
 * @param required      Whether the field is required.
 * @param values        Allowed enum members, or null.
 * @param aliases       Alias table, or null.
 * @param coerceDefault Optional `@coerceDefault` fallback member.
 * @param normalize     Enum normalization mode (default "strip").
 * @param defaultValue  Optional absent-fill `@default` member.
 */
export function enumArray(
  name: string,
  required: boolean,
  values: readonly string[] | null,
  aliases: Readonly<Record<string, string>> | null,
  coerceDefault?: string | null,
  normalize: NormalizeMode = "strip",
  defaultValue?: string | null,
): FieldSpec {
  const enumValues = values == null ? null : [...values];
  const enumAlias = aliases == null ? {} : { ...aliases };
  return {
    name,
    kind: FieldKind.ENUM,
    required,
    array: true,
    enumValues,
    enumAlias,
    min: null,
    max: null,
    nested: null,
    coerceDefault: coerceDefault ?? null,
    defaultValue: defaultValue ?? null,
    normalize,
  };
}
173
+
174
/**
 * Builds the spec of a non-array field of `kind` carrying a `min`/`max` range constraint.
 *
 * `FieldSpec.defaultValue` applies to all field kinds, so a range-constrained field may also
 * carry an absent-fill `@default`: pass it as the optional trailing `defaultValue`. Omitting
 * it (the original five-argument call) or passing null/undefined stores `null`, i.e. no
 * default.
 *
 * @param name         Field name.
 * @param kind         Coercion target kind.
 * @param required     Whether the field is required.
 * @param min          Lower bound, or null for none.
 * @param max          Upper bound, or null for none.
 * @param defaultValue Optional `@default` text; null/undefined = none.
 */
export function range(
  name: string,
  kind: FieldKind,
  required: boolean,
  min: number | null,
  max: number | null,
  defaultValue?: string | null,
): FieldSpec {
  return {
    name,
    kind,
    required,
    array: false,
    enumValues: null,
    enumAlias: null,
    min,
    max,
    nested: null,
    coerceDefault: null,
    defaultValue: defaultValue ?? null,
    normalize: "strip",
  };
}
196
+
197
/**
 * Builds the spec of an OBJECT field whose value is described by the `nested` schema.
 *
 * @param name     Field name.
 * @param required Whether the field is required.
 * @param array    True when the field is a list of such objects.
 * @param nested   Schema of the nested object, or null.
 */
export function object(
  name: string,
  required: boolean,
  array: boolean,
  nested: ExtractSchema | null,
): FieldSpec {
  const spec: FieldSpec = {
    name,
    kind: FieldKind.OBJECT,
    required,
    array,
    enumValues: null,
    enumAlias: null,
    min: null,
    max: null,
    nested,
    coerceDefault: null,
    defaultValue: null,
    normalize: "strip",
  };
  return spec;
}
213
+
214
/** Top-level extract descriptor. rootName = the XML root tag / logical JSON root name. */
export interface ExtractSchema {
  /** Format the text is read as. */
  readonly format: Format;
  /** XML root tag / logical JSON root name. */
  readonly rootName: string;
  /** Specs of the fields at this level. */
  readonly fields: readonly FieldSpec[];
}
220
+
221
/**
 * Builds an {@link ExtractSchema}. `fields` is shallow-copied; null becomes an empty list.
 *
 * @param format   Format the text is read as.
 * @param rootName XML root tag / logical JSON root name.
 * @param fields   Field specs, or null for none.
 */
export function extractSchema(
  format: Format,
  rootName: string,
  fields: readonly FieldSpec[] | null,
): ExtractSchema {
  const ownFields = [...(fields ?? [])];
  return { format, rootName, fields: ownFields };
}
224
+
225
/**
 * The single bespoke-coercion hook (the bounded "20%").
 *
 * Receives the field path, the raw text value and the field's FieldSpec; return null to fall
 * through to default coercion.
 */
export type OnField = (fieldPath: string, rawValue: string, spec: FieldSpec) => unknown | null;
230
+
231
/**
 * Bounded runtime override surface. aliases/normalizers are MERGED with the
 * schema's, runtime winning on key conflict. onField is the single hook.
 */
export interface ExtractOptions {
  /** How forgiving extraction is. */
  readonly tolerance: Tolerance;
  /** Runtime alias table. */
  readonly aliases: Readonly<Record<string, string>>;
  /** Runtime normalizers by key; each maps raw text to a value or null. */
  readonly normalizers: Readonly<Record<string, (raw: string) => unknown | null>>;
  /** Optional per-field hook; null = none. */
  readonly onField: OnField | null;
}
241
+
242
/**
 * Returns a fresh default options bag: NORMAL tolerance, empty alias and normalizer tables,
 * and no `onField` hook.
 */
export function defaults(): ExtractOptions {
  const options: ExtractOptions = {
    tolerance: Tolerance.NORMAL,
    aliases: {},
    normalizers: {},
    onField: null,
  };
  return options;
}
245
+
246
/**
 * Completes a partial / missing options bag into a full {@link ExtractOptions}.
 *
 * A null/undefined bag yields `defaults()`. Otherwise each missing member gets its default
 * (NORMAL tolerance, empty tables, null hook), and the alias and normalizer tables are
 * shallow-copied.
 */
export function normalizeOptions(opts?: Partial<ExtractOptions> | null): ExtractOptions {
  if (opts === null || opts === undefined) {
    return defaults();
  }
  const { tolerance, aliases, normalizers, onField } = opts;
  return {
    tolerance: tolerance ?? Tolerance.NORMAL,
    aliases: { ...(aliases ?? {}) },
    normalizers: { ...(normalizers ?? {}) },
    onField: onField ?? null,
  };
}
256
+
257
/** Engine return. data is a forgiving record; Phase-2 codegen wraps it into a typed ExtractionResult<T>. */
export interface ExtractionOutcome {
  /** Extracted values keyed by field name. */
  readonly data: Record<string, unknown>;
  /** Per-field classification and coercion notes for this extraction. */
  readonly report: ExtractionReport;
}
262
+
263
/** Typed result of a generated extract(...): best-effort value (null where lost/malformed) + report. */
export interface ExtractionResult<T> {
  /** Best-effort typed value, or null. */
  readonly data: T | null;
  /** Per-field classification and coercion notes for this extraction. */
  readonly report: ExtractionReport;
}
268
+
269
/**
 * Error raised by {@link orThrow} when an {@link ExtractionResult} lost a `@required` field.
 * Mirrors Java's `ExtractException`.
 *
 * The message lists the lost paths joined by ", "; `lostRequired` holds a private copy of
 * the paths passed to the constructor.
 */
export class ExtractError extends Error {
  readonly lostRequired: readonly string[];

  constructor(lostRequired: readonly string[]) {
    super("extract: required field(s) lost: " + lostRequired.join(", "));
    this.name = "ExtractError";
    // Copy so later changes to the caller's array do not leak into the error.
    this.lostRequired = Array.from(lostRequired);
  }
}
281
+
282
/**
 * Opt-in strictness over a never-throwing {@link ExtractionResult}. Mirrors Java
 * `ExtractionResult.orThrow()`.
 *
 * <p>TS divergence from Java (documented): `ExtractionResult` is a plain interface (the generated
 * output-parsers build it as an object literal), so `orThrow` is a free function rather than a
 * method on the result. Semantics are identical.</p>
 *
 * @param result The result to check.
 * @returns `result.data` when the report has no lost `@required` field.
 * @throws ExtractError carrying the lost-required paths when the report has at least one.
 */
export function orThrow<T>(result: ExtractionResult<T>): T | null {
  const { report, data } = result;
  if (!report.hasLostRequired()) {
    return data;
  }
  throw new ExtractError(report.lostRequired());
}
297
+
298
/**
 * Mutable accumulator for one extraction: the per-field classification, the
 * degenerate-response ("empty") flag, and the log of coercion notes.
 */
export class ExtractionReport {
  // Insertion-ordered (Map preserves insertion order, mirroring Java LinkedHashMap).
  private readonly _states = new Map<string, FieldExtraction>();
  private readonly _coercions: Coercion[] = [];
  private _empty = false;

  /** Records (or overwrites) the state of `fieldPath`. */
  set(fieldPath: string, state: FieldExtraction): void {
    this._states.set(fieldPath, state);
  }

  /** Appends a coercion note to the log. */
  addCoercion(c: Coercion): void {
    this._coercions.push(c);
  }

  /** Flags the response as degenerate/empty. */
  markEmpty(): void {
    this._empty = true;
  }

  /** Whether `markEmpty` has been called. */
  isEmpty(): boolean {
    return this._empty;
  }

  /** Snapshot copy of the per-field states, in insertion order. */
  states(): ReadonlyMap<string, FieldExtraction> {
    return new Map(this._states);
  }

  /** Snapshot copy of the coercion log, in logging order. */
  coercions(): readonly Coercion[] {
    return this._coercions.slice();
  }

  /** Paths currently classified LOST_REQUIRED, in insertion order. */
  lostRequired(): string[] {
    return this.byState(FieldExtraction.LOST_REQUIRED);
  }

  /** Paths currently classified MALFORMED, in insertion order. */
  malformed(): string[] {
    return this.byState(FieldExtraction.MALFORMED);
  }

  /** True when at least one path is classified LOST_REQUIRED. */
  hasLostRequired(): boolean {
    return this.lostRequired().length > 0;
  }

  /** Collects the paths whose state equals `s`. */
  private byState(s: FieldExtraction): string[] {
    return Array.from(this._states)
      .filter(([, state]) => state === s)
      .map(([fieldPath]) => fieldPath);
  }
}
@@ -44,7 +44,7 @@ function parseChildren(inner: string, ci: boolean, out: Record<string, unknown>)
44
44
  contentEnd = close.index;
45
45
  next = close.index + close[0].length;
46
46
  } else {
47
- // unclosed tag: recover text up to the next sibling open tag
47
+ // unclosed tag: extract text up to the next sibling open tag
48
48
  const sib = matchFrom(OPEN_TAG_SRC, flags, inner, contentStart);
49
49
  if (sib != null) {
50
50
  contentEnd = sib.index;
package/src/index.ts CHANGED
@@ -12,28 +12,31 @@ export {
12
12
  type VerifyOptions,
13
13
  } from "./verify.js";
14
14
 
15
- // FR-010 tolerant recover engine (Tier-2 forgiving parser).
16
- export { recover } from "./recover/recover.js";
15
+ // FR-010 tolerant extract engine (Tier-2 forgiving parser).
16
+ export { extract } from "./extract/extract.js";
17
17
  export {
18
18
  Format,
19
19
  FieldKind,
20
- FieldRecovery,
20
+ FieldExtraction,
21
21
  Tolerance,
22
- RecoveryReport,
22
+ ExtractionReport,
23
23
  scalar,
24
24
  enumField,
25
+ enumArray,
25
26
  range,
26
27
  object,
27
- recoverSchema,
28
+ extractSchema,
28
29
  defaults,
30
+ orThrow,
31
+ ExtractError,
29
32
  type FieldSpec,
30
- type RecoverSchema,
31
- type RecoverOptions,
32
- type RecoverOutcome,
33
- type RecoveryResult,
33
+ type ExtractSchema,
34
+ type ExtractOptions,
35
+ type ExtractionOutcome,
36
+ type ExtractionResult,
34
37
  type Coercion,
35
38
  type OnField,
36
- } from "./recover/types.js";
39
+ } from "./extract/types.js";
37
40
  export {
38
41
  asString,
39
42
  asInt,
@@ -41,7 +44,7 @@ export {
41
44
  asDouble,
42
45
  asBool,
43
46
  asStringList,
44
- } from "./recover/recover-map.js";
47
+ } from "./extract/extract-map.js";
45
48
 
46
49
  // FR-010 artifact 1 — output-format prompt renderer ("produce your answer like this").
47
50
  export { renderOutputFormat } from "./prompt/output-format-renderer.js";
@@ -53,3 +56,6 @@ export {
53
56
  } from "./prompt/prompt-overrides.js";
54
57
  export type { OutputFormatSpec } from "./prompt/output-format-spec.js";
55
58
  export type { PromptField } from "./prompt/prompt-field.js";
59
+
60
+ // template.output render-helper result shape (shared per port).
61
+ export type { EmailDocument } from "./email-document.js";