@metaobjectsdev/render 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/dist/extract/coerce.js +17 -8
  2. package/dist/extract/coerce.js.map +1 -1
  3. package/dist/extract/extract.d.ts.map +1 -1
  4. package/dist/extract/extract.js +35 -9
  5. package/dist/extract/extract.js.map +1 -1
  6. package/dist/extract/json-forgiving-reader.d.ts +7 -0
  7. package/dist/extract/json-forgiving-reader.d.ts.map +1 -1
  8. package/dist/extract/json-forgiving-reader.js +12 -1
  9. package/dist/extract/json-forgiving-reader.js.map +1 -1
  10. package/dist/extract/types.d.ts +19 -0
  11. package/dist/extract/types.d.ts.map +1 -1
  12. package/dist/extract/types.js +9 -1
  13. package/dist/extract/types.js.map +1 -1
  14. package/dist/extract/xml-forgiving-reader.d.ts +10 -0
  15. package/dist/extract/xml-forgiving-reader.d.ts.map +1 -1
  16. package/dist/extract/xml-forgiving-reader.js +96 -11
  17. package/dist/extract/xml-forgiving-reader.js.map +1 -1
  18. package/dist/index.d.ts +2 -2
  19. package/dist/index.d.ts.map +1 -1
  20. package/dist/index.js +2 -2
  21. package/dist/index.js.map +1 -1
  22. package/dist/recover/coerce.d.ts +5 -0
  23. package/dist/recover/coerce.d.ts.map +1 -0
  24. package/dist/recover/coerce.js +124 -0
  25. package/dist/recover/coerce.js.map +1 -0
  26. package/dist/recover/json-forgiving-reader.d.ts +5 -0
  27. package/dist/recover/json-forgiving-reader.d.ts.map +1 -0
  28. package/dist/recover/json-forgiving-reader.js +178 -0
  29. package/dist/recover/json-forgiving-reader.js.map +1 -0
  30. package/dist/recover/locate.d.ts +5 -0
  31. package/dist/recover/locate.d.ts.map +1 -0
  32. package/dist/recover/locate.js +75 -0
  33. package/dist/recover/locate.js.map +1 -0
  34. package/dist/recover/recover-map.d.ts +7 -0
  35. package/dist/recover/recover-map.d.ts.map +1 -0
  36. package/dist/recover/recover-map.js +36 -0
  37. package/dist/recover/recover-map.js.map +1 -0
  38. package/dist/recover/recover.d.ts +4 -0
  39. package/dist/recover/recover.d.ts.map +1 -0
  40. package/dist/recover/recover.js +115 -0
  41. package/dist/recover/recover.js.map +1 -0
  42. package/dist/recover/strip.d.ts +2 -0
  43. package/dist/recover/strip.d.ts.map +1 -0
  44. package/dist/recover/strip.js +17 -0
  45. package/dist/recover/strip.js.map +1 -0
  46. package/dist/recover/types.d.ts +117 -0
  47. package/dist/recover/types.d.ts.map +1 -0
  48. package/dist/recover/types.js +124 -0
  49. package/dist/recover/types.js.map +1 -0
  50. package/dist/recover/xml-forgiving-reader.d.ts +2 -0
  51. package/dist/recover/xml-forgiving-reader.d.ts.map +1 -0
  52. package/dist/recover/xml-forgiving-reader.js +79 -0
  53. package/dist/recover/xml-forgiving-reader.js.map +1 -0
  54. package/dist/verify.d.ts +24 -0
  55. package/dist/verify.d.ts.map +1 -1
  56. package/dist/verify.js +21 -5
  57. package/dist/verify.js.map +1 -1
  58. package/package.json +32 -21
  59. package/src/extract/coerce.ts +17 -8
  60. package/src/extract/extract.ts +35 -11
  61. package/src/extract/json-forgiving-reader.ts +12 -2
  62. package/src/extract/types.ts +24 -1
  63. package/src/extract/xml-forgiving-reader.ts +99 -12
  64. package/src/index.ts +4 -0
  65. package/src/verify.ts +37 -11
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@metaobjectsdev/render",
3
- "version": "0.9.0",
3
+ "version": "0.10.0",
4
4
  "description": "Logic-less, deterministic text render engine (Mustache) for MetaObjects templates — provider-resolved partials, format-driven escaping, zero core dependency.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -12,7 +12,12 @@
12
12
  "default": "./dist/index.js"
13
13
  }
14
14
  },
15
- "files": ["dist", "src", "README.md", "LICENSE"],
15
+ "files": [
16
+ "dist",
17
+ "src",
18
+ "README.md",
19
+ "LICENSE"
20
+ ],
16
21
  "scripts": {
17
22
  "build": "tsc -p .",
18
23
  "typecheck": "tsc -p tsconfig.typecheck.json"
@@ -21,23 +26,29 @@
21
26
  "author": "Doug Mealing <doug@dougmealing.com>",
22
27
  "homepage": "https://metaobjects.dev",
23
28
  "bugs": {
24
- "url": "https://github.com/metaobjectsdev/metaobjects/issues"
25
- },
26
- "repository": {
27
- "type": "git",
28
- "url": "https://github.com/metaobjectsdev/metaobjects.git",
29
- "directory": "server/typescript/packages/render"
30
- },
31
- "keywords": ["metaobjects", "render", "mustache", "prompt", "template"],
32
- "publishConfig": {
33
- "access": "public"
34
- },
35
- "dependencies": {
36
- "mustache": "^4.2.0"
37
- },
38
- "devDependencies": {
39
- "@types/mustache": "^4.2.5",
40
- "bun-types": "latest",
41
- "typescript": "^5.6.0"
42
- }
29
+ "url": "https://github.com/metaobjectsdev/metaobjects/issues"
30
+ },
31
+ "repository": {
32
+ "type": "git",
33
+ "url": "https://github.com/metaobjectsdev/metaobjects.git",
34
+ "directory": "server/typescript/packages/render"
35
+ },
36
+ "keywords": [
37
+ "metaobjects",
38
+ "render",
39
+ "mustache",
40
+ "prompt",
41
+ "template"
42
+ ],
43
+ "publishConfig": {
44
+ "access": "public"
45
+ },
46
+ "dependencies": {
47
+ "mustache": "^4.2.0"
48
+ },
49
+ "devDependencies": {
50
+ "@types/mustache": "^4.2.5",
51
+ "bun-types": "latest",
52
+ "typescript": "^5.6.0"
53
+ }
43
54
  }
package/src/extract/coerce.ts CHANGED
@@ -49,9 +49,9 @@ export function coerceValue(
49
49
  return coerceEnum(raw, spec, opts, fieldPath, report, ci);
50
50
  case FieldKind.INT:
51
51
  case FieldKind.LONG:
52
- return coerceInt(raw, spec, fieldPath, report);
52
+ return coerceInt(raw, spec, fieldPath, report, ci);
53
53
  case FieldKind.DOUBLE:
54
- return coerceDouble(raw, spec, fieldPath, report);
54
+ return coerceDouble(raw, spec, fieldPath, report, ci);
55
55
  case FieldKind.BOOLEAN:
56
56
  return coerceBool(raw, ci);
57
57
  default:
@@ -171,16 +171,16 @@ function lookupAliasIn(raw: string, aliases: Readonly<Record<string, string>>, m
171
171
  return null;
172
172
  }
173
173
 
174
- function coerceInt(raw: string, spec: FieldSpec, path: string, report: ExtractionReport): unknown | typeof MALFORMED {
174
+ function coerceInt(raw: string, spec: FieldSpec, path: string, report: ExtractionReport, lenient: boolean): unknown | typeof MALFORMED {
175
175
  const n = parseFiniteNumber(raw);
176
176
  if (n === null) return MALFORMED;
177
- return clamp(Math.trunc(n), spec, path, report);
177
+ return clamp(Math.trunc(n), spec, path, report, lenient);
178
178
  }
179
179
 
180
- function coerceDouble(raw: string, spec: FieldSpec, path: string, report: ExtractionReport): unknown | typeof MALFORMED {
180
+ function coerceDouble(raw: string, spec: FieldSpec, path: string, report: ExtractionReport, lenient: boolean): unknown | typeof MALFORMED {
181
181
  const n = parseFiniteNumber(raw);
182
182
  if (n === null) return MALFORMED;
183
- return clamp(n, spec, path, report);
183
+ return clamp(n, spec, path, report, lenient);
184
184
  }
185
185
 
186
186
  /** Parse a trimmed numeric string; null if empty, non-numeric, or non-finite (NaN/±Infinity). */
@@ -194,11 +194,20 @@ function parseFiniteNumber(raw: string): number | null {
194
194
  return Number.isFinite(n) ? n : null;
195
195
  }
196
196
 
197
- function clamp(n: number, spec: FieldSpec, path: string, report: ExtractionReport): number {
197
+ /**
198
+ * Apply the field's min/max range (sourced from its numeric validator). Under LENIENT tolerance an
199
+ * out-of-range value is CLAMPED to the bound (recorded as a "clamp" coercion); under STRICT tolerance
200
+ * it is MALFORMED (the validator's "value out of range" contract). Cross-port: ports must match the
201
+ * lenient-clamp / strict-reject split.
202
+ */
203
+ function clamp(n: number, spec: FieldSpec, path: string, report: ExtractionReport, lenient: boolean): number | typeof MALFORMED {
198
204
  let c = n;
199
205
  if (spec.min != null && c < spec.min) c = spec.min;
200
206
  if (spec.max != null && c > spec.max) c = spec.max;
201
- if (c !== n) report.addCoercion({ fieldPath: path, from: stringify(n), to: stringify(c), kind: "clamp" });
207
+ if (c !== n) {
208
+ if (!lenient) return MALFORMED; // STRICT: out-of-range is invalid, not silently clamped
209
+ report.addCoercion({ fieldPath: path, from: stringify(n), to: stringify(c), kind: "clamp" });
210
+ }
202
211
  return c;
203
212
  }
204
213
 
package/src/extract/extract.ts CHANGED
@@ -11,8 +11,8 @@ import type { FieldSpec, ExtractOptions, ExtractionOutcome, ExtractSchema } from
11
11
  import { ExtractionReport } from "./types.js";
12
12
  import { strip } from "./strip.js";
13
13
  import { locateJson, locateXml } from "./locate.js";
14
- import { readJson, TRUNCATED } from "./json-forgiving-reader.js";
15
- import { readXml } from "./xml-forgiving-reader.js";
14
+ import { readJson, TRUNCATED, NULL_LITERAL } from "./json-forgiving-reader.js";
15
+ import { readXml, readXmlRootless, TEXT_KEY } from "./xml-forgiving-reader.js";
16
16
  import { coerceValue, scalarCoerce, MALFORMED } from "./coerce.js";
17
17
 
18
18
  /** The forgiving entry point: extract dirty `text` against `schema`. Never throws. */
@@ -28,16 +28,20 @@ export function extract(
28
28
  const stripped = strip(text);
29
29
  const ci = o.tolerance !== Tolerance.STRICT;
30
30
 
31
- const span =
32
- schema.format === Format.JSON ? locateJson(stripped) : locateXml(stripped, schema.rootName, ci);
33
-
31
+ // XML rootless (opts.rootless): the payload's fields ARE the top-level elements — there is no
32
+ // enclosing root to locate so parse the whole stripped text's top-level elements directly.
33
+ // Otherwise locate the <rootName> span as before. JSON is unaffected. Mirrors Java Extract.
34
+ let span: string | null;
34
35
  let raw: Record<string, unknown>;
35
- if (span == null) {
36
- raw = {};
37
- } else if (schema.format === Format.JSON) {
38
- raw = readJson(span);
36
+ if (schema.format === Format.JSON) {
37
+ span = locateJson(stripped);
38
+ raw = span == null ? {} : readJson(span);
39
+ } else if (o.rootless) {
40
+ span = stripped.length === 0 ? null : stripped;
41
+ raw = span == null ? {} : readXmlRootless(stripped, ci);
39
42
  } else {
40
- raw = readXml(span, ci);
43
+ span = locateXml(stripped, schema.rootName, ci);
44
+ raw = span == null ? {} : readXml(span, ci);
41
45
  }
42
46
 
43
47
  if (isEmptyRecord(raw) && (stripped.length === 0 || span == null)) {
@@ -59,7 +63,9 @@ function extractFields(
59
63
  ): void {
60
64
  for (const f of fields) {
61
65
  const path = prefix.length === 0 ? f.name : `${prefix}.${f.name}`;
62
- const present = lookup(raw, f.name, ci);
66
+ // A @xmlText field reads the element's text body (carried under the #text sentinel when the
67
+ // element also has attributes), not a same-named child element.
68
+ const present = f.textContent === true ? raw[TEXT_KEY] : lookup(raw, f.name, ci);
63
69
  if (present === undefined) {
64
70
  // FR-011 / Phase B: an absent field with a declared @default fills the value → DEFAULTED
65
71
  // (which satisfies a @required field). Generalized to all field kinds: an enum default is
@@ -84,6 +90,13 @@ function extractFields(
84
90
  report.set(path, FieldExtraction.MALFORMED);
85
91
  continue;
86
92
  }
93
+ if (present === NULL_LITERAL) {
94
+ // The JSON null literal is the caller's explicit "no value": leave the field null
95
+ // (do NOT apply @default — an explicit null is a value, not an omission), matching a
96
+ // standard JSON bind. Without this the bare `null` token leaks as the string "null".
97
+ report.set(path, f.required ? FieldExtraction.LOST_REQUIRED : FieldExtraction.LOST_OPTIONAL);
98
+ continue;
99
+ }
87
100
  if (f.array) {
88
101
  // An array field: a single non-list value is treated as a one-element array
89
102
  // (e.g. a single repeated-XML tag). Each element is coerced/recursed independently.
@@ -151,6 +164,11 @@ function extractValue(
151
164
  o: ExtractOptions,
152
165
  ci: boolean,
153
166
  ): unknown | typeof MALFORMED {
167
+ if (present === NULL_LITERAL) {
168
+ // A JSON null array element (e.g. [1, null, 3]) carries no value → drop it as malformed
169
+ // rather than letting the sentinel stringify.
170
+ return MALFORMED;
171
+ }
154
172
  if (f.kind === FieldKind.OBJECT) {
155
173
  if (f.nested != null && isPlainObject(present)) {
156
174
  const nestedData: Record<string, unknown> = {};
@@ -159,6 +177,12 @@ function extractValue(
159
177
  }
160
178
  return MALFORMED; // object expected but scalar/non-map present
161
179
  }
180
+ // A text element that also carried XML attributes is represented by readXml as a record with
181
+ // the body under TEXT_KEY. A scalar field reads that text (attributes ignored for scalars —
182
+ // preserving pre-attribute-support behaviour).
183
+ if (isPlainObject(present) && Object.prototype.hasOwnProperty.call(present, TEXT_KEY)) {
184
+ present = (present as Record<string, unknown>)[TEXT_KEY];
185
+ }
162
186
  const rawStr = typeof present === "string" ? present : stringifyScalar(present);
163
187
  return coerceValue(rawStr, f, o, path, report);
164
188
  }
package/src/extract/json-forgiving-reader.ts CHANGED
@@ -4,6 +4,14 @@
4
4
  /** Sentinel: a key appeared in the text but its value was empty/cut-off (present-but-garbled). */
5
5
  export const TRUNCATED: unique symbol = Symbol("extract.json.TRUNCATED");
6
6
 
7
+ /**
8
+ * Sentinel: the JSON `null` literal. Distinct from a JS `null` return (which this reader uses
9
+ * internally for "no token / garbled") and from the 4-char string "null". The extract phase maps
10
+ * this to an actual null field value (JSON null → null), instead of letting the bare `null` literal
11
+ * leak through as the text "null".
12
+ */
13
+ export const NULL_LITERAL: unique symbol = Symbol("extract.json.NULL_LITERAL");
14
+
7
15
  /** A character is JSON-insignificant whitespace. Mirrors Java Character.isWhitespace closely enough for the corpus. */
8
16
  function isWhitespace(c: string): boolean {
9
17
  return c === " " || c === "\t" || c === "\n" || c === "\r" || c === "\f" || c === "\v" || /\s/.test(c);
@@ -132,11 +140,13 @@ class Reader {
132
140
  return sb; // unterminated string → return what we have
133
141
  }
134
142
 
135
- private readBareScalar(): string | null {
143
+ private readBareScalar(): string | null | typeof NULL_LITERAL {
136
144
  const start = this.i;
137
145
  while (this.i < this.s.length && ",}]".indexOf(this.s.charAt(this.i)) < 0) this.i++;
138
146
  const result = this.s.substring(start, this.i).trim();
139
- return result.length === 0 ? null : result; // null = no token read (zero-width)
147
+ if (result.length === 0) return null; // no token read (zero-width)
148
+ if (result === "null") return NULL_LITERAL; // JSON null literal → explicit null, NOT the string "null"
149
+ return result;
140
150
  }
141
151
 
142
152
  private ws(): void {
package/src/extract/types.ts CHANGED
@@ -85,6 +85,13 @@ export interface FieldSpec {
85
85
  readonly defaultValue: string | null;
86
86
  /** FR-011: resolved enum normalization mode (from `@normalize`; default `"strip"`). */
87
87
  readonly normalize: NormalizeMode;
88
+ /**
89
+ * `@xmlText`: this field receives its element's TEXT CONTENT (analogous to JAXB `@XmlValue` /
90
+ * Jackson `@JacksonXmlText` / .NET `[XmlText]`). The extract engine reads it from the
91
+ * `#text` sentinel the lenient XML reader carries when an element has both attributes and a
92
+ * text body, instead of a same-named child. Absent/false for normal fields and for JSON.
93
+ */
94
+ readonly textContent?: boolean;
88
95
  }
89
96
 
90
97
  /**
@@ -115,6 +122,14 @@ export function scalar(
115
122
  };
116
123
  }
117
124
 
125
+ /**
126
+ * A field that receives its element's TEXT CONTENT — the `@xmlText` marker (see
127
+ * {@link FieldSpec.textContent}). A scalar with the `textContent` flag set; coerced to `kind`.
128
+ */
129
+ export function textContentField(name: string, kind: FieldKind, required: boolean): FieldSpec {
130
+ return { ...scalar(name, kind, required), textContent: true };
131
+ }
132
+
118
133
  export function enumField(
119
134
  name: string,
120
135
  required: boolean,
@@ -231,16 +246,23 @@ export type OnField = (fieldPath: string, rawValue: string, spec: FieldSpec) =>
231
246
  /**
232
247
  * Bounded runtime override surface. aliases/normalizers are MERGED with the
233
248
  * schema's, runtime winning on key conflict. onField is the single hook.
249
+ *
250
+ * `rootless` (XML only): when `true`, the input has NO enclosing root element — the payload's
251
+ * fields ARE the top-level elements (a flat sequence like `<a>..</a><b>..</b>`). The engine
252
+ * parses those top-level elements directly instead of locating a `<rootName>` span, so the caller
253
+ * need not synthesize a wrapper. No effect for JSON. Default `false` (a single root element is
254
+ * expected, as before). Mirrors Java ExtractOptions.rootless.
234
255
  */
235
256
  export interface ExtractOptions {
236
257
  readonly tolerance: Tolerance;
237
258
  readonly aliases: Readonly<Record<string, string>>;
238
259
  readonly normalizers: Readonly<Record<string, (raw: string) => unknown | null>>;
239
260
  readonly onField: OnField | null;
261
+ readonly rootless: boolean;
240
262
  }
241
263
 
242
264
  export function defaults(): ExtractOptions {
243
- return { tolerance: Tolerance.NORMAL, aliases: {}, normalizers: {}, onField: null };
265
+ return { tolerance: Tolerance.NORMAL, aliases: {}, normalizers: {}, onField: null, rootless: false };
244
266
  }
245
267
 
246
268
  /** Normalize a partial / undefined options bag into a complete ExtractOptions. */
@@ -251,6 +273,7 @@ export function normalizeOptions(opts?: Partial<ExtractOptions> | null): Extract
251
273
  aliases: opts.aliases == null ? {} : { ...opts.aliases },
252
274
  normalizers: opts.normalizers == null ? {} : { ...opts.normalizers },
253
275
  onField: opts.onField ?? null,
276
+ rootless: opts.rootless ?? false,
254
277
  };
255
278
  }
256
279
 
package/src/extract/xml-forgiving-reader.ts CHANGED
@@ -1,5 +1,21 @@
1
1
  // Stage-4 tolerant XML reader for the bounded corpus malformation set. Never throws.
2
- // Mirrors Java XmlForgivingReader. Must not index-out-of-range on a leading close tag.
2
+ // Mirrors Java XmlForgivingReader: maps an element's child elements, text, AND attributes
3
+ // into the field map, and handles self-closing tags (<x a="1"/>). Must not index-out-of-range
4
+ // on a leading close tag.
5
+ //
6
+ // Representation:
7
+ // - text-only element, no attributes → its trimmed text (string) — unchanged
8
+ // - self-closing / attributes-only element → a record of attribute name→value ("" when none)
9
+ // - element with child elements (± attrs) → a record merging attributes + child entries
10
+ // (a child element wins a name collision)
11
+ // - element with text AND attributes → a record of the attributes plus the body text
12
+ // under TEXT_KEY (a scalar consumer unwraps it)
13
+ // - repeated sibling tags → an array (unchanged)
14
+
15
+ /** Reserved key holding an element's own text content when the element is represented as a
16
+ * record (because it also carries attributes). '#' is not a legal XML name char, so it never
17
+ * collides with a real attribute or child-element name. */
18
+ export const TEXT_KEY = "#text";
3
19
 
4
20
  function quote(s: string): string {
5
21
  return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
@@ -12,7 +28,11 @@ function matchFrom(source: string, flags: string, text: string, from: number): R
12
28
  return g.exec(text);
13
29
  }
14
30
 
15
- const OPEN_TAG_SRC = "<([A-Za-z_][A-Za-z0-9_]*)(\\s[^>]*)?>";
31
+ // tag name + everything up to the closing '>' (attributes and/or a trailing '/' for a
32
+ // self-closing tag). Non-greedy so the first '>' closes the open tag.
33
+ const OPEN_TAG_SRC = "<([A-Za-z_][A-Za-z0-9_]*)([^>]*?)>";
34
+ // one attribute: name = "double" | 'single' | bareword.
35
+ const ATTR_SRC = "([A-Za-z_:][A-Za-z0-9_:.\\-]*)\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s/>]+))";
16
36
 
17
37
  export function readXml(span: string | null | undefined, caseInsensitive: boolean): Record<string, unknown> {
18
38
  const out: Record<string, unknown> = {};
@@ -25,6 +45,21 @@ export function readXml(span: string | null | undefined, caseInsensitive: boolea
25
45
  return out;
26
46
  }
27
47
 
48
+ /**
49
+ * Rootless read: parse the WHOLE text's top-level elements directly, with no enclosing root
50
+ * element to strip (a flat sequence like `<a>..</a><b>..</b>`). Used for `ExtractOptions.rootless`
51
+ * responses. Leading/trailing non-element text is ignored. Never throws. Mirrors Java readRootless.
52
+ */
53
+ export function readXmlRootless(
54
+ text: string | null | undefined,
55
+ caseInsensitive: boolean,
56
+ ): Record<string, unknown> {
57
+ const out: Record<string, unknown> = {};
58
+ if (text == null || text.trim().length === 0) return out;
59
+ parseChildren(text, caseInsensitive, out);
60
+ return out;
61
+ }
62
+
28
63
  function parseChildren(inner: string, ci: boolean, out: Record<string, unknown>): void {
29
64
  const flags = ci ? "i" : "";
30
65
  let pos = 0;
@@ -33,8 +68,19 @@ function parseChildren(inner: string, ci: boolean, out: Record<string, unknown>)
33
68
  if (m == null) break;
34
69
  const tag = m[1] ?? "";
35
70
  const key = ci ? tag.toLowerCase() : tag;
36
- const contentStart = m.index + m[0].length;
37
71
 
72
+ let rawAttrs = (m[2] ?? "").trim();
73
+ const selfClosing = rawAttrs.endsWith("/");
74
+ if (selfClosing) rawAttrs = rawAttrs.slice(0, -1).trim();
75
+ const attrs = parseAttrs(rawAttrs, ci);
76
+
77
+ if (selfClosing) {
78
+ accumulate(out, key, Object.keys(attrs).length === 0 ? "" : attrs);
79
+ pos = m.index + m[0].length;
80
+ continue;
81
+ }
82
+
83
+ const contentStart = m.index + m[0].length;
38
84
  const closeRe = `</${quote(tag)}\\s*>`;
39
85
  const close = matchFrom(closeRe, flags, inner, contentStart);
40
86
 
@@ -44,11 +90,25 @@ function parseChildren(inner: string, ci: boolean, out: Record<string, unknown>)
44
90
  contentEnd = close.index;
45
91
  next = close.index + close[0].length;
46
92
  } else {
47
- // unclosed tag: extract text up to the next sibling open tag
93
+ // unclosed tag: extract content up to the next sibling open tag.
48
94
  const sib = matchFrom(OPEN_TAG_SRC, flags, inner, contentStart);
49
95
  if (sib != null) {
50
- contentEnd = sib.index;
51
- next = contentEnd;
96
+ // When the unclosed element's content begins IMMEDIATELY with a child open tag
97
+ // (no leading text), that child was almost certainly meant to be NESTED, not a
98
+ // sibling — a common LLM malformation is dropping the parent's close tag while
99
+ // still emitting a real child element (e.g. <check ...><payoff>text). Absorb the
100
+ // remainder of this span as the unclosed element's content so the child nests
101
+ // under it. When there IS leading text before the first child tag (e.g. <t>hi<c>..),
102
+ // keep the sibling split — the leading text is the unclosed element's body and the
103
+ // following tag is its sibling. Mirrors Java XmlForgivingReader.
104
+ const noLeadingText = inner.substring(contentStart, sib.index).trim().length === 0;
105
+ if (noLeadingText) {
106
+ contentEnd = inner.length;
107
+ next = inner.length;
108
+ } else {
109
+ contentEnd = sib.index;
110
+ next = contentEnd;
111
+ }
52
112
  } else {
53
113
  contentEnd = inner.length;
54
114
  next = inner.length;
@@ -56,16 +116,43 @@ function parseChildren(inner: string, ci: boolean, out: Record<string, unknown>)
56
116
  }
57
117
 
58
118
  const content = inner.substring(contentStart, contentEnd);
59
- const value: unknown = content.includes("<") ? nestedOrText(content, ci) : content.trim();
60
- accumulate(out, key, value);
119
+ accumulate(out, key, combine(attrs, content, ci));
61
120
  pos = next;
62
121
  }
63
122
  }
64
123
 
65
- function nestedOrText(content: string, ci: boolean): unknown {
66
- const nested: Record<string, unknown> = {};
67
- parseChildren(content, ci, nested);
68
- return Object.keys(nested).length === 0 ? content.trim() : nested;
124
+ /** Combine an element's attributes with its body (nested children or plain text). */
125
+ function combine(attrs: Record<string, unknown>, content: string, ci: boolean): unknown {
126
+ if (content.includes("<")) {
127
+ const nested: Record<string, unknown> = {};
128
+ parseChildren(content, ci, nested);
129
+ if (Object.keys(nested).length > 0) {
130
+ // attributes first; a child element wins a name collision
131
+ return { ...attrs, ...nested };
132
+ }
133
+ }
134
+ return textValue(attrs, content);
135
+ }
136
+
137
+ function textValue(attrs: Record<string, unknown>, content: string): unknown {
138
+ const text = content.trim();
139
+ if (Object.keys(attrs).length === 0) return text;
140
+ return { ...attrs, [TEXT_KEY]: text };
141
+ }
142
+
143
+ function parseAttrs(rawAttrs: string, ci: boolean): Record<string, unknown> {
144
+ const attrs: Record<string, unknown> = {};
145
+ if (rawAttrs.length === 0) return attrs;
146
+ const re = new RegExp(ATTR_SRC, "g");
147
+ let a: RegExpExecArray | null;
148
+ while ((a = re.exec(rawAttrs)) != null) {
149
+ const rawName = a[1];
150
+ if (rawName === undefined) continue; // group 1 is mandatory in a match; guards strict TS
151
+ const name = ci ? rawName.toLowerCase() : rawName;
152
+ const val = a[2] ?? a[3] ?? a[4] ?? "";
153
+ if (!Object.prototype.hasOwnProperty.call(attrs, name)) attrs[name] = val;
154
+ }
155
+ return attrs;
69
156
  }
70
157
 
71
158
  function accumulate(out: Record<string, unknown>, key: string, value: unknown): void {
package/src/index.ts CHANGED
@@ -3,11 +3,14 @@ export { type Provider, InMemoryProvider } from "./provider.js";
3
3
  export { ESCAPERS, type RenderFormat } from "./escapers.js";
4
4
  export {
5
5
  verify,
6
+ resolveTemplateVariable,
7
+ parseTemplate,
6
8
  ERR_VAR_NOT_ON_PAYLOAD,
7
9
  ERR_PARTIAL_UNRESOLVED,
8
10
  ERR_REQUIRED_SLOT_UNUSED,
9
11
  ERR_OUTPUT_TAG_MISSING,
10
12
  type PayloadField,
13
+ type ResolveStack,
11
14
  type VerifyError,
12
15
  type VerifyOptions,
13
16
  } from "./verify.js";
@@ -21,6 +24,7 @@ export {
21
24
  Tolerance,
22
25
  ExtractionReport,
23
26
  scalar,
27
+ textContentField,
24
28
  enumField,
25
29
  enumArray,
26
30
  range,
package/src/verify.ts CHANGED
@@ -54,20 +54,34 @@ const MAX_DEPTH = 32;
54
54
 
55
55
  // A Mustache parse token: [type, value, start, end, subTokens?, ...].
56
56
  type Token = readonly unknown[];
57
- // The context stack — innermost context last, mirroring Mustache lookup order.
58
- type Stack = readonly PayloadField[][];
57
+ /**
58
+ * The context stack — innermost context last, mirroring Mustache lookup order.
59
+ * Generic over the field node so consumers (e.g. the docs annotator) can resolve
60
+ * an ENRICHED field tree (carrying owner/type metadata) through the EXACT same
61
+ * walk verify uses, guaranteeing the two surfaces agree.
62
+ */
63
+ export type ResolveStack<F extends PayloadField = PayloadField> = readonly F[][];
59
64
 
60
- function find(fields: PayloadField[], name: string): PayloadField | undefined {
65
+ function find<F extends PayloadField>(fields: F[], name: string): F | undefined {
61
66
  return fields.find((f) => f.name === name);
62
67
  }
63
68
 
64
- // Resolve a (possibly dotted) variable path the way Mustache does: the FIRST
65
- // segment is looked up through the context stack (innermost outermost); each
66
- // remaining segment is a direct descent into the resolved field's `fields`.
67
- // Returns the resolved field, or undefined if any segment is missing.
68
- function resolve(stack: Stack, path: string): PayloadField | undefined {
69
+ /**
70
+ * Resolve a (possibly dotted) variable path the way Mustache does: the FIRST
71
+ * segment is looked up through the context stack (innermost → outermost); each
72
+ * remaining segment is a direct descent into the resolved field's `fields`.
73
+ * Returns the resolved field, or undefined if any segment is missing.
74
+ *
75
+ * EXPORTED so the docs annotator can share this ONE resolution (annotator ⇆
76
+ * verify must agree). Generic over the node type: an enriched tree resolves the
77
+ * same way, since only `name`/`fields` drive the walk.
78
+ */
79
+ export function resolveTemplateVariable<F extends PayloadField>(
80
+ stack: ResolveStack<F>,
81
+ path: string,
82
+ ): F | undefined {
69
83
  const segs = path.split(".");
70
- let current: PayloadField | undefined;
84
+ let current: F | undefined;
71
85
  for (let i = stack.length - 1; i >= 0; i--) {
72
86
  const hit = find(stack[i]!, segs[0]!);
73
87
  if (hit) {
@@ -76,15 +90,27 @@ function resolve(stack: Stack, path: string): PayloadField | undefined {
76
90
  }
77
91
  }
78
92
  for (let i = 1; current && i < segs.length; i++) {
79
- current = current.fields ? find(current.fields, segs[i]!) : undefined;
93
+ current = current.fields ? (find(current.fields, segs[i]!) as F | undefined) : undefined;
80
94
  }
81
95
  return current;
82
96
  }
83
97
 
98
+ // Internal alias preserving the original call sites unchanged.
99
+ const resolve = resolveTemplateVariable;
100
+
84
101
  function parse(text: string): Token[] {
85
102
  return Mustache.parse(text) as unknown as Token[];
86
103
  }
87
104
 
105
+ /**
106
+ * Parse a template into Mustache tokens (`[type, value, start, end, subTokens?]`),
107
+ * the SAME parse verify walks. Exported so the docs annotator tokenizes through
108
+ * one parser (no divergent re-tokenization). Returns a readonly token list.
109
+ */
110
+ export function parseTemplate(text: string): readonly (readonly unknown[])[] {
111
+ return parse(text);
112
+ }
113
+
88
114
  // An opening tag is `<tag` immediately followed by `>` or XML whitespace, so
89
115
  // attributes are allowed (`<answer foo="1">`) but a longer name is not over-matched
90
116
  // (`<answers>` does not satisfy `answer`).
@@ -124,7 +150,7 @@ export function verify(
124
150
  // (no second resolution pass).
125
151
  const staticTexts: string[] = [templateText];
126
152
 
127
- function walk(tokens: Token[], stack: Stack, seen: readonly string[]): void {
153
+ function walk(tokens: Token[], stack: ResolveStack, seen: readonly string[]): void {
128
154
  const atRoot = stack.length === 1 && stack[0] === root;
129
155
  for (const tok of tokens) {
130
156
  const type = tok[0] as string;