@metaobjectsdev/render 0.9.0 → 0.11.0-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -0
- package/dist/extract/coerce.js +17 -8
- package/dist/extract/coerce.js.map +1 -1
- package/dist/extract/extract.d.ts.map +1 -1
- package/dist/extract/extract.js +35 -9
- package/dist/extract/extract.js.map +1 -1
- package/dist/extract/json-forgiving-reader.d.ts +7 -0
- package/dist/extract/json-forgiving-reader.d.ts.map +1 -1
- package/dist/extract/json-forgiving-reader.js +12 -1
- package/dist/extract/json-forgiving-reader.js.map +1 -1
- package/dist/extract/types.d.ts +19 -0
- package/dist/extract/types.d.ts.map +1 -1
- package/dist/extract/types.js +9 -1
- package/dist/extract/types.js.map +1 -1
- package/dist/extract/xml-forgiving-reader.d.ts +10 -0
- package/dist/extract/xml-forgiving-reader.d.ts.map +1 -1
- package/dist/extract/xml-forgiving-reader.js +96 -11
- package/dist/extract/xml-forgiving-reader.js.map +1 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/verify.d.ts +24 -0
- package/dist/verify.d.ts.map +1 -1
- package/dist/verify.js +21 -5
- package/dist/verify.js.map +1 -1
- package/package.json +32 -21
- package/src/extract/coerce.ts +17 -8
- package/src/extract/extract.ts +35 -11
- package/src/extract/json-forgiving-reader.ts +12 -2
- package/src/extract/types.ts +24 -1
- package/src/extract/xml-forgiving-reader.ts +99 -12
- package/src/index.ts +4 -0
- package/src/verify.ts +37 -11
|
@@ -1,5 +1,20 @@
|
|
|
1
1
|
// Stage-4 tolerant XML reader for the bounded corpus malformation set. Never throws.
|
|
2
|
-
// Mirrors Java XmlForgivingReader
|
|
2
|
+
// Mirrors Java XmlForgivingReader: maps an element's child elements, text, AND attributes
|
|
3
|
+
// into the field map, and handles self-closing tags (<x a="1"/>). Must not index-out-of-range
|
|
4
|
+
// on a leading close tag.
|
|
5
|
+
//
|
|
6
|
+
// Representation:
|
|
7
|
+
// - text-only element, no attributes → its trimmed text (string) — unchanged
|
|
8
|
+
// - self-closing / attributes-only element → a record of attribute name→value ("" when none)
|
|
9
|
+
// - element with child elements (± attrs) → a record merging attributes + child entries
|
|
10
|
+
// (a child element wins a name collision)
|
|
11
|
+
// - element with text AND attributes → a record of the attributes plus the body text
|
|
12
|
+
// under TEXT_KEY (a scalar consumer unwraps it)
|
|
13
|
+
// - repeated sibling tags → an array (unchanged)
|
|
14
|
+
/** Reserved key holding an element's own text content when the element is represented as a
|
|
15
|
+
* record (because it also carries attributes). '#' is not a legal XML name char, so it never
|
|
16
|
+
* collides with a real attribute or child-element name. */
|
|
17
|
+
export const TEXT_KEY = "#text";
|
|
3
18
|
function quote(s) {
|
|
4
19
|
return s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
|
|
5
20
|
}
|
|
@@ -9,7 +24,11 @@ function matchFrom(source, flags, text, from) {
|
|
|
9
24
|
g.lastIndex = from;
|
|
10
25
|
return g.exec(text);
|
|
11
26
|
}
|
|
12
|
-
|
|
27
|
+
// tag name + everything up to the closing '>' (attributes and/or a trailing '/' for a
|
|
28
|
+
// self-closing tag). Non-greedy so the first '>' closes the open tag.
|
|
29
|
+
const OPEN_TAG_SRC = "<([A-Za-z_][A-Za-z0-9_]*)([^>]*?)>";
|
|
30
|
+
// one attribute: name = "double" | 'single' | bareword.
|
|
31
|
+
const ATTR_SRC = "([A-Za-z_:][A-Za-z0-9_:.\\-]*)\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s/>]+))";
|
|
13
32
|
export function readXml(span, caseInsensitive) {
|
|
14
33
|
const out = {};
|
|
15
34
|
if (span == null || span.trim().length === 0)
|
|
@@ -22,6 +41,18 @@ export function readXml(span, caseInsensitive) {
|
|
|
22
41
|
parseChildren(inner, caseInsensitive, out);
|
|
23
42
|
return out;
|
|
24
43
|
}
|
|
44
|
+
/**
|
|
45
|
+
* Rootless read: parse the WHOLE text's top-level elements directly, with no enclosing root
|
|
46
|
+
* element to strip (a flat sequence like `<a>..</a><b>..</b>`). Used for `ExtractOptions.rootless`
|
|
47
|
+
* responses. Leading/trailing non-element text is ignored. Never throws. Mirrors Java readRootless.
|
|
48
|
+
*/
|
|
49
|
+
export function readXmlRootless(text, caseInsensitive) {
|
|
50
|
+
const out = {};
|
|
51
|
+
if (text == null || text.trim().length === 0)
|
|
52
|
+
return out;
|
|
53
|
+
parseChildren(text, caseInsensitive, out);
|
|
54
|
+
return out;
|
|
55
|
+
}
|
|
25
56
|
function parseChildren(inner, ci, out) {
|
|
26
57
|
const flags = ci ? "i" : "";
|
|
27
58
|
let pos = 0;
|
|
@@ -31,6 +62,16 @@ function parseChildren(inner, ci, out) {
|
|
|
31
62
|
break;
|
|
32
63
|
const tag = m[1] ?? "";
|
|
33
64
|
const key = ci ? tag.toLowerCase() : tag;
|
|
65
|
+
let rawAttrs = (m[2] ?? "").trim();
|
|
66
|
+
const selfClosing = rawAttrs.endsWith("/");
|
|
67
|
+
if (selfClosing)
|
|
68
|
+
rawAttrs = rawAttrs.slice(0, -1).trim();
|
|
69
|
+
const attrs = parseAttrs(rawAttrs, ci);
|
|
70
|
+
if (selfClosing) {
|
|
71
|
+
accumulate(out, key, Object.keys(attrs).length === 0 ? "" : attrs);
|
|
72
|
+
pos = m.index + m[0].length;
|
|
73
|
+
continue;
|
|
74
|
+
}
|
|
34
75
|
const contentStart = m.index + m[0].length;
|
|
35
76
|
const closeRe = `</${quote(tag)}\\s*>`;
|
|
36
77
|
const close = matchFrom(closeRe, flags, inner, contentStart);
|
|
@@ -41,11 +82,26 @@ function parseChildren(inner, ci, out) {
|
|
|
41
82
|
next = close.index + close[0].length;
|
|
42
83
|
}
|
|
43
84
|
else {
|
|
44
|
-
// unclosed tag: extract
|
|
85
|
+
// unclosed tag: extract content up to the next sibling open tag.
|
|
45
86
|
const sib = matchFrom(OPEN_TAG_SRC, flags, inner, contentStart);
|
|
46
87
|
if (sib != null) {
|
|
47
|
-
|
|
48
|
-
|
|
88
|
+
// When the unclosed element's content begins IMMEDIATELY with a child open tag
|
|
89
|
+
// (no leading text), that child was almost certainly meant to be NESTED, not a
|
|
90
|
+
// sibling — a common LLM malformation is dropping the parent's close tag while
|
|
91
|
+
// still emitting a real child element (e.g. <check ...><payoff>text). Absorb the
|
|
92
|
+
// remainder of this span as the unclosed element's content so the child nests
|
|
93
|
+
// under it. When there IS leading text before the first child tag (e.g. <t>hi<c>..),
|
|
94
|
+
// keep the sibling split — the leading text is the unclosed element's body and the
|
|
95
|
+
// following tag is its sibling. Mirrors Java XmlForgivingReader.
|
|
96
|
+
const noLeadingText = inner.substring(contentStart, sib.index).trim().length === 0;
|
|
97
|
+
if (noLeadingText) {
|
|
98
|
+
contentEnd = inner.length;
|
|
99
|
+
next = inner.length;
|
|
100
|
+
}
|
|
101
|
+
else {
|
|
102
|
+
contentEnd = sib.index;
|
|
103
|
+
next = contentEnd;
|
|
104
|
+
}
|
|
49
105
|
}
|
|
50
106
|
else {
|
|
51
107
|
contentEnd = inner.length;
|
|
@@ -53,15 +109,44 @@ function parseChildren(inner, ci, out) {
|
|
|
53
109
|
}
|
|
54
110
|
}
|
|
55
111
|
const content = inner.substring(contentStart, contentEnd);
|
|
56
|
-
|
|
57
|
-
accumulate(out, key, value);
|
|
112
|
+
accumulate(out, key, combine(attrs, content, ci));
|
|
58
113
|
pos = next;
|
|
59
114
|
}
|
|
60
115
|
}
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
116
|
+
/** Combine an element's attributes with its body (nested children or plain text). */
|
|
117
|
+
function combine(attrs, content, ci) {
|
|
118
|
+
if (content.includes("<")) {
|
|
119
|
+
const nested = {};
|
|
120
|
+
parseChildren(content, ci, nested);
|
|
121
|
+
if (Object.keys(nested).length > 0) {
|
|
122
|
+
// attributes first; a child element wins a name collision
|
|
123
|
+
return { ...attrs, ...nested };
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
return textValue(attrs, content);
|
|
127
|
+
}
|
|
128
|
+
function textValue(attrs, content) {
|
|
129
|
+
const text = content.trim();
|
|
130
|
+
if (Object.keys(attrs).length === 0)
|
|
131
|
+
return text;
|
|
132
|
+
return { ...attrs, [TEXT_KEY]: text };
|
|
133
|
+
}
|
|
134
|
+
function parseAttrs(rawAttrs, ci) {
|
|
135
|
+
const attrs = {};
|
|
136
|
+
if (rawAttrs.length === 0)
|
|
137
|
+
return attrs;
|
|
138
|
+
const re = new RegExp(ATTR_SRC, "g");
|
|
139
|
+
let a;
|
|
140
|
+
while ((a = re.exec(rawAttrs)) != null) {
|
|
141
|
+
const rawName = a[1];
|
|
142
|
+
if (rawName === undefined)
|
|
143
|
+
continue; // group 1 is mandatory in a match; guards strict TS
|
|
144
|
+
const name = ci ? rawName.toLowerCase() : rawName;
|
|
145
|
+
const val = a[2] ?? a[3] ?? a[4] ?? "";
|
|
146
|
+
if (!Object.prototype.hasOwnProperty.call(attrs, name))
|
|
147
|
+
attrs[name] = val;
|
|
148
|
+
}
|
|
149
|
+
return attrs;
|
|
65
150
|
}
|
|
66
151
|
function accumulate(out, key, value) {
|
|
67
152
|
if (!Object.prototype.hasOwnProperty.call(out, key)) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"xml-forgiving-reader.js","sourceRoot":"","sources":["../../src/extract/xml-forgiving-reader.ts"],"names":[],"mappings":"AAAA,qFAAqF;AACrF,
|
|
1
|
+
{"version":3,"file":"xml-forgiving-reader.js","sourceRoot":"","sources":["../../src/extract/xml-forgiving-reader.ts"],"names":[],"mappings":"AAAA,qFAAqF;AACrF,0FAA0F;AAC1F,8FAA8F;AAC9F,0BAA0B;AAC1B,EAAE;AACF,kBAAkB;AAClB,sFAAsF;AACtF,iGAAiG;AACjG,8FAA8F;AAC9F,2FAA2F;AAC3F,gGAAgG;AAChG,iGAAiG;AACjG,uEAAuE;AAEvE;;4DAE4D;AAC5D,MAAM,CAAC,MAAM,QAAQ,GAAG,OAAO,CAAC;AAEhC,SAAS,KAAK,CAAC,CAAS;IACtB,OAAO,CAAC,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;AAClD,CAAC;AAED,mFAAmF;AACnF,SAAS,SAAS,CAAC,MAAc,EAAE,KAAa,EAAE,IAAY,EAAE,IAAY;IAC1E,MAAM,CAAC,GAAG,IAAI,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC;IACxE,CAAC,CAAC,SAAS,GAAG,IAAI,CAAC;IACnB,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACtB,CAAC;AAED,sFAAsF;AACtF,sEAAsE;AACtE,MAAM,YAAY,GAAG,oCAAoC,CAAC;AAC1D,wDAAwD;AACxD,MAAM,QAAQ,GAAG,+EAA+E,CAAC;AAEjG,MAAM,UAAU,OAAO,CAAC,IAA+B,EAAE,eAAwB;IAC/E,MAAM,GAAG,GAA4B,EAAE,CAAC;IACxC,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,GAAG,CAAC;IACzD,MAAM,EAAE,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC7B,IAAI,EAAE,GAAG,CAAC;QAAE,OAAO,GAAG,CAAC;IACvB,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;IACvC,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC,EAAE,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC,IAAI,OAAO,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;IAC3F,aAAa,CAAC,KAAK,EAAE,eAAe,EAAE,GAAG,CAAC,CAAC;IAC3C,OAAO,GAAG,CAAC;AACb,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAC7B,IAA+B,EAC/B,eAAwB;IAExB,MAAM,GAAG,GAA4B,EAAE,CAAC;IACxC,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,GAAG,CAAC;IACzD,aAAa,CAAC,IAAI,EAAE,eAAe,EAAE,GAAG,CAAC,CAAC;IAC1C,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,EAAW,EAAE,GAA4B;IAC7E,MAAM,KAAK,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;IAC5B,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,SAAS,CAAC;QACR,MAAM,CAAC,GAAG,SAAS,CAAC,YAAY,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,CAAC,CAAC;QACrD,IAAI,CAAC,IAAI,IAAI;YAAE,MAAM;QACrB,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACvB,MAAM,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAEzC,IAAI,QAAQ,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QACnC,MAAM,WAAW,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;QAC3C,IAAI,WAAW;YAAE,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACzD,MAAM,KAAK,GAAG,UAAU,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAEvC,IAAI,WAAW,EAAE,CAAC;YAChB,UAAU,CAAC,GAAG,EAAE,GAAG,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YACnE,GAAG,GAAG,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YAC5B,SAAS;QACX,CAAC;QAED,MAAM,YAAY,GAAG,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QAC3C,MAAM,OAAO,GAAG,KAAK,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC;QACvC,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,CAAC,CAAC;QAE7D,IAAI,UAAkB,CAAC;QACvB,IAAI,IAAY,CAAC;QACjB,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;YAClB,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC;YACzB,IAAI,GAAG,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QACvC,CAAC;aAAM,CAAC;YACN,iEAAiE;YACjE,MAAM,GAAG,GAAG,SAAS,CAAC,YAAY,EAAE,KAAK,EAAE,KAAK,EAAE,YAAY,CAAC,CAAC;YAChE,IAAI,GAAG,IAAI,IAAI,EAAE,CAAC;gBAChB,+EAA+E;gBAC/E,+EAA+E;gBAC/E,+EAA+E;gBAC/E,iFAAiF;gBACjF,8EAA8E;gBAC9E,qFAAqF;gBACrF,mFAAmF;gBACnF,iEAAiE;gBACjE,MAAM,aAAa,GAAG,KAAK,CAAC,SAAS,CAAC,YAAY,EAAE,GAAG,CAAC,KAAK,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,CAAC;gBACnF,IAAI,aAAa,EAAE,CAAC;oBAClB,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC;oBAC1B,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC;gBACtB,CAAC;qBAAM,CAAC;oBACN,UAAU,GAAG,GAAG,CAAC,KAAK,CAAC;oBACvB,IAAI,GAAG,UAAU,CAAC;gBACpB,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC;gBAC1B,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC;YACtB,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GAAG,KAAK,CAAC,SAAS,CAAC,YAAY,EAAE,UAAU,CAAC,CAAC;QAC1D,UAAU,CAAC,GAAG,EAAE,GAAG,EAAE,OAAO,CAAC,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;QAClD,GAAG,GAAG,IAAI,CAAC;IACb,CAAC;AACH,CAAC;AAED,qFAAqF;AACrF,SAAS,OAAO,CAAC,KAA8B,EAAE,OAAe,EAAE,EAAW;IAC3E,IAAI,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;QAC1B,MAAM,MAAM,GAA4B,EAAE,CAAC;QAC3C,aAAa,CAAC,OAAO,EAAE,EAAE,EAAE,MAAM,CAAC,CAAC;QACnC,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnC,0DAA0D;YAC1D,OAAO,EAAE,GAAG,KAAK,EAAE,GAAG,MAAM,EAAE,CAAC;QACjC,CAAC;IACH,CAAC;IACD,OAAO,SAAS,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;AACnC,CAAC;AAED,SAAS,SAAS,CAAC,KAA8B,EAAE,OAAe;IAChE,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IACjD,OAAO,EAAE,GAAG,KAAK,EAAE,CAAC,QAAQ,CAAC,EAAE,IAAI,EAAE,CAAC;AACxC,CAAC;AAED,SAAS,UAAU,CAAC,QAAgB,EAAE,EAAW;IAC/C,MAAM,KAAK,GAA4B,EAAE,CAAC;IAC1C,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACxC,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IACrC,IAAI,CAAyB,CAAC;IAC9B,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,IAAI,EAAE,CAAC;QACvC,MAAM,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,IAAI,OAAO,KAAK,SAAS;YAAE,SAAS,CAAC,oDAAoD;QACzF,MAAM,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;QAClD,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACvC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,cAAc,CAAC,IAAI,CAAC,KAAK,EAAE,IAAI,CAAC;YAAE,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC;IAC5E,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,UAAU,CAAC,GAA4B,EAAE,GAAW,EAAE,KAAc;IAC3E,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,cAAc,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,CAAC;QACpD,GAAG,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;QACjB,OAAO;IACT,CAAC;IACD,MAAM,QAAQ,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC;IAC1B,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC5B,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IACvB,CAAC;SAAM,CAAC;QACN,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;IAC/B,CAAC;AACH,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
export { render, type RenderOptions } from "./render.js";
|
|
2
2
|
export { type Provider, InMemoryProvider } from "./provider.js";
|
|
3
3
|
export { ESCAPERS, type RenderFormat } from "./escapers.js";
|
|
4
|
-
export { verify, ERR_VAR_NOT_ON_PAYLOAD, ERR_PARTIAL_UNRESOLVED, ERR_REQUIRED_SLOT_UNUSED, ERR_OUTPUT_TAG_MISSING, type PayloadField, type VerifyError, type VerifyOptions, } from "./verify.js";
|
|
4
|
+
export { verify, resolveTemplateVariable, parseTemplate, ERR_VAR_NOT_ON_PAYLOAD, ERR_PARTIAL_UNRESOLVED, ERR_REQUIRED_SLOT_UNUSED, ERR_OUTPUT_TAG_MISSING, type PayloadField, type ResolveStack, type VerifyError, type VerifyOptions, } from "./verify.js";
|
|
5
5
|
export { extract } from "./extract/extract.js";
|
|
6
|
-
export { Format, FieldKind, FieldExtraction, Tolerance, ExtractionReport, scalar, enumField, enumArray, range, object, extractSchema, defaults, orThrow, ExtractError, type FieldSpec, type ExtractSchema, type ExtractOptions, type ExtractionOutcome, type ExtractionResult, type Coercion, type OnField, } from "./extract/types.js";
|
|
6
|
+
export { Format, FieldKind, FieldExtraction, Tolerance, ExtractionReport, scalar, textContentField, enumField, enumArray, range, object, extractSchema, defaults, orThrow, ExtractError, type FieldSpec, type ExtractSchema, type ExtractOptions, type ExtractionOutcome, type ExtractionResult, type Coercion, type OnField, } from "./extract/types.js";
|
|
7
7
|
export { asString, asInt, asLong, asDouble, asBool, asStringList, } from "./extract/extract-map.js";
|
|
8
8
|
export { renderOutputFormat } from "./prompt/output-format-renderer.js";
|
|
9
9
|
export { PromptStyle, promptStyleFrom } from "./prompt/prompt-style.js";
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,KAAK,aAAa,EAAE,MAAM,aAAa,CAAC;AACzD,OAAO,EAAE,KAAK,QAAQ,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAChE,OAAO,EAAE,QAAQ,EAAE,KAAK,YAAY,EAAE,MAAM,eAAe,CAAC;AAC5D,OAAO,EACL,MAAM,EACN,sBAAsB,EACtB,sBAAsB,EACtB,wBAAwB,EACxB,sBAAsB,EACtB,KAAK,YAAY,EACjB,KAAK,WAAW,EAChB,KAAK,aAAa,GACnB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAC/C,OAAO,EACL,MAAM,EACN,SAAS,EACT,eAAe,EACf,SAAS,EACT,gBAAgB,EAChB,MAAM,EACN,SAAS,EACT,SAAS,EACT,KAAK,EACL,MAAM,EACN,aAAa,EACb,QAAQ,EACR,OAAO,EACP,YAAY,EACZ,KAAK,SAAS,EACd,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,iBAAiB,EACtB,KAAK,gBAAgB,EACrB,KAAK,QAAQ,EACb,KAAK,OAAO,GACb,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EACL,QAAQ,EACR,KAAK,EACL,MAAM,EACN,QAAQ,EACR,MAAM,EACN,YAAY,GACb,MAAM,0BAA0B,CAAC;AAGlC,OAAO,EAAE,kBAAkB,EAAE,MAAM,oCAAoC,CAAC;AACxE,OAAO,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AACxE,OAAO,EACL,qBAAqB,EACrB,WAAW,EACX,KAAK,eAAe,GACrB,MAAM,8BAA8B,CAAC;AACtC,YAAY,EAAE,gBAAgB,EAAE,MAAM,gCAAgC,CAAC;AACvE,YAAY,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAC;AAG5D,YAAY,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,KAAK,aAAa,EAAE,MAAM,aAAa,CAAC;AACzD,OAAO,EAAE,KAAK,QAAQ,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAChE,OAAO,EAAE,QAAQ,EAAE,KAAK,YAAY,EAAE,MAAM,eAAe,CAAC;AAC5D,OAAO,EACL,MAAM,EACN,uBAAuB,EACvB,aAAa,EACb,sBAAsB,EACtB,sBAAsB,EACtB,wBAAwB,EACxB,sBAAsB,EACtB,KAAK,YAAY,EACjB,KAAK,YAAY,EACjB,KAAK,WAAW,EAChB,KAAK,aAAa,GACnB,MAAM,aAAa,CAAC;AAGrB,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAC/C,OAAO,EACL,MAAM,EACN,SAAS,EACT,eAAe,EACf,SAAS,EACT,gBAAgB,EAChB,MAAM,EACN,gBAAgB,EAChB,SAAS,EACT,SAAS,EACT,KAAK,EACL,MAAM,EACN,aAAa,EACb,QAAQ,EACR,OAAO,EACP,YAAY,EACZ,KAAK,SAAS,EACd,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,iBAAiB,EACtB,KAAK,gBAAgB,EACrB,KAAK,QAAQ,EACb,KAAK,OAAO,GACb,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EACL,QAAQ,EACR,KAAK,EACL,MAAM,EACN,QAAQ,EACR,MAAM,EACN,YAAY,GACb,MAAM,0BAA0B,CAAC;AAGlC,OAAO,EAAE,kBAAkB,EAAE,MAAM,oCAAoC,CAAC;AACxE,OAAO,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AACxE,OAAO,EACL,qBAAqB,EACrB,WAAW,EACX,KAAK,eAAe,GACrB,MAAM,8BAA8B,CAAC;AACtC,YAAY,EAAE,gBAAgB,EAAE,MAAM,gCAAgC,CAAC;AACvE,YAAY,EAAE,WAAW,EAAE,MAAM,0BAA0B,CAAC;AAG5D,YAAY,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
export { render } from "./render.js";
|
|
2
2
|
export { InMemoryProvider } from "./provider.js";
|
|
3
3
|
export { ESCAPERS } from "./escapers.js";
|
|
4
|
-
export { verify, ERR_VAR_NOT_ON_PAYLOAD, ERR_PARTIAL_UNRESOLVED, ERR_REQUIRED_SLOT_UNUSED, ERR_OUTPUT_TAG_MISSING, } from "./verify.js";
|
|
4
|
+
export { verify, resolveTemplateVariable, parseTemplate, ERR_VAR_NOT_ON_PAYLOAD, ERR_PARTIAL_UNRESOLVED, ERR_REQUIRED_SLOT_UNUSED, ERR_OUTPUT_TAG_MISSING, } from "./verify.js";
|
|
5
5
|
// FR-010 tolerant extract engine (Tier-2 forgiving parser).
|
|
6
6
|
export { extract } from "./extract/extract.js";
|
|
7
|
-
export { Format, FieldKind, FieldExtraction, Tolerance, ExtractionReport, scalar, enumField, enumArray, range, object, extractSchema, defaults, orThrow, ExtractError, } from "./extract/types.js";
|
|
7
|
+
export { Format, FieldKind, FieldExtraction, Tolerance, ExtractionReport, scalar, textContentField, enumField, enumArray, range, object, extractSchema, defaults, orThrow, ExtractError, } from "./extract/types.js";
|
|
8
8
|
export { asString, asInt, asLong, asDouble, asBool, asStringList, } from "./extract/extract-map.js";
|
|
9
9
|
// FR-010 artifact 1 — output-format prompt renderer ("produce your answer like this").
|
|
10
10
|
export { renderOutputFormat } from "./prompt/output-format-renderer.js";
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAsB,MAAM,aAAa,CAAC;AACzD,OAAO,EAAiB,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAChE,OAAO,EAAE,QAAQ,EAAqB,MAAM,eAAe,CAAC;AAC5D,OAAO,EACL,MAAM,EACN,sBAAsB,EACtB,sBAAsB,EACtB,wBAAwB,EACxB,sBAAsB,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAsB,MAAM,aAAa,CAAC;AACzD,OAAO,EAAiB,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAChE,OAAO,EAAE,QAAQ,EAAqB,MAAM,eAAe,CAAC;AAC5D,OAAO,EACL,MAAM,EACN,uBAAuB,EACvB,aAAa,EACb,sBAAsB,EACtB,sBAAsB,EACtB,wBAAwB,EACxB,sBAAsB,GAKvB,MAAM,aAAa,CAAC;AAErB,4DAA4D;AAC5D,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAC/C,OAAO,EACL,MAAM,EACN,SAAS,EACT,eAAe,EACf,SAAS,EACT,gBAAgB,EAChB,MAAM,EACN,gBAAgB,EAChB,SAAS,EACT,SAAS,EACT,KAAK,EACL,MAAM,EACN,aAAa,EACb,QAAQ,EACR,OAAO,EACP,YAAY,GAQb,MAAM,oBAAoB,CAAC;AAC5B,OAAO,EACL,QAAQ,EACR,KAAK,EACL,MAAM,EACN,QAAQ,EACR,MAAM,EACN,YAAY,GACb,MAAM,0BAA0B,CAAC;AAElC,uFAAuF;AACvF,OAAO,EAAE,kBAAkB,EAAE,MAAM,oCAAoC,CAAC;AACxE,OAAO,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,0BAA0B,CAAC;AACxE,OAAO,EACL,qBAAqB,EACrB,WAAW,GAEZ,MAAM,8BAA8B,CAAC"}
|
package/dist/verify.d.ts
CHANGED
|
@@ -33,6 +33,30 @@ export interface VerifyOptions {
|
|
|
33
33
|
*/
|
|
34
34
|
requiredTags?: string[];
|
|
35
35
|
}
|
|
36
|
+
/**
|
|
37
|
+
* The context stack — innermost context last, mirroring Mustache lookup order.
|
|
38
|
+
* Generic over the field node so consumers (e.g. the docs annotator) can resolve
|
|
39
|
+
* an ENRICHED field tree (carrying owner/type metadata) through the EXACT same
|
|
40
|
+
* walk verify uses, guaranteeing the two surfaces agree.
|
|
41
|
+
*/
|
|
42
|
+
export type ResolveStack<F extends PayloadField = PayloadField> = readonly F[][];
|
|
43
|
+
/**
|
|
44
|
+
* Resolve a (possibly dotted) variable path the way Mustache does: the FIRST
|
|
45
|
+
* segment is looked up through the context stack (innermost → outermost); each
|
|
46
|
+
* remaining segment is a direct descent into the resolved field's `fields`.
|
|
47
|
+
* Returns the resolved field, or undefined if any segment is missing.
|
|
48
|
+
*
|
|
49
|
+
* EXPORTED so the docs annotator can share this ONE resolution (annotator ⇆
|
|
50
|
+
* verify must agree). Generic over the node type: an enriched tree resolves the
|
|
51
|
+
* same way, since only `name`/`fields` drive the walk.
|
|
52
|
+
*/
|
|
53
|
+
export declare function resolveTemplateVariable<F extends PayloadField>(stack: ResolveStack<F>, path: string): F | undefined;
|
|
54
|
+
/**
|
|
55
|
+
* Parse a template into Mustache tokens (`[type, value, start, end, subTokens?]`),
|
|
56
|
+
* the SAME parse verify walks. Exported so the docs annotator tokenizes through
|
|
57
|
+
* one parser (no divergent re-tokenization). Returns a readonly token list.
|
|
58
|
+
*/
|
|
59
|
+
export declare function parseTemplate(text: string): readonly (readonly unknown[])[];
|
|
36
60
|
/**
|
|
37
61
|
* Walk a Mustache template's tokens against a payload field tree, returning a
|
|
38
62
|
* list of drift errors. Context-sensitive: a section `{{#posts}}…{{/posts}}`
|
package/dist/verify.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"verify.d.ts","sourceRoot":"","sources":["../src/verify.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAE9C,gFAAgF;AAChF,eAAO,MAAM,sBAAsB,2BAA2B,CAAC;AAC/D,8DAA8D;AAC9D,eAAO,MAAM,sBAAsB,2BAA2B,CAAC;AAC/D,oFAAoF;AACpF,eAAO,MAAM,wBAAwB,6BAA6B,CAAC;AACnE,4EAA4E;AAC5E,eAAO,MAAM,sBAAsB,2BAA2B,CAAC;AAE/D;;;;GAIG;AACH,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,YAAY,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,8DAA8D;IAC9D,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,aAAa;IAC5B,6EAA6E;IAC7E,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,6EAA6E;IAC7E,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB;;;;OAIG;IACH,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;CACzB;
|
|
1
|
+
{"version":3,"file":"verify.d.ts","sourceRoot":"","sources":["../src/verify.ts"],"names":[],"mappings":"AAYA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAE9C,gFAAgF;AAChF,eAAO,MAAM,sBAAsB,2BAA2B,CAAC;AAC/D,8DAA8D;AAC9D,eAAO,MAAM,sBAAsB,2BAA2B,CAAC;AAC/D,oFAAoF;AACpF,eAAO,MAAM,wBAAwB,6BAA6B,CAAC;AACnE,4EAA4E;AAC5E,eAAO,MAAM,sBAAsB,2BAA2B,CAAC;AAE/D;;;;GAIG;AACH,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,YAAY,EAAE,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,8DAA8D;IAC9D,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,aAAa;IAC5B,6EAA6E;IAC7E,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,6EAA6E;IAC7E,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;IACzB;;;;OAIG;IACH,YAAY,CAAC,EAAE,MAAM,EAAE,CAAC;CACzB;AAMD;;;;;GAKG;AACH,MAAM,MAAM,YAAY,CAAC,CAAC,SAAS,YAAY,GAAG,YAAY,IAAI,SAAS,CAAC,EAAE,EAAE,CAAC;AAMjF;;;;;;;;;GASG;AACH,wBAAgB,uBAAuB,CAAC,CAAC,SAAS,YAAY,EAC5D,KAAK,EAAE,YAAY,CAAC,CAAC,CAAC,EACtB,IAAI,EAAE,MAAM,GACX,CAAC,GAAG,SAAS,CAcf;AASD;;;;GAIG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,CAAC,SAAS,OAAO,EAAE,CAAC,EAAE,CAE3E;AAsBD;;;;GAIG;AACH,wBAAgB,MAAM,CACpB,YAAY,EAAE,MAAM,EACpB,MAAM,EAAE,YAAY,EAAE,EACtB,IAAI,CAAC,EAAE,aAAa,GACnB,WAAW,EAAE,CAuFf"}
|
package/dist/verify.js
CHANGED
|
@@ -21,11 +21,17 @@ const MAX_DEPTH = 32;
|
|
|
21
21
|
function find(fields, name) {
|
|
22
22
|
return fields.find((f) => f.name === name);
|
|
23
23
|
}
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
24
|
+
/**
|
|
25
|
+
* Resolve a (possibly dotted) variable path the way Mustache does: the FIRST
|
|
26
|
+
* segment is looked up through the context stack (innermost → outermost); each
|
|
27
|
+
* remaining segment is a direct descent into the resolved field's `fields`.
|
|
28
|
+
* Returns the resolved field, or undefined if any segment is missing.
|
|
29
|
+
*
|
|
30
|
+
* EXPORTED so the docs annotator can share this ONE resolution (annotator ⇆
|
|
31
|
+
* verify must agree). Generic over the node type: an enriched tree resolves the
|
|
32
|
+
* same way, since only `name`/`fields` drive the walk.
|
|
33
|
+
*/
|
|
34
|
+
export function resolveTemplateVariable(stack, path) {
|
|
29
35
|
const segs = path.split(".");
|
|
30
36
|
let current;
|
|
31
37
|
for (let i = stack.length - 1; i >= 0; i--) {
|
|
@@ -40,9 +46,19 @@ function resolve(stack, path) {
|
|
|
40
46
|
}
|
|
41
47
|
return current;
|
|
42
48
|
}
|
|
49
|
+
// Internal alias preserving the original call sites unchanged.
|
|
50
|
+
const resolve = resolveTemplateVariable;
|
|
43
51
|
function parse(text) {
|
|
44
52
|
return Mustache.parse(text);
|
|
45
53
|
}
|
|
54
|
+
/**
|
|
55
|
+
* Parse a template into Mustache tokens (`[type, value, start, end, subTokens?]`),
|
|
56
|
+
* the SAME parse verify walks. Exported so the docs annotator tokenizes through
|
|
57
|
+
* one parser (no divergent re-tokenization). Returns a readonly token list.
|
|
58
|
+
*/
|
|
59
|
+
export function parseTemplate(text) {
|
|
60
|
+
return parse(text);
|
|
61
|
+
}
|
|
46
62
|
// An opening tag is `<tag` immediately followed by `>` or XML whitespace, so
|
|
47
63
|
// attributes are allowed (`<answer foo="1">`) but a longer name is not over-matched
|
|
48
64
|
// (`<answers>` does not satisfy `answer`).
|
package/dist/verify.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"verify.js","sourceRoot":"","sources":["../src/verify.ts"],"names":[],"mappings":"AAAA,wEAAwE;AACxE,8EAA8E;AAC9E,2EAA2E;AAC3E,8EAA8E;AAC9E,8EAA8E;AAC9E,sDAAsD;AACtD,EAAE;AACF,wEAAwE;AACxE,+EAA+E;AAC/E,yEAAyE;AAEzE,OAAO,QAAQ,MAAM,UAAU,CAAC;AAGhC,gFAAgF;AAChF,MAAM,CAAC,MAAM,sBAAsB,GAAG,wBAAwB,CAAC;AAC/D,8DAA8D;AAC9D,MAAM,CAAC,MAAM,sBAAsB,GAAG,wBAAwB,CAAC;AAC/D,oFAAoF;AACpF,MAAM,CAAC,MAAM,wBAAwB,GAAG,0BAA0B,CAAC;AACnE,4EAA4E;AAC5E,MAAM,CAAC,MAAM,sBAAsB,GAAG,wBAAwB,CAAC;AA+B/D,MAAM,SAAS,GAAG,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"verify.js","sourceRoot":"","sources":["../src/verify.ts"],"names":[],"mappings":"AAAA,wEAAwE;AACxE,8EAA8E;AAC9E,2EAA2E;AAC3E,8EAA8E;AAC9E,8EAA8E;AAC9E,sDAAsD;AACtD,EAAE;AACF,wEAAwE;AACxE,+EAA+E;AAC/E,yEAAyE;AAEzE,OAAO,QAAQ,MAAM,UAAU,CAAC;AAGhC,gFAAgF;AAChF,MAAM,CAAC,MAAM,sBAAsB,GAAG,wBAAwB,CAAC;AAC/D,8DAA8D;AAC9D,MAAM,CAAC,MAAM,sBAAsB,GAAG,wBAAwB,CAAC;AAC/D,oFAAoF;AACpF,MAAM,CAAC,MAAM,wBAAwB,GAAG,0BAA0B,CAAC;AACnE,4EAA4E;AAC5E,MAAM,CAAC,MAAM,sBAAsB,GAAG,wBAAwB,CAAC;AA+B/D,MAAM,SAAS,GAAG,EAAE,CAAC;AAYrB,SAAS,IAAI,CAAyB,MAAW,EAAE,IAAY;IAC7D,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;AAC7C,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,uBAAuB,CACrC,KAAsB,EACtB,IAAY;IAEZ,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC7B,IAAI,OAAsB,CAAC;IAC3B,KAAK,IAAI,CAAC,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAE,EAAE,IAAI,CAAC,CAAC,CAAE,CAAC,CAAC;QACtC,IAAI,GAAG,EAAE,CAAC;YACR,OAAO,GAAG,GAAG,CAAC;YACd,MAAM;QACR,CAAC;IACH,CAAC;IACD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,OAAO,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChD,OAAO,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAE,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,CAAE,CAAmB,CAAC,CAAC,CAAC,SAAS,CAAC;IAC3F,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,+DAA+D;AAC/D,MAAM,OAAO,GAAG,uBAAuB,CAAC;AAExC,SAAS,KAAK,CAAC,IAAY;IACzB,OAAO,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAuB,CAAC;AACpD,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC;AACrB,CAAC;AAED,6EAA6E;AAC7E,oFAAoF;AACpF,2CAA2C;AAC3C,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC;AAE9D,SAAS,UAAU,CAAC,IAAY,EAAE,GAAW;IAC3C,MAAM,MAAM,GAAG,IAAI,GAAG,EAAE,CAAC;IACzB,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;QAC7E,MAAM,IAAI,GAAG,IAAI,CAAC,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC;QACrC,IAAI,IAAI,KAAK,SAAS,IAAI,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,OAAO,IAAI,CAAC;IACnE,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC;AAED,mFAAmF;AACnF,kFAAkF;AAClF,SAAS,WAAW,CAAC,IAAY,EAAE,GAAW;IAC5C,OAAO,IAAI,CAAC,QAAQ,CAAC,KAAK,GAAG,GAAG,CAAC,CAAC;AACpC,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,MAAM,CACpB,YAAoB,EACpB,MAAsB,EACtB,IAAoB;IAEpB,MAAM,MAAM,GAAkB,EAAE,CAAC;IACjC,MAAM,QAAQ,GAAG,IAAI,EAAE,QAAQ,CAAC;IAChC,MAAM,IAAI,GAAG,MAAM,CAAC;IACpB,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAU,CAAC;IAC3C,kEAAkE;IAClE,yEAAyE;IACzE,+BAA+B;IAC/B,MAAM,WAAW,GAAa,CAAC,YAAY,CAAC,CAAC;IAE7C,SAAS,IAAI,CAAC,MAAe,EAAE,KAAmB,EAAE,IAAuB;QACzE,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC;QACvD,KAAK,MAAM,GAAG,IAAI,MAAM,EAAE,CAAC;YACzB,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,CAAW,CAAC;YAC9B,MAAM,KAAK,GAAG,GAAG,CAAC,CAAC,CAAW,CAAC;YAC/B,QAAQ,IAAI,EAAE,CAAC;gBACb,KAAK,MAAM,CAAC,CAAC,QAAQ;gBACrB,KAAK,GAAG,CAAC,CAAC,SAAS;gBACnB,KAAK,GAAG,CAAC,CAAC,CAAC;oBACT,mDAAmD;oBACnD,IAAI,KAAK,KAAK,GAAG;wBAAE,MAAM,CAAC,mCAAmC;oBAC7D,IAAI,MAAM;wBAAE,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC;oBACvD,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC;wBAAE,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;oBACvF,MAAM;gBACR,CAAC;gBACD,KAAK,GAAG,CAAC,CAAC,gBAAgB;gBAC1B,KAAK,GAAG,CAAC,CAAC,CAAC;oBACT,gBAAgB;oBAChB,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAE,GAAG,CAAC,CAAC,CAAa,CAAC,CAAC,CAAC,EAAE,CAAC;oBAC7D,IAAI,KAAK,KAAK,GAAG,EAAE,CAAC;wBAClB,IAAI,CAAC,GAAG,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;wBACvB,MAAM;oBACR,CAAC;oBACD,IAAI,MAAM;wBAAE,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC;oBACvD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;oBACpC,IAAI,CAAC,KAAK,EAAE,CAAC;wBACX,8DAA8D;wBAC9D,iEAAiE;wBACjE,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;wBAC3D,MAAM;oBACR,CAAC;oBACD,sEAAsE;oBACtE,2DAA2D;oBAC3D,MAAM,IAAI,GAAG,IAAI,KAAK,GAAG,IAAI,KAAK,CAAC,MAAM,KAAK,SAAS,CAAC;oBACxD,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,KAAK,EAAE,KAAK,CAAC,MAAO,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;oBAC1D,MAAM;gBACR,CAAC;gBACD,KAAK,GAAG,CAAC,CAAC,CAAC;oBACT,qBAAqB;oBACrB,IAAI,CAAC,QAAQ;wBAAE,MAAM,CAAC,mCAAmC;oBACzD,IAAI,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS;wBAAE,MAAM,CAAC,oBAAoB;oBACjF,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;oBACrC,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;wBACvB,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;wBAC3D,MAAM;oBACR,CAAC;oBACD,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;oBACvB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,GAAG,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC;oBAC3C,MAAM;gBACR,CAAC;gBACD;oBACE,MAAM,CAAC,iCAAiC;YAC5C,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;IAEtC,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,aAAa,IAAI,EAAE,EAAE,CAAC;QAC7C,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC;YAAE,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,wBAAwB,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;IAC/F,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,EAAE,YAAY,IAAI,EAAE,CAAC;IAC9C,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC5B,yEAAyE;QACzE,0EAA0E;QAC1E,2EAA2E;QAC3E,wEAAwE;QACxE,MAAM,QAAQ,GAAG,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxC,KAAK,MAAM,GAAG,IAAI,YAAY,EAAE,CAAC;YAC/B,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,GAAG,CAAC,EAAE,CAAC;gBAC9D,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,sBAAsB,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC;YAC3D,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@metaobjectsdev/render",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.11.0-rc.1",
|
|
4
4
|
"description": "Logic-less, deterministic text render engine (Mustache) for MetaObjects templates — provider-resolved partials, format-driven escaping, zero core dependency.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -12,7 +12,12 @@
|
|
|
12
12
|
"default": "./dist/index.js"
|
|
13
13
|
}
|
|
14
14
|
},
|
|
15
|
-
"files": [
|
|
15
|
+
"files": [
|
|
16
|
+
"dist",
|
|
17
|
+
"src",
|
|
18
|
+
"README.md",
|
|
19
|
+
"LICENSE"
|
|
20
|
+
],
|
|
16
21
|
"scripts": {
|
|
17
22
|
"build": "tsc -p .",
|
|
18
23
|
"typecheck": "tsc -p tsconfig.typecheck.json"
|
|
@@ -21,23 +26,29 @@
|
|
|
21
26
|
"author": "Doug Mealing <doug@dougmealing.com>",
|
|
22
27
|
"homepage": "https://metaobjects.dev",
|
|
23
28
|
"bugs": {
|
|
24
|
-
|
|
25
|
-
},
|
|
26
|
-
"repository": {
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
},
|
|
31
|
-
"keywords": [
|
|
32
|
-
"
|
|
33
|
-
"
|
|
34
|
-
|
|
35
|
-
"
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
"
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
"
|
|
42
|
-
|
|
29
|
+
"url": "https://github.com/metaobjectsdev/metaobjects/issues"
|
|
30
|
+
},
|
|
31
|
+
"repository": {
|
|
32
|
+
"type": "git",
|
|
33
|
+
"url": "https://github.com/metaobjectsdev/metaobjects.git",
|
|
34
|
+
"directory": "server/typescript/packages/render"
|
|
35
|
+
},
|
|
36
|
+
"keywords": [
|
|
37
|
+
"metaobjects",
|
|
38
|
+
"render",
|
|
39
|
+
"mustache",
|
|
40
|
+
"prompt",
|
|
41
|
+
"template"
|
|
42
|
+
],
|
|
43
|
+
"publishConfig": {
|
|
44
|
+
"access": "public"
|
|
45
|
+
},
|
|
46
|
+
"dependencies": {
|
|
47
|
+
"mustache": "^4.2.0"
|
|
48
|
+
},
|
|
49
|
+
"devDependencies": {
|
|
50
|
+
"@types/mustache": "^4.2.5",
|
|
51
|
+
"bun-types": "latest",
|
|
52
|
+
"typescript": "^5.6.0"
|
|
53
|
+
}
|
|
43
54
|
}
|
package/src/extract/coerce.ts
CHANGED
|
@@ -49,9 +49,9 @@ export function coerceValue(
|
|
|
49
49
|
return coerceEnum(raw, spec, opts, fieldPath, report, ci);
|
|
50
50
|
case FieldKind.INT:
|
|
51
51
|
case FieldKind.LONG:
|
|
52
|
-
return coerceInt(raw, spec, fieldPath, report);
|
|
52
|
+
return coerceInt(raw, spec, fieldPath, report, ci);
|
|
53
53
|
case FieldKind.DOUBLE:
|
|
54
|
-
return coerceDouble(raw, spec, fieldPath, report);
|
|
54
|
+
return coerceDouble(raw, spec, fieldPath, report, ci);
|
|
55
55
|
case FieldKind.BOOLEAN:
|
|
56
56
|
return coerceBool(raw, ci);
|
|
57
57
|
default:
|
|
@@ -171,16 +171,16 @@ function lookupAliasIn(raw: string, aliases: Readonly<Record<string, string>>, m
|
|
|
171
171
|
return null;
|
|
172
172
|
}
|
|
173
173
|
|
|
174
|
-
function coerceInt(raw: string, spec: FieldSpec, path: string, report: ExtractionReport): unknown | typeof MALFORMED {
|
|
174
|
+
function coerceInt(raw: string, spec: FieldSpec, path: string, report: ExtractionReport, lenient: boolean): unknown | typeof MALFORMED {
|
|
175
175
|
const n = parseFiniteNumber(raw);
|
|
176
176
|
if (n === null) return MALFORMED;
|
|
177
|
-
return clamp(Math.trunc(n), spec, path, report);
|
|
177
|
+
return clamp(Math.trunc(n), spec, path, report, lenient);
|
|
178
178
|
}
|
|
179
179
|
|
|
180
|
-
function coerceDouble(raw: string, spec: FieldSpec, path: string, report: ExtractionReport): unknown | typeof MALFORMED {
|
|
180
|
+
function coerceDouble(raw: string, spec: FieldSpec, path: string, report: ExtractionReport, lenient: boolean): unknown | typeof MALFORMED {
|
|
181
181
|
const n = parseFiniteNumber(raw);
|
|
182
182
|
if (n === null) return MALFORMED;
|
|
183
|
-
return clamp(n, spec, path, report);
|
|
183
|
+
return clamp(n, spec, path, report, lenient);
|
|
184
184
|
}
|
|
185
185
|
|
|
186
186
|
/** Parse a trimmed numeric string; null if empty, non-numeric, or non-finite (NaN/±Infinity). */
|
|
@@ -194,11 +194,20 @@ function parseFiniteNumber(raw: string): number | null {
|
|
|
194
194
|
return Number.isFinite(n) ? n : null;
|
|
195
195
|
}
|
|
196
196
|
|
|
197
|
-
|
|
197
|
+
/**
|
|
198
|
+
* Apply the field's min/max range (sourced from its numeric validator). Under LENIENT tolerance an
|
|
199
|
+
* out-of-range value is CLAMPED to the bound (recorded as a "clamp" coercion); under STRICT tolerance
|
|
200
|
+
* it is MALFORMED (the validator's "value out of range" contract). Cross-port: ports must match the
|
|
201
|
+
* lenient-clamp / strict-reject split.
|
|
202
|
+
*/
|
|
203
|
+
function clamp(n: number, spec: FieldSpec, path: string, report: ExtractionReport, lenient: boolean): number | typeof MALFORMED {
|
|
198
204
|
let c = n;
|
|
199
205
|
if (spec.min != null && c < spec.min) c = spec.min;
|
|
200
206
|
if (spec.max != null && c > spec.max) c = spec.max;
|
|
201
|
-
if (c !== n)
|
|
207
|
+
if (c !== n) {
|
|
208
|
+
if (!lenient) return MALFORMED; // STRICT: out-of-range is invalid, not silently clamped
|
|
209
|
+
report.addCoercion({ fieldPath: path, from: stringify(n), to: stringify(c), kind: "clamp" });
|
|
210
|
+
}
|
|
202
211
|
return c;
|
|
203
212
|
}
|
|
204
213
|
|
package/src/extract/extract.ts
CHANGED
|
@@ -11,8 +11,8 @@ import type { FieldSpec, ExtractOptions, ExtractionOutcome, ExtractSchema } from
|
|
|
11
11
|
import { ExtractionReport } from "./types.js";
|
|
12
12
|
import { strip } from "./strip.js";
|
|
13
13
|
import { locateJson, locateXml } from "./locate.js";
|
|
14
|
-
import { readJson, TRUNCATED } from "./json-forgiving-reader.js";
|
|
15
|
-
import { readXml } from "./xml-forgiving-reader.js";
|
|
14
|
+
import { readJson, TRUNCATED, NULL_LITERAL } from "./json-forgiving-reader.js";
|
|
15
|
+
import { readXml, readXmlRootless, TEXT_KEY } from "./xml-forgiving-reader.js";
|
|
16
16
|
import { coerceValue, scalarCoerce, MALFORMED } from "./coerce.js";
|
|
17
17
|
|
|
18
18
|
/** The forgiving entry point: extract dirty `text` against `schema`. Never throws. */
|
|
@@ -28,16 +28,20 @@ export function extract(
|
|
|
28
28
|
const stripped = strip(text);
|
|
29
29
|
const ci = o.tolerance !== Tolerance.STRICT;
|
|
30
30
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
// XML rootless (opts.rootless): the payload's fields ARE the top-level elements — there is no
|
|
32
|
+
// enclosing root to locate — so parse the whole stripped text's top-level elements directly.
|
|
33
|
+
// Otherwise locate the <rootName> span as before. JSON is unaffected. Mirrors Java Extract.
|
|
34
|
+
let span: string | null;
|
|
34
35
|
let raw: Record<string, unknown>;
|
|
35
|
-
if (
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
36
|
+
if (schema.format === Format.JSON) {
|
|
37
|
+
span = locateJson(stripped);
|
|
38
|
+
raw = span == null ? {} : readJson(span);
|
|
39
|
+
} else if (o.rootless) {
|
|
40
|
+
span = stripped.length === 0 ? null : stripped;
|
|
41
|
+
raw = span == null ? {} : readXmlRootless(stripped, ci);
|
|
39
42
|
} else {
|
|
40
|
-
|
|
43
|
+
span = locateXml(stripped, schema.rootName, ci);
|
|
44
|
+
raw = span == null ? {} : readXml(span, ci);
|
|
41
45
|
}
|
|
42
46
|
|
|
43
47
|
if (isEmptyRecord(raw) && (stripped.length === 0 || span == null)) {
|
|
@@ -59,7 +63,9 @@ function extractFields(
|
|
|
59
63
|
): void {
|
|
60
64
|
for (const f of fields) {
|
|
61
65
|
const path = prefix.length === 0 ? f.name : `${prefix}.${f.name}`;
|
|
62
|
-
|
|
66
|
+
// A @xmlText field reads the element's text body (carried under the #text sentinel when the
|
|
67
|
+
// element also has attributes), not a same-named child element.
|
|
68
|
+
const present = f.textContent === true ? raw[TEXT_KEY] : lookup(raw, f.name, ci);
|
|
63
69
|
if (present === undefined) {
|
|
64
70
|
// FR-011 / Phase B: an absent field with a declared @default fills the value → DEFAULTED
|
|
65
71
|
// (which satisfies a @required field). Generalized to all field kinds: an enum default is
|
|
@@ -84,6 +90,13 @@ function extractFields(
|
|
|
84
90
|
report.set(path, FieldExtraction.MALFORMED);
|
|
85
91
|
continue;
|
|
86
92
|
}
|
|
93
|
+
if (present === NULL_LITERAL) {
|
|
94
|
+
// The JSON null literal is the caller's explicit "no value": leave the field null
|
|
95
|
+
// (do NOT apply @default — an explicit null is a value, not an omission), matching a
|
|
96
|
+
// standard JSON bind. Without this the bare `null` token leaks as the string "null".
|
|
97
|
+
report.set(path, f.required ? FieldExtraction.LOST_REQUIRED : FieldExtraction.LOST_OPTIONAL);
|
|
98
|
+
continue;
|
|
99
|
+
}
|
|
87
100
|
if (f.array) {
|
|
88
101
|
// An array field: a single non-list value is treated as a one-element array
|
|
89
102
|
// (e.g. a single repeated-XML tag). Each element is coerced/recursed independently.
|
|
@@ -151,6 +164,11 @@ function extractValue(
|
|
|
151
164
|
o: ExtractOptions,
|
|
152
165
|
ci: boolean,
|
|
153
166
|
): unknown | typeof MALFORMED {
|
|
167
|
+
if (present === NULL_LITERAL) {
|
|
168
|
+
// A JSON null array element (e.g. [1, null, 3]) carries no value → drop it as malformed
|
|
169
|
+
// rather than letting the sentinel stringify.
|
|
170
|
+
return MALFORMED;
|
|
171
|
+
}
|
|
154
172
|
if (f.kind === FieldKind.OBJECT) {
|
|
155
173
|
if (f.nested != null && isPlainObject(present)) {
|
|
156
174
|
const nestedData: Record<string, unknown> = {};
|
|
@@ -159,6 +177,12 @@ function extractValue(
|
|
|
159
177
|
}
|
|
160
178
|
return MALFORMED; // object expected but scalar/non-map present
|
|
161
179
|
}
|
|
180
|
+
// A text element that also carried XML attributes is represented by readXml as a record with
|
|
181
|
+
// the body under TEXT_KEY. A scalar field reads that text (attributes ignored for scalars —
|
|
182
|
+
// preserving pre-attribute-support behaviour).
|
|
183
|
+
if (isPlainObject(present) && Object.prototype.hasOwnProperty.call(present, TEXT_KEY)) {
|
|
184
|
+
present = (present as Record<string, unknown>)[TEXT_KEY];
|
|
185
|
+
}
|
|
162
186
|
const rawStr = typeof present === "string" ? present : stringifyScalar(present);
|
|
163
187
|
return coerceValue(rawStr, f, o, path, report);
|
|
164
188
|
}
|
|
@@ -4,6 +4,14 @@
|
|
|
4
4
|
/** Sentinel: a key appeared in the text but its value was empty/cut-off (present-but-garbled). */
|
|
5
5
|
export const TRUNCATED: unique symbol = Symbol("extract.json.TRUNCATED");
|
|
6
6
|
|
|
7
|
+
/**
|
|
8
|
+
* Sentinel: the JSON `null` literal. Distinct from a JS `null` return (which this reader uses
|
|
9
|
+
* internally for "no token / garbled") and from the 4-char string "null". The extract phase maps
|
|
10
|
+
* this to an actual null field value (JSON null → null), instead of letting the bare `null` literal
|
|
11
|
+
* leak through as the text "null".
|
|
12
|
+
*/
|
|
13
|
+
export const NULL_LITERAL: unique symbol = Symbol("extract.json.NULL_LITERAL");
|
|
14
|
+
|
|
7
15
|
/** A character is JSON-insignificant whitespace. Mirrors Java Character.isWhitespace closely enough for the corpus. */
|
|
8
16
|
function isWhitespace(c: string): boolean {
|
|
9
17
|
return c === " " || c === "\t" || c === "\n" || c === "\r" || c === "\f" || c === "\v" || /\s/.test(c);
|
|
@@ -132,11 +140,13 @@ class Reader {
|
|
|
132
140
|
return sb; // unterminated string → return what we have
|
|
133
141
|
}
|
|
134
142
|
|
|
135
|
-
private readBareScalar(): string | null {
|
|
143
|
+
private readBareScalar(): string | null | typeof NULL_LITERAL {
|
|
136
144
|
const start = this.i;
|
|
137
145
|
while (this.i < this.s.length && ",}]".indexOf(this.s.charAt(this.i)) < 0) this.i++;
|
|
138
146
|
const result = this.s.substring(start, this.i).trim();
|
|
139
|
-
|
|
147
|
+
if (result.length === 0) return null; // no token read (zero-width)
|
|
148
|
+
if (result === "null") return NULL_LITERAL; // JSON null literal → explicit null, NOT the string "null"
|
|
149
|
+
return result;
|
|
140
150
|
}
|
|
141
151
|
|
|
142
152
|
private ws(): void {
|