@metaobjectsdev/render 0.7.0-rc.9 → 0.8.0-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +8 -0
- package/dist/index.js.map +1 -1
- package/dist/prompt/output-format-renderer.d.ts +8 -0
- package/dist/prompt/output-format-renderer.d.ts.map +1 -0
- package/dist/prompt/output-format-renderer.js +164 -0
- package/dist/prompt/output-format-renderer.js.map +1 -0
- package/dist/prompt/output-format-spec.d.ts +18 -0
- package/dist/prompt/output-format-spec.d.ts.map +1 -0
- package/dist/prompt/output-format-spec.js +3 -0
- package/dist/prompt/output-format-spec.js.map +1 -0
- package/dist/prompt/prompt-field.d.ts +22 -0
- package/dist/prompt/prompt-field.d.ts.map +1 -0
- package/dist/prompt/prompt-field.js +3 -0
- package/dist/prompt/prompt-field.js.map +1 -0
- package/dist/prompt/prompt-overrides.d.ts +16 -0
- package/dist/prompt/prompt-overrides.d.ts.map +1 -0
- package/dist/prompt/prompt-overrides.js +13 -0
- package/dist/prompt/prompt-overrides.js.map +1 -0
- package/dist/prompt/prompt-style.d.ts +17 -0
- package/dist/prompt/prompt-style.d.ts.map +1 -0
- package/dist/prompt/prompt-style.js +34 -0
- package/dist/prompt/prompt-style.js.map +1 -0
- package/dist/recover/coerce.d.ts +5 -0
- package/dist/recover/coerce.d.ts.map +1 -0
- package/dist/recover/coerce.js +124 -0
- package/dist/recover/coerce.js.map +1 -0
- package/dist/recover/json-forgiving-reader.d.ts +5 -0
- package/dist/recover/json-forgiving-reader.d.ts.map +1 -0
- package/dist/recover/json-forgiving-reader.js +178 -0
- package/dist/recover/json-forgiving-reader.js.map +1 -0
- package/dist/recover/locate.d.ts +5 -0
- package/dist/recover/locate.d.ts.map +1 -0
- package/dist/recover/locate.js +75 -0
- package/dist/recover/locate.js.map +1 -0
- package/dist/recover/recover-map.d.ts +7 -0
- package/dist/recover/recover-map.d.ts.map +1 -0
- package/dist/recover/recover-map.js +36 -0
- package/dist/recover/recover-map.js.map +1 -0
- package/dist/recover/recover.d.ts +4 -0
- package/dist/recover/recover.d.ts.map +1 -0
- package/dist/recover/recover.js +115 -0
- package/dist/recover/recover.js.map +1 -0
- package/dist/recover/strip.d.ts +2 -0
- package/dist/recover/strip.d.ts.map +1 -0
- package/dist/recover/strip.js +17 -0
- package/dist/recover/strip.js.map +1 -0
- package/dist/recover/types.d.ts +117 -0
- package/dist/recover/types.d.ts.map +1 -0
- package/dist/recover/types.js +124 -0
- package/dist/recover/types.js.map +1 -0
- package/dist/recover/xml-forgiving-reader.d.ts +2 -0
- package/dist/recover/xml-forgiving-reader.d.ts.map +1 -0
- package/dist/recover/xml-forgiving-reader.js +79 -0
- package/dist/recover/xml-forgiving-reader.js.map +1 -0
- package/package.json +1 -1
- package/src/index.ts +42 -0
- package/src/prompt/output-format-renderer.ts +179 -0
- package/src/prompt/output-format-spec.ts +20 -0
- package/src/prompt/prompt-field.ts +24 -0
- package/src/prompt/prompt-overrides.ts +27 -0
- package/src/prompt/prompt-style.ts +36 -0
- package/src/recover/KNOWN_GAPS.md +35 -0
- package/src/recover/coerce.ts +141 -0
- package/src/recover/json-forgiving-reader.ts +167 -0
- package/src/recover/locate.ts +72 -0
- package/src/recover/recover-map.ts +39 -0
- package/src/recover/recover.ts +146 -0
- package/src/recover/strip.ts +17 -0
- package/src/recover/types.ts +217 -0
- package/src/recover/xml-forgiving-reader.ts +82 -0
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
// Stage 7: canonicalize a raw scalar string per its FieldSpec. Returns the MALFORMED
|
|
2
|
+
// sentinel when present-but-uncoercible. Mirrors Java Coerce.
|
|
3
|
+
//
|
|
4
|
+
// Tier-2 divergence (documented, parity with the C# port's KNOWN_GAPS): JS has one
|
|
5
|
+
// number type. INT/LONG both truncate toward zero via Math.trunc and return `number`.
|
|
6
|
+
// Coercion uses Number(...) + Number.isFinite, NOT Java's Double.parseDouble — so JS
|
|
7
|
+
// does NOT accept Java's numeric suffixes ("42d"/"42f") or hex-float literals. The
|
|
8
|
+
// load-bearing contract (finite-only acceptance; NaN/±Infinity → MALFORMED; numeric
|
|
9
|
+
// classification) is identical across ports.
|
|
10
|
+
|
|
11
|
+
import { FieldKind, Tolerance } from "./types.js";
|
|
12
|
+
import type { FieldSpec, RecoverOptions, RecoveryReport } from "./types.js";
|
|
13
|
+
|
|
14
|
+
/**
 * Unique marker returned by the coercion functions in this module when a value was
 * present but could not be coerced to the declared kind or vocabulary.
 */
export const MALFORMED: unique symbol = Symbol("recover.coerce.MALFORMED");
|
|
16
|
+
|
|
17
|
+
/**
 * Canonicalize one raw scalar string according to its field spec.
 *
 * Resolution order:
 *  1. `opts.onField`, when set — a non-null result is returned as-is.
 *  2. A per-field normalizer from `opts.normalizers`, keyed by the full field path
 *     first and by the simple field name second — a non-null result is returned as-is.
 *  3. The built-in coercion for `spec.kind` (ENUM, INT/LONG, DOUBLE, BOOLEAN).
 *     Every other kind returns `raw` unchanged.
 *
 * Each hook/normalizer substitution is recorded on `report` via `addCoercion`.
 *
 * @param raw       raw scalar text; null/undefined yields MALFORMED
 * @param spec      declared shape of the field being coerced
 * @param opts      normalized recovery options (hook, normalizers, aliases, tolerance)
 * @param fieldPath path used for report entries and for the normalizer lookup
 * @param report    collector that receives coercion records
 * @returns the coerced value, or MALFORMED when the value cannot be coerced
 */
export function coerceValue(
  raw: string | null,
  spec: FieldSpec,
  opts: RecoverOptions,
  fieldPath: string,
  report: RecoveryReport,
): unknown | typeof MALFORMED {
  if (raw == null) return MALFORMED;

  if (opts.onField != null) {
    const hooked = opts.onField(fieldPath, raw, spec);
    if (hooked != null) {
      report.addCoercion({ fieldPath, from: raw, to: stringify(hooked), kind: "onField" });
      return hooked;
    }
  }

  // Only OWN entries of the normalizer table count. A plain index read would also find
  // inherited Object.prototype members, so a field named "constructor" or "toString"
  // would have that built-in invoked as if it were a user-supplied normalizer.
  const normalizers = opts.normalizers;
  const ownNormalizer = (key: string) =>
    Object.prototype.hasOwnProperty.call(normalizers, key) ? normalizers[key] : undefined;
  const norm = ownNormalizer(fieldPath) ?? ownNormalizer(spec.name);
  if (norm != null) {
    const normalized = norm(raw);
    if (normalized != null) {
      report.addCoercion({ fieldPath, from: raw, to: stringify(normalized), kind: "normalizer" });
      return normalized;
    }
  }

  // Any tolerance other than STRICT enables case-insensitive matching.
  const ci = opts.tolerance !== Tolerance.STRICT;
  switch (spec.kind) {
    case FieldKind.ENUM:
      return coerceEnum(raw, spec, opts, fieldPath, report, ci);
    case FieldKind.INT:
    case FieldKind.LONG:
      return coerceInt(raw, spec, fieldPath, report);
    case FieldKind.DOUBLE:
      return coerceDouble(raw, spec, fieldPath, report);
    case FieldKind.BOOLEAN:
      return coerceBool(raw, ci);
    default:
      return raw;
  }
}
|
|
59
|
+
|
|
60
|
+
/**
 * Coerce raw text onto an ENUM field.
 *
 * Walks `spec.enumValues` in declaration order: an exact match returns the value
 * untouched; with `ci` set, a case-insensitive match returns the declared spelling and
 * records a "case" coercion. If no value matches, aliases are consulted with the exact
 * raw text: the runtime table (`opts.aliases`) wins over the schema table
 * (`spec.enumAlias`), and a runtime entry that contradicts a schema entry is recorded as
 * "runtime-alias-override".
 *
 * @param raw    raw scalar text
 * @param spec   field spec carrying `enumValues` / `enumAlias`
 * @param opts   options carrying the runtime `aliases` table
 * @param path   field path used in report entries
 * @param report collector that receives coercion records
 * @param ci     whether enum values are matched case-insensitively
 * @returns the canonical enum value, or MALFORMED when nothing matches
 */
function coerceEnum(
  raw: string,
  spec: FieldSpec,
  opts: RecoverOptions,
  path: string,
  report: RecoveryReport,
  ci: boolean,
): unknown | typeof MALFORMED {
  if (spec.enumValues != null) {
    // Fold the raw text once instead of once per declared value.
    const foldedRaw = ci ? raw.toLowerCase() : raw;
    for (const v of spec.enumValues) {
      if (v === raw) return v;
      if (ci && v.toLowerCase() === foldedRaw) {
        report.addCoercion({ fieldPath: path, from: raw, to: v, kind: "case" });
        return v;
      }
    }
  }

  // `raw` is untrusted text. Reading `table[raw]` directly would resolve names such as
  // "constructor" or "toString" through the prototype chain and hand back a built-in
  // function as the "alias target", so only own entries of the alias tables are honored.
  const ownEntry = <T>(table: Readonly<Record<string, T>> | null | undefined): T | undefined =>
    table != null && Object.prototype.hasOwnProperty.call(table, raw) ? table[raw] : undefined;

  const schemaTarget = ownEntry(spec.enumAlias);
  const runtimeTarget = ownEntry(opts.aliases);
  if (runtimeTarget != null) {
    const kind = schemaTarget != null && schemaTarget !== runtimeTarget ? "runtime-alias-override" : "alias";
    report.addCoercion({ fieldPath: path, from: raw, to: runtimeTarget, kind });
    return runtimeTarget;
  }
  if (schemaTarget != null) {
    report.addCoercion({ fieldPath: path, from: raw, to: schemaTarget, kind: "alias" });
    return schemaTarget;
  }
  return MALFORMED;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Coerce raw text to an integral number: parse as a finite number, truncate toward zero,
 * then clamp to the spec's bounds. Returns MALFORMED when the text is not a finite number.
 */
function coerceInt(raw: string, spec: FieldSpec, path: string, report: RecoveryReport): unknown | typeof MALFORMED {
  const parsed = parseFiniteNumber(raw);
  return parsed === null ? MALFORMED : clamp(Math.trunc(parsed), spec, path, report);
}
|
|
96
|
+
|
|
97
|
+
/**
 * Coerce raw text to a floating-point number: parse as a finite number, then clamp to
 * the spec's bounds. Returns MALFORMED when the text is not a finite number.
 */
function coerceDouble(raw: string, spec: FieldSpec, path: string, report: RecoveryReport): unknown | typeof MALFORMED {
  const parsed = parseFiniteNumber(raw);
  return parsed === null ? MALFORMED : clamp(parsed, spec, path, report);
}
|
|
102
|
+
|
|
103
|
+
/**
 * Parse numeric text after trimming surrounding whitespace.
 *
 * @returns the parsed number, or null when the text is empty, carries a radix prefix
 *          (0x / 0b / 0o, optionally signed), is not numeric, or is NaN / ±Infinity
 */
function parseFiniteNumber(raw: string): number | null {
  const text = raw.trim();
  // Number("") is 0, so empty input has to be rejected before conversion.
  if (text.length === 0) return null;
  // Number() would accept radix-prefixed literals; they are rejected up front.
  const radixPrefixed = /^[+-]?0[xXbBoO]/.test(text);
  if (radixPrefixed) return null;
  const value = Number(text);
  if (!Number.isFinite(value)) return null;
  return value;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Constrain `n` to the optional `spec.min` / `spec.max` bounds (lower bound applied
 * first). When the value changes, a "clamp" coercion is recorded on `report`.
 *
 * @returns the bounded number
 */
function clamp(n: number, spec: FieldSpec, path: string, report: RecoveryReport): number {
  let bounded = n;
  if (spec.min != null && bounded < spec.min) {
    bounded = spec.min;
  }
  if (spec.max != null && bounded > spec.max) {
    bounded = spec.max;
  }
  const changed = bounded !== n;
  if (changed) {
    report.addCoercion({ fieldPath: path, from: stringify(n), to: stringify(bounded), kind: "clamp" });
  }
  return bounded;
}
|
|
121
|
+
|
|
122
|
+
/**
 * Coerce raw text to a boolean. After trimming, "true" / "yes" / "1" map to true and
 * "false" / "no" / "0" map to false; anything else is MALFORMED.
 *
 * @param raw raw scalar text
 * @param ci  when true the text is lower-cased before matching
 */
function coerceBool(raw: string, ci: boolean): boolean | typeof MALFORMED {
  const trimmed = raw.trim();
  const token = ci ? trimmed.toLowerCase() : trimmed;
  if (token === "true" || token === "yes" || token === "1") return true;
  if (token === "false" || token === "no" || token === "0") return false;
  return MALFORMED;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Render any value as text through the `String(...)` conversion; used for the
 * `from` / `to` entries of coercion records.
 */
function stringify(v: unknown): string {
  const text = String(v);
  return text;
}
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
// Stage-4 tolerant JSON reader for the bounded corpus malformation set. Never throws.
|
|
2
|
+
// Mirrors Java JsonForgivingReader. The no-hang + TRUNCATED contracts are load-bearing.
|
|
3
|
+
|
|
4
|
+
/**
 * Unique marker stored as a key's value when the key appeared in the text but its value
 * was empty or cut off (present-but-garbled).
 */
export const TRUNCATED: unique symbol = Symbol("recover.json.TRUNCATED");
|
|
6
|
+
|
|
7
|
+
/**
 * True when `c` contains a whitespace character as defined by the regex class `\s`
 * (which already covers space, tab, newline, carriage return, form feed and vertical tab).
 */
function isWhitespace(c: string): boolean {
  return /\s/.test(c);
}
|
|
11
|
+
|
|
12
|
+
/** True when `c` contains an ASCII letter, an ASCII digit, or an underscore. */
function isLetterOrDigitOrUnderscore(c: string): boolean {
  const keyChar = /[A-Za-z0-9_]/;
  return keyChar.test(c);
}
|
|
15
|
+
|
|
16
|
+
/**
 * Tolerant recursive-descent reader over a single text span.
 *
 * Accepts unquoted keys, single- or double-quoted strings, bare scalars, missing or
 * trailing commas, and input that stops mid-structure. Every loop iteration either
 * consumes input or returns, so reading always terminates. Scalars are returned as
 * strings; a key whose value is missing is stored as TRUNCATED.
 */
class Reader {
  /**
   * Deepest container nesting that is parsed. A container opened beyond this depth is
   * treated like a cut-off value, so pathologically nested input ("[[[[…") cannot
   * overflow the call stack and turn into a thrown RangeError.
   */
  private static readonly MAX_DEPTH = 256;

  private s = "";
  private i = 0;
  private depth = 0;

  /**
   * Store `value` under `key` as an own, enumerable data property.
   *
   * Plain assignment would route the key "__proto__" to the inherited accessor, which
   * either swaps the record's prototype or silently drops the entry; defining the
   * property keeps every key that appears in the text visible to callers.
   */
  private static put(target: Record<string, unknown>, key: string, value: unknown): void {
    Object.defineProperty(target, key, { value, writable: true, enumerable: true, configurable: true });
  }

  /**
   * Read `span` as an object.
   *
   * @returns the parsed record, or `{}` when the span is null/undefined or its first
   *          non-whitespace character is not "{"
   */
  read(span: string | null | undefined): Record<string, unknown> {
    this.s = span ?? "";
    this.i = 0;
    this.depth = 0;
    this.ws();
    if (this.i >= this.s.length || this.s.charAt(this.i) !== "{") return {};
    const o = this.readValue();
    return isPlainObject(o) ? (o as Record<string, unknown>) : {};
  }

  /**
   * Read the next value of any shape.
   *
   * @returns an object, array or string; null means "no value could be read here"
   *          (end of input, a zero-width bare scalar, or a container beyond MAX_DEPTH)
   */
  private readValue(): unknown {
    this.ws();
    if (this.i >= this.s.length) return null;
    const c = this.s.charAt(this.i);
    if (c === "{" || c === "[") {
      // Nothing is consumed when the cap is hit; callers treat null as "stop here".
      if (this.depth >= Reader.MAX_DEPTH) return null;
      this.depth++;
      try {
        return c === "{" ? this.readObject() : this.readArray();
      } finally {
        this.depth--;
      }
    }
    if (c === '"' || c === "'") return this.readString(c);
    return this.readBareScalar();
  }

  /** Read an object starting at "{"; returns whatever was collected when input runs out. */
  private readObject(): Record<string, unknown> {
    const m: Record<string, unknown> = {};
    this.i++; // consume '{'
    for (;;) {
      this.ws();
      if (this.i >= this.s.length) return m; // truncation
      if (this.s.charAt(this.i) === "}") {
        this.i++;
        return m;
      }
      const key = this.readKey();
      if (key === null) return m; // truncation mid-key
      this.ws();
      if (this.i >= this.s.length || this.s.charAt(this.i) !== ":") return m; // truncation before value
      this.i++; // consume ':'
      this.ws();
      if (this.i >= this.s.length) {
        Reader.put(m, key, TRUNCATED); // value cut off at end of input → present-but-garbled
        return m;
      }
      const v = this.readValue();
      if (v === null) {
        // present key, empty/zero-width value → present-but-garbled
        Reader.put(m, key, TRUNCATED);
        this.ws();
        if (this.i < this.s.length && this.s.charAt(this.i) === ",") {
          this.i++;
          continue;
        }
        if (this.i < this.s.length && this.s.charAt(this.i) === "}") this.i++;
        return m;
      }
      Reader.put(m, key, v);
      this.ws();
      if (this.i < this.s.length && this.s.charAt(this.i) === ",") this.i++; // optional/trailing comma
    }
  }

  /** Read an array starting at "["; a stray "}" or an unreadable element ends the array. */
  private readArray(): unknown[] {
    const xs: unknown[] = [];
    this.i++; // consume '['
    for (;;) {
      this.ws();
      if (this.i >= this.s.length) return xs;
      const head = this.s.charAt(this.i);
      if (head === "]" || head === "}") {
        // "}" here is a malformed close; it terminates the array just like "]"
        this.i++;
        return xs;
      }
      const v = this.readValue();
      if (v === null) {
        // zero-width / no value → stop, so the loop can never spin without consuming
        this.ws();
        if (this.i < this.s.length && (this.s.charAt(this.i) === "]" || this.s.charAt(this.i) === "}")) this.i++;
        return xs;
      }
      xs.push(v);
      this.ws();
      const next = this.i < this.s.length ? this.s.charAt(this.i) : "";
      if (next === ",") {
        this.i++;
        continue;
      }
      // "]" closes the array; end of input or any other character also stops it.
      if (next === "]") this.i++;
      return xs;
    }
  }

  /**
   * Read an object key: a quoted string, or a run of ASCII letters/digits/underscores.
   *
   * @returns the key, or null at end of input or when no key character is present
   */
  private readKey(): string | null {
    this.ws();
    if (this.i >= this.s.length) return null;
    const c = this.s.charAt(this.i);
    if (c === '"' || c === "'") return this.readString(c);
    const start = this.i;
    while (this.i < this.s.length && isLetterOrDigitOrUnderscore(this.s.charAt(this.i))) this.i++;
    return this.i > start ? this.s.substring(start, this.i) : null;
  }

  /**
   * Read a string delimited by `quoteChar`, resolving backslash escapes through
   * `unescape`. An unterminated string yields the text collected so far.
   */
  private readString(quoteChar: string): string {
    this.i++; // opening quote
    let sb = "";
    let esc = false;
    while (this.i < this.s.length) {
      const c = this.s.charAt(this.i++);
      if (esc) {
        sb += unescape(c);
        esc = false;
      } else if (c === "\\") esc = true;
      else if (c === quoteChar) return sb;
      else sb += c;
    }
    return sb; // unterminated string → return what we have
  }

  /**
   * Read an unquoted scalar up to the next ",", "}" or "]" (or end of input).
   *
   * @returns the trimmed text, or null when nothing but whitespace was read
   */
  private readBareScalar(): string | null {
    const start = this.i;
    while (this.i < this.s.length && ",}]".indexOf(this.s.charAt(this.i)) < 0) this.i++;
    const result = this.s.substring(start, this.i).trim();
    return result.length === 0 ? null : result; // null = no token read (zero-width)
  }

  /** Advance past any whitespace at the cursor. */
  private ws(): void {
    while (this.i < this.s.length && isWhitespace(this.s.charAt(this.i))) this.i++;
  }
}
|
|
146
|
+
|
|
147
|
+
/**
 * Resolve the character following a backslash: "n", "t" and "r" become newline, tab and
 * carriage return; every other character stands for itself.
 */
function unescape(c: string): string {
  if (c === "n") return "\n";
  if (c === "t") return "\t";
  if (c === "r") return "\r";
  return c;
}
|
|
159
|
+
|
|
160
|
+
/** True for any non-null object value that is not an array. */
function isPlainObject(o: unknown): boolean {
  if (o === null || typeof o !== "object") return false;
  return !Array.isArray(o);
}
|
|
163
|
+
|
|
164
|
+
/**
 * Read a tolerant JSON object span into a record using a fresh `Reader`.
 *
 * @param span text expected to start (after whitespace) with "{"; may be null/undefined
 * @returns the record produced by `Reader.read`
 */
export function readJson(span: string | null | undefined): Record<string, unknown> {
  const reader = new Reader();
  return reader.read(span);
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
// Stages 2-3: isolate and select the payload root span.
|
|
2
|
+
// Selection rule: first-closed-else-first-open. Mirrors Java Locate.
|
|
3
|
+
|
|
4
|
+
/** Backslash-escape every regex metacharacter in `s` so it can be embedded in a pattern literally. */
function quote(s: string): string {
  return s.replace(/[.*+?^${}()|[\]\\]/g, (meta) => "\\" + meta);
}
|
|
8
|
+
|
|
9
|
+
/**
 * Select the JSON payload span from `text`.
 *
 * Each "{" is tried in order of appearance; the first one that `scanBalanced` can close
 * yields the span from that brace through its matching "}". When none closes, the span
 * runs from the first "{" to the end of the text.
 *
 * @returns the selected span, or null when `text` is null/undefined or contains no "{"
 */
export function locateJson(text: string | null | undefined): string | null {
  if (text == null) return null;
  const firstOpen = text.indexOf("{");
  if (firstOpen < 0) return null;
  for (let at = firstOpen; at >= 0; at = text.indexOf("{", at + 1)) {
    const close = scanBalanced(text, at);
    if (close >= 0) return text.substring(at, close + 1);
  }
  return text.substring(firstOpen);
}
|
|
22
|
+
|
|
23
|
+
/**
 * Scan forward from `open`, tracking brace depth, and report where the depth returns to
 * zero. Braces inside double-quoted strings are ignored, and a backslash inside a string
 * escapes the following character.
 *
 * @param s    text to scan
 * @param open index at which scanning starts
 * @returns index of the "}" that brings the depth back to zero, or -1 if input ends first
 */
function scanBalanced(s: string, open: number): number {
  let nesting = 0;
  let quoted = false;
  let escaped = false;
  for (let pos = open; pos < s.length; pos++) {
    const ch = s.charAt(pos);
    if (quoted) {
      if (escaped) escaped = false;
      else if (ch === "\\") escaped = true;
      else if (ch === '"') quoted = false;
      continue;
    }
    switch (ch) {
      case '"':
        quoted = true;
        break;
      case "{":
        nesting++;
        break;
      case "}":
        nesting--;
        if (nesting === 0) return pos;
        break;
      default:
        break;
    }
  }
  return -1;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Select the XML payload span for the element named `rootName`.
 *
 * The opening tag may carry attributes. The closing tag is searched for only at or after
 * the end of the opening tag; if it is absent the span runs from the opening tag to the
 * end of the text.
 *
 * @param text            text to search; null/undefined yields null
 * @param rootName        element name, matched literally; null/undefined yields null
 * @param caseInsensitive whether tag names are matched case-insensitively
 * @returns the selected span, or null when no opening tag is found
 */
export function locateXml(
  text: string | null | undefined,
  rootName: string | null | undefined,
  caseInsensitive: boolean,
): string | null {
  if (text == null || rootName == null) return null;
  const flags = caseInsensitive ? "i" : "";
  const name = quote(rootName);

  const open = new RegExp(`<${name}(\\s[^>]*)?>`, flags).exec(text);
  if (open == null) return null;
  const start = open.index;
  const openEnd = start + open[0].length;

  // The start offset is handed to matchFrom; assigning lastIndex on this regex would do
  // nothing because it carries neither the "g" nor the "y" flag.
  const close = matchFrom(new RegExp(`</${name}\\s*>`, flags), text, openEnd);
  if (close != null) return text.substring(start, close.index + close[0].length);
  return text.substring(start);
}
|
|
66
|
+
|
|
67
|
+
/**
 * Find the first match of `re` in `text` that starts at index `from` or later.
 * Works on a global copy of the pattern, so the caller's regex is left untouched.
 */
function matchFrom(re: RegExp, text: string, from: number): RegExpExecArray | null {
  const flags = re.flags.includes("g") ? re.flags : `${re.flags}g`;
  const scanner = new RegExp(re.source, flags);
  scanner.lastIndex = from;
  return scanner.exec(text);
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
// Null-safe coercions from a RecoverOutcome data map onto typed values. Generated
|
|
2
|
+
// recover(...) calls these. Mirrors Java RecoverMap.
|
|
3
|
+
//
|
|
4
|
+
// Tier-2 divergence: JS has one number type, so asInt/asLong both return `number | null`
|
|
5
|
+
// and truncate toward zero via Math.trunc (Java intValue()/longValue() also truncate).
|
|
6
|
+
// The numeric helpers gate on actual numbers (mirroring Java `instanceof Number`): a
|
|
7
|
+
// non-numeric string or a boolean returns null rather than coercing.
|
|
8
|
+
|
|
9
|
+
/**
 * Read `d[k]` as a string.
 *
 * @returns null when the entry is null/undefined; otherwise its `String(...)` form
 *          (strings are returned as they are)
 */
export function asString(d: Record<string, unknown>, k: string): string | null {
  const value = d[k];
  return value == null ? null : String(value);
}
|
|
14
|
+
|
|
15
|
+
/**
 * Read `d[k]` as an integer.
 *
 * @returns the value truncated toward zero when it is a finite number; null for anything
 *          else (strings, booleans, NaN, ±Infinity, missing entries)
 */
export function asInt(d: Record<string, unknown>, k: string): number | null {
  const value = d[k];
  if (typeof value !== "number" || !Number.isFinite(value)) return null;
  return Math.trunc(value);
}
|
|
19
|
+
|
|
20
|
+
/**
 * Read `d[k]` as a long integer.
 *
 * @returns the value truncated toward zero when it is a finite number; null for anything
 *          else (strings, booleans, NaN, ±Infinity, missing entries)
 */
export function asLong(d: Record<string, unknown>, k: string): number | null {
  const value = d[k];
  if (typeof value !== "number" || !Number.isFinite(value)) return null;
  return Math.trunc(value);
}
|
|
24
|
+
|
|
25
|
+
/**
 * Read `d[k]` as a floating-point number.
 *
 * @returns the value when it is a finite number; null for anything else
 */
export function asDouble(d: Record<string, unknown>, k: string): number | null {
  const value = d[k];
  if (typeof value !== "number" || !Number.isFinite(value)) return null;
  return value;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Read `d[k]` as a boolean.
 *
 * @returns the value when it is an actual boolean; null for anything else
 */
export function asBool(d: Record<string, unknown>, k: string): boolean | null {
  const value = d[k];
  if (typeof value !== "boolean") return null;
  return value;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Read `d[k]` as a list of strings.
 *
 * @returns null when the entry is not an array; otherwise a new array in which
 *          null/undefined elements become null and every other element is rendered
 *          through `String(...)`
 */
export function asStringList(d: Record<string, unknown>, k: string): (string | null)[] | null {
  const value = d[k];
  if (!Array.isArray(value)) return null;
  const toText = (element: unknown): string | null => (element == null ? null : String(element));
  return value.map((element) => toText(element));
}
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
// Public entry point. Runs the staged pipeline; NEVER throws. Mirrors Java Recover.
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
Format,
|
|
5
|
+
FieldKind,
|
|
6
|
+
FieldRecovery,
|
|
7
|
+
Tolerance,
|
|
8
|
+
normalizeOptions,
|
|
9
|
+
} from "./types.js";
|
|
10
|
+
import type { FieldSpec, RecoverOptions, RecoverOutcome, RecoverSchema } from "./types.js";
|
|
11
|
+
import { RecoveryReport } from "./types.js";
|
|
12
|
+
import { strip } from "./strip.js";
|
|
13
|
+
import { locateJson, locateXml } from "./locate.js";
|
|
14
|
+
import { readJson, TRUNCATED } from "./json-forgiving-reader.js";
|
|
15
|
+
import { readXml } from "./xml-forgiving-reader.js";
|
|
16
|
+
import { coerceValue, MALFORMED } from "./coerce.js";
|
|
17
|
+
|
|
18
|
+
/**
 * Recover structured data from dirty `text` according to `schema`.
 *
 * Pipeline: normalize the options, strip code fences, locate the payload span (JSON
 * object or XML root element, depending on `schema.format`), read it with the matching
 * forgiving reader, then extract and coerce every schema field. When no payload span was
 * found the report is marked empty.
 *
 * @param text   raw text to recover from; may be null/undefined
 * @param schema format, root name and field specs of the expected payload
 * @param opts   optional partial options, completed by `normalizeOptions`
 * @returns the recovered data together with the per-field report
 */
export function recover(
  text: string | null | undefined,
  schema: RecoverSchema,
  opts?: Partial<RecoverOptions> | null,
): RecoverOutcome {
  const o = normalizeOptions(opts);
  const report = new RecoveryReport();
  const data: Record<string, unknown> = {};

  const stripped = strip(text);
  // Any tolerance other than STRICT enables case-insensitive matching.
  const ci = o.tolerance !== Tolerance.STRICT;
  const isJson = schema.format === Format.JSON;

  const span = isJson ? locateJson(stripped) : locateXml(stripped, schema.rootName, ci);
  const raw: Record<string, unknown> = span == null ? {} : isJson ? readJson(span) : readXml(span, ci);

  const nothingLocated = stripped.length === 0 || span == null;
  if (isEmptyRecord(raw) && nothingLocated) {
    report.markEmpty();
  }

  extract(schema.fields, raw, "", data, report, o, ci);
  return { data, report };
}
|
|
50
|
+
|
|
51
|
+
/**
 * Extract every field of one schema level from `raw` into `data`, recording a status per
 * field path on `report`.
 *
 * - absent key          → LOST_REQUIRED or LOST_OPTIONAL, nothing written to `data`
 * - TRUNCATED value     → MALFORMED, nothing written to `data`
 * - array field         → each element is coerced on its own (a lone non-array value
 *                         counts as a one-element array); the successfully coerced
 *                         elements are always written, and the field is MALFORMED if any
 *                         element failed
 * - array for a scalar  → MALFORMED
 * - scalar / object     → written and RECOVERED on success, MALFORMED otherwise
 *
 * @param fields field specs of this level
 * @param raw    reader output for this level
 * @param prefix dotted path of the enclosing object ("" at the root)
 * @param data   output record for this level, keyed by field name
 * @param report collector for per-path statuses and coercions
 * @param o      normalized options
 * @param ci     whether keys are matched case-insensitively
 */
function extract(
  fields: readonly FieldSpec[],
  raw: Record<string, unknown>,
  prefix: string,
  data: Record<string, unknown>,
  report: RecoveryReport,
  o: RecoverOptions,
  ci: boolean,
): void {
  for (const field of fields) {
    const path = prefix.length === 0 ? field.name : `${prefix}.${field.name}`;
    const found = lookup(raw, field.name, ci);

    if (found === undefined) {
      const lost = field.required ? FieldRecovery.LOST_REQUIRED : FieldRecovery.LOST_OPTIONAL;
      report.set(path, lost);
      continue;
    }

    if (found === TRUNCATED) {
      // key present, value empty or cut off
      report.set(path, FieldRecovery.MALFORMED);
      continue;
    }

    if (field.array) {
      const items: unknown[] = Array.isArray(found) ? found : [found];
      const kept: unknown[] = [];
      let sawMalformed = false;
      for (const [idx, item] of items.entries()) {
        const coerced = extractValue(field, item, `${path}[${idx}]`, report, o, ci);
        if (coerced === MALFORMED) {
          sawMalformed = true;
        } else {
          kept.push(coerced);
        }
      }
      // Partial recovery: the good elements are kept even when the field is MALFORMED.
      data[field.name] = kept;
      report.set(path, sawMalformed ? FieldRecovery.MALFORMED : FieldRecovery.RECOVERED);
      continue;
    }

    if (Array.isArray(found)) {
      // a list arrived where a single value was declared
      report.set(path, FieldRecovery.MALFORMED);
      continue;
    }

    const value = extractValue(field, found, path, report, o, ci);
    if (value === MALFORMED) {
      report.set(path, FieldRecovery.MALFORMED);
      continue;
    }
    data[field.name] = value;
    report.set(path, FieldRecovery.RECOVERED);
  }
}
|
|
103
|
+
|
|
104
|
+
/**
 * Coerce a single (non-array) element.
 *
 * OBJECT fields recurse into `f.nested` when a nested schema exists and the element is a
 * non-array object; otherwise they are MALFORMED. Every other kind is turned into text
 * (strings as-is, anything else via `stringifyScalar`) and handed to `coerceValue`.
 *
 * @returns the coerced value or nested record, or MALFORMED on failure
 */
function extractValue(
  f: FieldSpec,
  present: unknown,
  path: string,
  report: RecoveryReport,
  o: RecoverOptions,
  ci: boolean,
): unknown | typeof MALFORMED {
  if (f.kind !== FieldKind.OBJECT) {
    const rawStr = typeof present === "string" ? present : stringifyScalar(present);
    return coerceValue(rawStr, f, o, path, report);
  }

  // object expected: needs both a nested schema and an object-shaped value
  if (f.nested == null || !isPlainObject(present)) return MALFORMED;

  const nestedData: Record<string, unknown> = {};
  extract(f.nested.fields, present as Record<string, unknown>, path, nestedData, report, o, ci);
  return nestedData;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Look up `name` among the own keys of `raw`.
 *
 * An exact own-key match always wins. With `ci` set, the first own enumerable key whose
 * lower-cased form equals the lower-cased name is used instead.
 *
 * @returns the stored value, or `undefined` when no key matches
 */
function lookup(raw: Record<string, unknown>, name: string, ci: boolean): unknown {
  if (Object.prototype.hasOwnProperty.call(raw, name)) return raw[name];
  if (!ci) return undefined;
  const wanted = name.toLowerCase();
  const match = Object.keys(raw).find((key) => key.toLowerCase() === wanted);
  return match === undefined ? undefined : raw[match];
}
|
|
134
|
+
|
|
135
|
+
/** True for any non-null object value that is not an array. */
function isPlainObject(o: unknown): boolean {
  if (o === null || typeof o !== "object") return false;
  return !Array.isArray(o);
}
|
|
138
|
+
|
|
139
|
+
/** True when `o` has no own enumerable string keys. */
function isEmptyRecord(o: Record<string, unknown>): boolean {
  const ownKeys = Object.keys(o);
  return ownKeys.length === 0;
}
|
|
142
|
+
|
|
143
|
+
/** Render a non-string reader value as text through the `String(...)` conversion. */
function stringifyScalar(v: unknown): string {
  const text = String(v);
  return text;
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
// Stage 1: remove markdown code-fence markers. Prose around the payload is left for Locate.
|
|
2
|
+
// Mirrors Java Strip.
|
|
3
|
+
|
|
4
|
+
// Matches the first fenced block: three backticks, an optional language tag, a line
// break, the lazily captured body, then the closing three backticks.
const FENCE = /```[a-zA-Z0-9_-]*\s*\r?\n([\s\S]*?)\r?\n?```/;

/**
 * Remove the markers of the first markdown code fence, keeping its body and any text
 * around it, and trim the result.
 *
 * @param raw text to clean; null/undefined yields ""
 * @returns the trimmed text with the first fence's markers removed, or the trimmed input
 *          when it contains no fence
 */
export function strip(raw: string | null | undefined): string {
  if (raw == null) return "";
  const fence = FENCE.exec(raw);
  if (fence === null) return raw.trim();

  const whole = fence[0];
  const inner = fence[1] ?? "";
  const head = raw.substring(0, fence.index);
  const tail = raw.substring(fence.index + whole.length);
  return `${head}${inner}${tail}`.trim();
}
|