@eksml/xml 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +588 -0
- package/dist/converters/fromLossless.d.mts +14 -0
- package/dist/converters/fromLossless.d.mts.map +1 -0
- package/dist/converters/fromLossless.mjs +35 -0
- package/dist/converters/fromLossless.mjs.map +1 -0
- package/dist/converters/fromLossy.d.mts +18 -0
- package/dist/converters/fromLossy.d.mts.map +1 -0
- package/dist/converters/fromLossy.mjs +91 -0
- package/dist/converters/fromLossy.mjs.map +1 -0
- package/dist/converters/lossless.d.mts +39 -0
- package/dist/converters/lossless.d.mts.map +1 -0
- package/dist/converters/lossless.mjs +74 -0
- package/dist/converters/lossless.mjs.map +1 -0
- package/dist/converters/lossy.d.mts +42 -0
- package/dist/converters/lossy.d.mts.map +1 -0
- package/dist/converters/lossy.mjs +158 -0
- package/dist/converters/lossy.mjs.map +1 -0
- package/dist/htmlConstants-D6fsKbZ-.mjs +30 -0
- package/dist/htmlConstants-D6fsKbZ-.mjs.map +1 -0
- package/dist/parser-BfdEfWDg.d.mts +95 -0
- package/dist/parser-BfdEfWDg.d.mts.map +1 -0
- package/dist/parser-CYq309aR.mjs +479 -0
- package/dist/parser-CYq309aR.mjs.map +1 -0
- package/dist/parser.d.mts +2 -0
- package/dist/parser.mjs +2 -0
- package/dist/sax.d.mts +64 -0
- package/dist/sax.d.mts.map +1 -0
- package/dist/sax.mjs +70 -0
- package/dist/sax.mjs.map +1 -0
- package/dist/saxEngine-BDnD7ruG.mjs +750 -0
- package/dist/saxEngine-BDnD7ruG.mjs.map +1 -0
- package/dist/utilities/index.d.mts +88 -0
- package/dist/utilities/index.d.mts.map +1 -0
- package/dist/utilities/index.mjs +87 -0
- package/dist/utilities/index.mjs.map +1 -0
- package/dist/writer.d.mts +58 -0
- package/dist/writer.d.mts.map +1 -0
- package/dist/writer.mjs +357 -0
- package/dist/writer.mjs.map +1 -0
- package/dist/xmlParseStream.d.mts +138 -0
- package/dist/xmlParseStream.d.mts.map +1 -0
- package/dist/xmlParseStream.mjs +313 -0
- package/dist/xmlParseStream.mjs.map +1 -0
- package/package.json +100 -0
- package/src/converters/fromLossless.ts +80 -0
- package/src/converters/fromLossy.ts +180 -0
- package/src/converters/lossless.ts +116 -0
- package/src/converters/lossy.ts +274 -0
- package/src/parser.ts +728 -0
- package/src/sax.ts +157 -0
- package/src/saxEngine.ts +1157 -0
- package/src/utilities/escapeRegExp.ts +19 -0
- package/src/utilities/filter.ts +63 -0
- package/src/utilities/getElementById.ts +21 -0
- package/src/utilities/getElementsByClassName.ts +22 -0
- package/src/utilities/htmlConstants.ts +26 -0
- package/src/utilities/index.ts +7 -0
- package/src/utilities/isElementNode.ts +19 -0
- package/src/utilities/isTextNode.ts +19 -0
- package/src/utilities/toContentString.ts +23 -0
- package/src/writer.ts +650 -0
- package/src/xmlParseStream.ts +597 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { n as TNode } from "../parser-BfdEfWDg.mjs";
|
|
2
|
+
import { LossyValue } from "./lossy.mjs";
|
|
3
|
+
|
|
4
|
+
//#region src/converters/fromLossy.d.ts
|
|
5
|
+
/**
|
|
6
|
+
* Convert a lossy JS object back to a `(TNode | string)[]` DOM tree.
|
|
7
|
+
*
|
|
8
|
+
* Accepts the same shapes that `lossy()` returns:
|
|
9
|
+
* - A single `LossyValue` (e.g. `{ root: { ... } }`)
|
|
10
|
+
* - An array of `LossyValue` (multiple roots)
|
|
11
|
+
*
|
|
12
|
+
* @param input - A lossy value or array of lossy values.
|
|
13
|
+
* @returns A DOM array suitable for `write()` or further processing.
|
|
14
|
+
*/
|
|
15
|
+
declare function fromLossy(input: LossyValue | LossyValue[]): (TNode | string)[];
|
|
16
|
+
//#endregion
|
|
17
|
+
export { fromLossy };
|
|
18
|
+
//# sourceMappingURL=fromLossy.d.mts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fromLossy.d.mts","names":[],"sources":["../../src/converters/fromLossy.ts"],"mappings":";;;;;;;;;;;;;;iBAiDgB,SAAA,CACd,KAAA,EAAO,UAAA,GAAa,UAAA,MAClB,KAAA"}
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
//#region src/converters/fromLossy.ts
|
|
2
|
+
const DOLLAR = 36;
|
|
3
|
+
/**
|
|
4
|
+
* Convert a lossy JS object back to a `(TNode | string)[]` DOM tree.
|
|
5
|
+
*
|
|
6
|
+
* Accepts the same shapes that `lossy()` returns:
|
|
7
|
+
* - A single `LossyValue` (e.g. `{ root: { ... } }`)
|
|
8
|
+
* - An array of `LossyValue` (multiple roots)
|
|
9
|
+
*
|
|
10
|
+
* @param input - A lossy value or array of lossy values.
|
|
11
|
+
* @returns A DOM array suitable for `write()` or further processing.
|
|
12
|
+
*/
|
|
13
|
+
function fromLossy(input) {
|
|
14
|
+
if (Array.isArray(input)) {
|
|
15
|
+
const result = [];
|
|
16
|
+
for (let i = 0; i < input.length; i++) {
|
|
17
|
+
const item = input[i];
|
|
18
|
+
if (typeof item === "string") result.push(item);
|
|
19
|
+
else if (item === null) continue;
|
|
20
|
+
else convertTopLevelObject(item, result);
|
|
21
|
+
}
|
|
22
|
+
return result;
|
|
23
|
+
}
|
|
24
|
+
if (input === null || typeof input === "string") return typeof input === "string" ? [input] : [];
|
|
25
|
+
const result = [];
|
|
26
|
+
convertTopLevelObject(input, result);
|
|
27
|
+
return result;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Convert a top-level `{ tagName: value }` object into TNode(s) and push
|
|
31
|
+
* them onto the result array.
|
|
32
|
+
*/
|
|
33
|
+
function convertTopLevelObject(object, result) {
|
|
34
|
+
const keys = Object.keys(object);
|
|
35
|
+
for (let i = 0; i < keys.length; i++) {
|
|
36
|
+
const tagName = keys[i];
|
|
37
|
+
result.push(convertElement(tagName, object[tagName]));
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Convert a tag name + lossy value into a TNode.
|
|
42
|
+
*/
|
|
43
|
+
function convertElement(tagName, value) {
|
|
44
|
+
if (value === null) return {
|
|
45
|
+
tagName,
|
|
46
|
+
attributes: null,
|
|
47
|
+
children: []
|
|
48
|
+
};
|
|
49
|
+
if (typeof value === "string") return {
|
|
50
|
+
tagName,
|
|
51
|
+
attributes: null,
|
|
52
|
+
children: [value]
|
|
53
|
+
};
|
|
54
|
+
if (Array.isArray(value)) return convertElement(tagName, value[0] ?? null);
|
|
55
|
+
const objectValue = value;
|
|
56
|
+
const objectKeys = Object.keys(objectValue);
|
|
57
|
+
let attributes = null;
|
|
58
|
+
const children = [];
|
|
59
|
+
for (let i = 0; i < objectKeys.length; i++) {
|
|
60
|
+
const key = objectKeys[i];
|
|
61
|
+
if (key === "$$") {
|
|
62
|
+
const mixedArray = objectValue.$$;
|
|
63
|
+
for (let j = 0; j < mixedArray.length; j++) {
|
|
64
|
+
const mixedEntry = mixedArray[j];
|
|
65
|
+
if (typeof mixedEntry === "string") children.push(mixedEntry);
|
|
66
|
+
else {
|
|
67
|
+
const entryTagName = Object.keys(mixedEntry)[0];
|
|
68
|
+
children.push(convertElement(entryTagName, mixedEntry[entryTagName]));
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
} else if (key.charCodeAt(0) === DOLLAR) {
|
|
72
|
+
if (attributes === null) attributes = Object.create(null);
|
|
73
|
+
const attributeName = key.substring(1);
|
|
74
|
+
const attributeValue = objectValue[key];
|
|
75
|
+
attributes[attributeName] = attributeValue === null ? null : String(attributeValue);
|
|
76
|
+
} else {
|
|
77
|
+
const childValue = objectValue[key];
|
|
78
|
+
if (Array.isArray(childValue)) for (let j = 0; j < childValue.length; j++) children.push(convertElement(key, childValue[j]));
|
|
79
|
+
else children.push(convertElement(key, childValue));
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
return {
|
|
83
|
+
tagName,
|
|
84
|
+
attributes,
|
|
85
|
+
children
|
|
86
|
+
};
|
|
87
|
+
}
|
|
88
|
+
//#endregion
|
|
89
|
+
export { fromLossy };
|
|
90
|
+
|
|
91
|
+
//# sourceMappingURL=fromLossy.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fromLossy.mjs","names":[],"sources":["../../src/converters/fromLossy.ts"],"sourcesContent":["/**\n * fromLossy — convert a lossy JS object back to a TNode DOM tree.\n *\n * This is the inverse of `lossy()`. Because the lossy format does not preserve\n * sibling order between different tag names, the reconstruction is best-effort:\n * elements appear in JS object key insertion order, and arrays expand in\n * sequence. Mixed content (`$$` arrays) preserves interleaving exactly.\n *\n * Lossy format rules (reversed):\n * - `null` → empty element (no attributes, no children)\n * - `string` → text-only element (one string child)\n * - `$`-prefixed keys → attributes (prefix stripped)\n * - `$$` array → mixed content (strings become text, objects become elements)\n * - Non-`$` keys → element children (arrays expand to repeated siblings)\n * - Top-level single-key object → single root element\n * - Top-level array → multiple root elements\n *\n * @example\n * ```ts\n * import { fromLossy } from \"@eksml/xml/from-lossy\";\n * import { lossy } from \"@eksml/xml/lossy\";\n * import { write } from \"@eksml/xml/writer\";\n *\n * const obj = lossy('<root><item>hello</item><item>world</item></root>');\n * const dom = fromLossy(obj);\n * const xml = write(dom);\n * ```\n */\n\nimport type { TNode } from '#src/parser.ts';\nimport type {\n LossyValue,\n LossyObject,\n LossyMixedEntry,\n} from '#src/converters/lossy.ts';\n// @generated:char-codes:begin\nconst DOLLAR = 36; // $\n// @generated:char-codes:end\n\n/**\n * Convert a lossy JS object back to a `(TNode | string)[]` DOM tree.\n *\n * Accepts the same shapes that `lossy()` returns:\n * - A single `LossyValue` (e.g. `{ root: { ... 
} }`)\n * - An array of `LossyValue` (multiple roots)\n *\n * @param input - A lossy value or array of lossy values.\n * @returns A DOM array suitable for `write()` or further processing.\n */\nexport function fromLossy(\n input: LossyValue | LossyValue[],\n): (TNode | string)[] {\n // Array of top-level values\n if (Array.isArray(input)) {\n const result: (TNode | string)[] = [];\n for (let i = 0; i < input.length; i++) {\n const item = input[i]!;\n if (typeof item === 'string') {\n result.push(item);\n } else if (item === null) {\n // Top-level null — unusual, skip\n continue;\n } else {\n // Each object in the array is { tagName: value }\n convertTopLevelObject(item, result);\n }\n }\n return result;\n }\n\n // Single value\n if (input === null || typeof input === 'string') {\n // Bare null or string at top level — can't form a TNode without a tag name\n return typeof input === 'string' ? [input] : [];\n }\n\n // Single-root object: { rootTag: value }\n const result: (TNode | string)[] = [];\n convertTopLevelObject(input, result);\n return result;\n}\n\n/**\n * Convert a top-level `{ tagName: value }` object into TNode(s) and push\n * them onto the result array.\n */\nfunction convertTopLevelObject(\n object: LossyObject,\n result: (TNode | string)[],\n): void {\n const keys = Object.keys(object);\n for (let i = 0; i < keys.length; i++) {\n const tagName = keys[i]!;\n result.push(convertElement(tagName, object[tagName]!));\n }\n}\n\n/**\n * Convert a tag name + lossy value into a TNode.\n */\nfunction convertElement(\n tagName: string,\n value: LossyValue | LossyValue[] | LossyMixedEntry[],\n): TNode {\n // null → empty element\n if (value === null) {\n return { tagName, attributes: null, children: [] };\n }\n\n // string → text-only element\n if (typeof value === 'string') {\n return { tagName, attributes: null, children: [value] };\n }\n\n // Array → this is a repeated-sibling array at the parent level,\n // but convertElement is called per-value, so arrays 
here shouldn't\n // normally occur. Defensive: treat as first element.\n if (Array.isArray(value)) {\n // This case is handled by the caller expanding arrays.\n // If somehow called directly, use first item.\n return convertElement(tagName, value[0] ?? null);\n }\n\n // LossyObject → extract attributes ($-prefixed), check for $$ mixed content,\n // then process element-only children\n const objectValue = value as LossyObject;\n const objectKeys = Object.keys(objectValue);\n\n let attributes: Record<string, string | null> | null = null;\n const children: (TNode | string)[] = [];\n let hasMixed = false;\n\n for (let i = 0; i < objectKeys.length; i++) {\n const key = objectKeys[i]!;\n\n if (key === '$$') {\n // Mixed content array\n hasMixed = true;\n const mixedArray = objectValue.$$ as LossyMixedEntry[];\n for (let j = 0; j < mixedArray.length; j++) {\n const mixedEntry = mixedArray[j]!;\n if (typeof mixedEntry === 'string') {\n children.push(mixedEntry);\n } else {\n // { tagName: value } — single-key object\n const entryKeys = Object.keys(mixedEntry);\n const entryTagName = entryKeys[0]!;\n children.push(\n convertElement(\n entryTagName,\n (mixedEntry as LossyObject)[entryTagName]!,\n ),\n );\n }\n }\n } else if (key.charCodeAt(0) === DOLLAR) {\n // Attribute — strip the $ prefix\n if (attributes === null) {\n attributes = Object.create(null) as Record<string, string | null>;\n }\n const attributeName = key.substring(1);\n const attributeValue = objectValue[key];\n attributes[attributeName] =\n attributeValue === null ? 
null : String(attributeValue);\n } else {\n // Element child(ren)\n const childValue = objectValue[key]!;\n if (Array.isArray(childValue)) {\n // Repeated siblings\n for (let j = 0; j < childValue.length; j++) {\n children.push(convertElement(key, childValue[j]!));\n }\n } else {\n children.push(convertElement(key, childValue as LossyValue));\n }\n }\n }\n\n return { tagName, attributes, children };\n}\n"],"mappings":";AAoCA,MAAM,SAAS;;;;;;;;;;;AAaf,SAAgB,UACd,OACoB;AAEpB,KAAI,MAAM,QAAQ,MAAM,EAAE;EACxB,MAAM,SAA6B,EAAE;AACrC,OAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;GACrC,MAAM,OAAO,MAAM;AACnB,OAAI,OAAO,SAAS,SAClB,QAAO,KAAK,KAAK;YACR,SAAS,KAElB;OAGA,uBAAsB,MAAM,OAAO;;AAGvC,SAAO;;AAIT,KAAI,UAAU,QAAQ,OAAO,UAAU,SAErC,QAAO,OAAO,UAAU,WAAW,CAAC,MAAM,GAAG,EAAE;CAIjD,MAAM,SAA6B,EAAE;AACrC,uBAAsB,OAAO,OAAO;AACpC,QAAO;;;;;;AAOT,SAAS,sBACP,QACA,QACM;CACN,MAAM,OAAO,OAAO,KAAK,OAAO;AAChC,MAAK,IAAI,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;EACpC,MAAM,UAAU,KAAK;AACrB,SAAO,KAAK,eAAe,SAAS,OAAO,SAAU,CAAC;;;;;;AAO1D,SAAS,eACP,SACA,OACO;AAEP,KAAI,UAAU,KACZ,QAAO;EAAE;EAAS,YAAY;EAAM,UAAU,EAAE;EAAE;AAIpD,KAAI,OAAO,UAAU,SACnB,QAAO;EAAE;EAAS,YAAY;EAAM,UAAU,CAAC,MAAM;EAAE;AAMzD,KAAI,MAAM,QAAQ,MAAM,CAGtB,QAAO,eAAe,SAAS,MAAM,MAAM,KAAK;CAKlD,MAAM,cAAc;CACpB,MAAM,aAAa,OAAO,KAAK,YAAY;CAE3C,IAAI,aAAmD;CACvD,MAAM,WAA+B,EAAE;AAGvC,MAAK,IAAI,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;EAC1C,MAAM,MAAM,WAAW;AAEvB,MAAI,QAAQ,MAAM;GAGhB,MAAM,aAAa,YAAY;AAC/B,QAAK,IAAI,IAAI,GAAG,IAAI,WAAW,QAAQ,KAAK;IAC1C,MAAM,aAAa,WAAW;AAC9B,QAAI,OAAO,eAAe,SACxB,UAAS,KAAK,WAAW;SACpB;KAGL,MAAM,eADY,OAAO,KAAK,WAAW,CACV;AAC/B,cAAS,KACP,eACE,cACC,WAA2B,cAC7B,CACF;;;aAGI,IAAI,WAAW,EAAE,KAAK,QAAQ;AAEvC,OAAI,eAAe,KACjB,cAAa,OAAO,OAAO,KAAK;GAElC,MAAM,gBAAgB,IAAI,UAAU,EAAE;GACtC,MAAM,iBAAiB,YAAY;AACnC,cAAW,iBACT,mBAAmB,OAAO,OAAO,OAAO,eAAe;SACpD;GAEL,MAAM,aAAa,YAAY;AAC/B,OAAI,MAAM,QAAQ,WAAW,CAE3B,MAAK,IAAI,IAAI,GAAG,IAAI,WAAW,QAAQ,IACrC,UAAS,KAAK,eAAe,KAAK,WAAW,GAAI,CAAC;OAGpD,UAAS,KAAK,eAAe,KAAK,WAAyB,CAAC;;;AAKlE,QAAO;EAAE;EAAS;EAAY;EAAU"}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { n as TNode, t as ParseOptions } from "../parser-BfdEfWDg.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/converters/lossless.d.ts
|
|
4
|
+
/** A single entry in the JSON output array. */
|
|
5
|
+
type LosslessEntry = {
|
|
6
|
+
[tagName: string]: LosslessEntry[];
|
|
7
|
+
} | {
|
|
8
|
+
$text: string;
|
|
9
|
+
} | {
|
|
10
|
+
$attr: Record<string, string | null>;
|
|
11
|
+
} | {
|
|
12
|
+
$comment: string;
|
|
13
|
+
};
|
|
14
|
+
/** Options for lossless. */
|
|
15
|
+
interface LosslessOptions extends ParseOptions {}
|
|
16
|
+
/**
|
|
17
|
+
* Convert a single `TNode | string` item into its lossless representation.
|
|
18
|
+
*
|
|
19
|
+
* - `TNode` → `{ tagName: [ ...children ] }`
|
|
20
|
+
* - `string` → `{ $text: "..." }` or `{ $comment: "..." }`
|
|
21
|
+
*
|
|
22
|
+
* Used internally by `XmlParseStream` when `output: 'lossless'` is set.
|
|
23
|
+
*
|
|
24
|
+
* @internal
|
|
25
|
+
*/
|
|
26
|
+
declare function convertItemToLossless(item: TNode | string): LosslessEntry;
|
|
27
|
+
/**
|
|
28
|
+
* Parse an XML/HTML string or convert a pre-parsed DOM tree into an
|
|
29
|
+
* order-preserving JSON-friendly structure.
|
|
30
|
+
*
|
|
31
|
+
* @param input - An XML/HTML string, or a pre-parsed `(TNode | string)[]` DOM array
|
|
32
|
+
* @param options - Parsing options (only used when `input` is a string)
|
|
33
|
+
* @returns Array of top-level JSON entries
|
|
34
|
+
*/
|
|
35
|
+
declare function lossless(input: string, options?: LosslessOptions): LosslessEntry[];
|
|
36
|
+
declare function lossless(input: (TNode | string)[]): LosslessEntry[];
|
|
37
|
+
//#endregion
|
|
38
|
+
export { LosslessEntry, LosslessOptions, convertItemToLossless, lossless };
|
|
39
|
+
//# sourceMappingURL=lossless.d.mts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"lossless.d.mts","names":[],"sources":["../../src/converters/lossless.ts"],"mappings":";;;;KAkCY,aAAA;EAAA,CACL,OAAA,WAAkB,aAAA;AAAA;EACnB,KAAA;AAAA;EACA,KAAA,EAAO,MAAA;AAAA;EACP,QAAA;AAAA;;UAGW,eAAA,SAAwB,YAAA;;;;;;;;;;;iBAyCzB,qBAAA,CAAsB,IAAA,EAAM,KAAA,YAAiB,aAAA;;;;;;;;;iBAa7C,QAAA,CACd,KAAA,UACA,OAAA,GAAU,eAAA,GACT,aAAA;AAAA,iBACa,QAAA,CAAS,KAAA,GAAQ,KAAA,eAAoB,aAAA"}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { t as parse } from "../parser-CYq309aR.mjs";
|
|
2
|
+
//#region src/converters/lossless.ts
|
|
3
|
+
/**
|
|
4
|
+
* lossless — XML-to-JSON converter producing an order-preserving format.
|
|
5
|
+
*
|
|
6
|
+
* Each element becomes a single-key object `{ tagName: children[] }`.
|
|
7
|
+
* Text nodes become `{ $text: "..." }`.
|
|
8
|
+
* Attributes become `{ $attr: { ... } }` as the first entry in the children array.
|
|
9
|
+
* Comments (when kept) become `{ $comment: "..." }`.
|
|
10
|
+
*
|
|
11
|
+
* All marker keys are valid JS identifiers so you can use dot notation:
|
|
12
|
+
* `entry.$attr.id`, `entry.$text`, `entry.$comment`.
|
|
13
|
+
*
|
|
14
|
+
* The format preserves element order, mixed content, and attributes losslessly,
|
|
15
|
+
* and is fully JSON-serializable.
|
|
16
|
+
*
|
|
17
|
+
* @example
|
|
18
|
+
* ```ts
|
|
19
|
+
* import { lossless } from "@eksml/xml/lossless";
|
|
20
|
+
*
|
|
21
|
+
* const result = lossless('<root attr="1"><item>hell<b>o</b></item></root>');
|
|
22
|
+
* // [
|
|
23
|
+
* // { "root": [
|
|
24
|
+
* // { $attr: { "attr": "1" } },
|
|
25
|
+
* // { "item": [
|
|
26
|
+
* // { $text: "hell" },
|
|
27
|
+
* // { "b": [{ $text: "o" }] }
|
|
28
|
+
* // ]}
|
|
29
|
+
* // ]}
|
|
30
|
+
* // ]
|
|
31
|
+
* ```
|
|
32
|
+
*/
|
|
33
|
+
function convertNode(node) {
|
|
34
|
+
const children = [];
|
|
35
|
+
if (node.attributes !== null) children.push({ $attr: node.attributes });
|
|
36
|
+
for (let i = 0; i < node.children.length; i++) {
|
|
37
|
+
const child = node.children[i];
|
|
38
|
+
if (typeof child === "string") children.push(convertString(child));
|
|
39
|
+
else children.push(convertNode(child));
|
|
40
|
+
}
|
|
41
|
+
return { [node.tagName]: children };
|
|
42
|
+
}
|
|
43
|
+
/**
 * Classify a string child as a comment or as plain text.
 *
 * A string is a comment only when it carries a complete `<!--` … `-->`
 * wrapper, which needs at least 7 characters. Shorter strings such as
 * `<!-->` start with `<!--` and end with `-->` only because the two
 * delimiters overlap; they are kept as text instead of producing a bogus
 * comment body.
 *
 * @param text - A string node from the DOM array.
 * @returns `{ $comment }` with both delimiters stripped, or `{ $text }` with
 * the string unchanged.
 */
function convertString(text) {
	const OPEN = "<!--";
	const CLOSE = "-->";
	const isComment = text.length >= OPEN.length + CLOSE.length && text.startsWith(OPEN) && text.endsWith(CLOSE);
	if (isComment) return { $comment: text.slice(OPEN.length, -CLOSE.length) };
	return { $text: text };
}
|
|
47
|
+
/**
|
|
48
|
+
* Convert a single `TNode | string` item into its lossless representation.
|
|
49
|
+
*
|
|
50
|
+
* - `TNode` → `{ tagName: [ ...children ] }`
|
|
51
|
+
* - `string` → `{ $text: "..." }` or `{ $comment: "..." }`
|
|
52
|
+
*
|
|
53
|
+
* Used internally by `XmlParseStream` when `output: 'lossless'` is set.
|
|
54
|
+
*
|
|
55
|
+
* @internal
|
|
56
|
+
*/
|
|
57
|
+
function convertItemToLossless(item) {
|
|
58
|
+
if (typeof item === "string") return convertString(item);
|
|
59
|
+
return convertNode(item);
|
|
60
|
+
}
|
|
61
|
+
function lossless(input, options) {
|
|
62
|
+
const dom = typeof input === "string" ? parse(input, { ...options }) : input;
|
|
63
|
+
const result = [];
|
|
64
|
+
for (let i = 0; i < dom.length; i++) {
|
|
65
|
+
const node = dom[i];
|
|
66
|
+
if (typeof node === "string") result.push(convertString(node));
|
|
67
|
+
else result.push(convertNode(node));
|
|
68
|
+
}
|
|
69
|
+
return result;
|
|
70
|
+
}
|
|
71
|
+
//#endregion
|
|
72
|
+
export { convertItemToLossless, lossless };
|
|
73
|
+
|
|
74
|
+
//# sourceMappingURL=lossless.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"lossless.mjs","names":[],"sources":["../../src/converters/lossless.ts"],"sourcesContent":["/**\n * lossless — XML-to-JSON converter producing an order-preserving format.\n *\n * Each element becomes a single-key object `{ tagName: children[] }`.\n * Text nodes become `{ $text: \"...\" }`.\n * Attributes become `{ $attr: { ... } }` as the first entry in the children array.\n * Comments (when kept) become `{ $comment: \"...\" }`.\n *\n * All marker keys are valid JS identifiers so you can use dot notation:\n * `entry.$attr.id`, `entry.$text`, `entry.$comment`.\n *\n * The format preserves element order, mixed content, and attributes losslessly,\n * and is fully JSON-serializable.\n *\n * @example\n * ```ts\n * import { lossless } from \"@eksml/xml/lossless\";\n *\n * const result = lossless('<root attr=\"1\"><item>hell<b>o</b></item></root>');\n * // [\n * // { \"root\": [\n * // { $attr: { \"attr\": \"1\" } },\n * // { \"item\": [\n * // { $text: \"hell\" },\n * // { \"b\": [{ $text: \"o\" }] }\n * // ]}\n * // ]}\n * // ]\n * ```\n */\n\nimport { parse, type TNode, type ParseOptions } from '#src/parser.ts';\n\n/** A single entry in the JSON output array. */\nexport type LosslessEntry =\n | { [tagName: string]: LosslessEntry[] }\n | { $text: string }\n | { $attr: Record<string, string | null> }\n | { $comment: string };\n\n/** Options for lossless. */\nexport interface LosslessOptions extends ParseOptions {}\n\nfunction convertNode(node: TNode): LosslessEntry {\n const children: LosslessEntry[] = [];\n\n // Attributes go first as { $attr: { ... 
} }\n if (node.attributes !== null) {\n children.push({ $attr: node.attributes });\n }\n\n // Then child nodes\n for (let i = 0; i < node.children.length; i++) {\n const child = node.children[i]!;\n if (typeof child === 'string') {\n children.push(convertString(child));\n } else {\n children.push(convertNode(child));\n }\n }\n\n return { [node.tagName]: children };\n}\n\nfunction convertString(text: string): LosslessEntry {\n // Comments from parse() come as \"<!-- ... -->\"\n if (text.startsWith('<!--') && text.endsWith('-->')) {\n return { $comment: text.substring(4, text.length - 3) };\n }\n return { $text: text };\n}\n\n/**\n * Convert a single `TNode | string` item into its lossless representation.\n *\n * - `TNode` → `{ tagName: [ ...children ] }`\n * - `string` → `{ $text: \"...\" }` or `{ $comment: \"...\" }`\n *\n * Used internally by `XmlParseStream` when `output: 'lossless'` is set.\n *\n * @internal\n */\nexport function convertItemToLossless(item: TNode | string): LosslessEntry {\n if (typeof item === 'string') return convertString(item);\n return convertNode(item);\n}\n\n/**\n * Parse an XML/HTML string or convert a pre-parsed DOM tree into an\n * order-preserving JSON-friendly structure.\n *\n * @param input - An XML/HTML string, or a pre-parsed `(TNode | string)[]` DOM array\n * @param options - Parsing options (only used when `input` is a string)\n * @returns Array of top-level JSON entries\n */\nexport function lossless(\n input: string,\n options?: LosslessOptions,\n): LosslessEntry[];\nexport function lossless(input: (TNode | string)[]): LosslessEntry[];\nexport function lossless(\n input: string | (TNode | string)[],\n options?: LosslessOptions,\n): LosslessEntry[] {\n const dom = typeof input === 'string' ? 
parse(input, { ...options }) : input;\n const result: LosslessEntry[] = [];\n for (let i = 0; i < dom.length; i++) {\n const node = dom[i]!;\n if (typeof node === 'string') {\n result.push(convertString(node));\n } else {\n result.push(convertNode(node));\n }\n }\n return result;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA2CA,SAAS,YAAY,MAA4B;CAC/C,MAAM,WAA4B,EAAE;AAGpC,KAAI,KAAK,eAAe,KACtB,UAAS,KAAK,EAAE,OAAO,KAAK,YAAY,CAAC;AAI3C,MAAK,IAAI,IAAI,GAAG,IAAI,KAAK,SAAS,QAAQ,KAAK;EAC7C,MAAM,QAAQ,KAAK,SAAS;AAC5B,MAAI,OAAO,UAAU,SACnB,UAAS,KAAK,cAAc,MAAM,CAAC;MAEnC,UAAS,KAAK,YAAY,MAAM,CAAC;;AAIrC,QAAO,GAAG,KAAK,UAAU,UAAU;;AAGrC,SAAS,cAAc,MAA6B;AAElD,KAAI,KAAK,WAAW,OAAO,IAAI,KAAK,SAAS,MAAM,CACjD,QAAO,EAAE,UAAU,KAAK,UAAU,GAAG,KAAK,SAAS,EAAE,EAAE;AAEzD,QAAO,EAAE,OAAO,MAAM;;;;;;;;;;;;AAaxB,SAAgB,sBAAsB,MAAqC;AACzE,KAAI,OAAO,SAAS,SAAU,QAAO,cAAc,KAAK;AACxD,QAAO,YAAY,KAAK;;AAgB1B,SAAgB,SACd,OACA,SACiB;CACjB,MAAM,MAAM,OAAO,UAAU,WAAW,MAAM,OAAO,EAAE,GAAG,SAAS,CAAC,GAAG;CACvE,MAAM,SAA0B,EAAE;AAClC,MAAK,IAAI,IAAI,GAAG,IAAI,IAAI,QAAQ,KAAK;EACnC,MAAM,OAAO,IAAI;AACjB,MAAI,OAAO,SAAS,SAClB,QAAO,KAAK,cAAc,KAAK,CAAC;MAEhC,QAAO,KAAK,YAAY,KAAK,CAAC;;AAGlC,QAAO"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { n as TNode, t as ParseOptions } from "../parser-BfdEfWDg.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/converters/lossy.d.ts
|
|
4
|
+
/** Options for lossy. */
|
|
5
|
+
interface LossyOptions extends ParseOptions {}
|
|
6
|
+
/** The value of a converted element — null (empty), string, or an object with keys. */
|
|
7
|
+
type LossyValue = null | string | LossyObject;
|
|
8
|
+
/** An element converted to a keyed object. */
|
|
9
|
+
interface LossyObject {
|
|
10
|
+
[key: string]: LossyValue | LossyValue[] | LossyMixedEntry[];
|
|
11
|
+
}
|
|
12
|
+
/** An entry in a `$$` mixed-content array. */
|
|
13
|
+
type LossyMixedEntry = string | {
|
|
14
|
+
[tagName: string]: LossyValue;
|
|
15
|
+
};
|
|
16
|
+
/**
|
|
17
|
+
* Convert a single `TNode | string` item into its lossy representation.
|
|
18
|
+
*
|
|
19
|
+
* - `TNode` → `{ tagName: convertedValue }` (same shape as the multi-root
|
|
20
|
+
* branch of `lossy()`)
|
|
21
|
+
* - `string` → passed through as-is
|
|
22
|
+
*
|
|
23
|
+
* Used internally by `XmlParseStream` when `output: 'lossy'` is set.
|
|
24
|
+
*
|
|
25
|
+
* @internal
|
|
26
|
+
*/
|
|
27
|
+
declare function convertItemToLossy(item: TNode | string): LossyValue;
|
|
28
|
+
/**
|
|
29
|
+
* Parse an XML/HTML string or convert a pre-parsed DOM tree into the most
|
|
30
|
+
* simplified lossy JS object format.
|
|
31
|
+
*
|
|
32
|
+
* @param input - An XML/HTML string, or a pre-parsed `(TNode | string)[]` DOM array
|
|
33
|
+
* @param options - Parsing options (only used when `input` is a string)
|
|
34
|
+
* @returns A LossyValue representing the document. For a single root element
|
|
35
|
+
* this is typically `{ rootTag: ... }`. For multiple top-level nodes
|
|
36
|
+
* an array is returned.
|
|
37
|
+
*/
|
|
38
|
+
declare function lossy(input: string, options?: LossyOptions): LossyValue | LossyValue[];
|
|
39
|
+
declare function lossy(input: (TNode | string)[]): LossyValue | LossyValue[];
|
|
40
|
+
//#endregion
|
|
41
|
+
export { LossyMixedEntry, LossyObject, LossyOptions, LossyValue, convertItemToLossy, lossy };
|
|
42
|
+
//# sourceMappingURL=lossy.d.mts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"lossy.d.mts","names":[],"sources":["../../src/converters/lossy.ts"],"mappings":";;;;UA2DiB,YAAA,SAAqB,YAAA;AAyKtC;AAAA,KAtKY,UAAA,mBAA6B,WAAA;;UAGxB,WAAA;EAAA,CACd,GAAA,WAAc,UAAA,GAAa,UAAA,KAAe,eAAA;AAAA;;KAIjC,eAAA;EAAA,CAA8B,OAAA,WAAkB,UAAA;AAAA;;;;;;;;;;;;iBA2I5C,kBAAA,CAAmB,IAAA,EAAM,KAAA,YAAiB,UAAA;;;;;;;;;;;iBAe1C,KAAA,CACd,KAAA,UACA,OAAA,GAAU,YAAA,GACT,UAAA,GAAa,UAAA;AAAA,iBACA,KAAA,CAAM,KAAA,GAAQ,KAAA,eAAoB,UAAA,GAAa,UAAA"}
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import { t as parse } from "../parser-CYq309aR.mjs";
|
|
2
|
+
//#region src/converters/lossy.ts
|
|
3
|
+
/**
|
|
4
|
+
* lossy — simplified lossy XML-to-JS object converter.
|
|
5
|
+
*
|
|
6
|
+
* Produces the most compact JS object representation possible. Element
|
|
7
|
+
* ordering between different tag names is not preserved. The format rules are:
|
|
8
|
+
*
|
|
9
|
+
* - **Text-only element**: collapses to a plain string value.
|
|
10
|
+
* `<name>text</name>` → `{ "name": "text" }`
|
|
11
|
+
*
|
|
12
|
+
* - **Empty/void element**: collapses to `null`.
|
|
13
|
+
* `<br/>` → `{ "br": null }`
|
|
14
|
+
*
|
|
15
|
+
* - **Attributes**: prefixed with `$` on the element object.
|
|
16
|
+
* `<a href="/">link</a>` → `{ "a": { $href: "/", $$: ["link"] } }`
|
|
17
|
+
*
|
|
18
|
+
* - **Mixed content** (text + child elements): children go into an ordered
|
|
19
|
+
* `$$` array preserving exact interleaving of text and element objects.
|
|
20
|
+
* `<p>Hello <b>world</b></p>` → `{ "p": { $$: ["Hello ", { "b": "world" }] } }`
|
|
21
|
+
*
|
|
22
|
+
* - **Element-only children** (no text): keyed object with tag names.
|
|
23
|
+
* `<root><a>1</a><b>2</b></root>` → `{ "root": { "a": "1", "b": "2" } }`
|
|
24
|
+
*
|
|
25
|
+
* - **Repeated same-name siblings**: become an array.
|
|
26
|
+
* Two `<item>` → `"item": [val1, val2]`; one `<item>` → `"item": val1`
|
|
27
|
+
*
|
|
28
|
+
* All marker keys are valid JS identifiers so you can use dot notation:
|
|
29
|
+
* `node.$href`, `node.$$`, etc.
|
|
30
|
+
*
|
|
31
|
+
* @example
|
|
32
|
+
* ```ts
|
|
33
|
+
* import { lossy } from "@eksml/xml/lossy";
|
|
34
|
+
*
|
|
35
|
+
* const result = lossy(`
|
|
36
|
+
* <thing>
|
|
37
|
+
* <second with="attributes">Text</second>
|
|
38
|
+
* <third>More text</third>
|
|
39
|
+
* <second another="attribute">Even more text
|
|
40
|
+
* <fourth>Nested text</fourth>
|
|
41
|
+
* </second>
|
|
42
|
+
* </thing>
|
|
43
|
+
* `);
|
|
44
|
+
* // {
|
|
45
|
+
* // "thing": {
|
|
46
|
+
* // "second": [
|
|
47
|
+
* // { $with: "attributes", $$: ["Text"] },
|
|
48
|
+
* // { $another: "attribute", $$: ["Even more text\n ", { "fourth": "Nested text" }] }
|
|
49
|
+
* // ],
|
|
50
|
+
* // "third": "More text"
|
|
51
|
+
* // }
|
|
52
|
+
* // }
|
|
53
|
+
* ```
|
|
54
|
+
*/
|
|
55
|
+
const DOLLAR = 36;
|
|
56
|
+
/**
|
|
57
|
+
* Convert a single TNode into its simplified lossy value.
|
|
58
|
+
*
|
|
59
|
+
* Single-pass algorithm: starts optimistically building an element-only object,
|
|
60
|
+
* and upgrades to mixed-content ($$ array) if text nodes are encountered
|
|
61
|
+
* alongside element nodes (or attributes exist with text).
|
|
62
|
+
*/
|
|
63
|
+
function convertNode(node) {
|
|
64
|
+
const children = node.children;
|
|
65
|
+
const childrenLength = children ? children.length : 0;
|
|
66
|
+
const attributes = node.attributes;
|
|
67
|
+
const hasAttributes = attributes !== null;
|
|
68
|
+
if (childrenLength === 0 && !hasAttributes) return null;
|
|
69
|
+
if (childrenLength === 1 && typeof children[0] === "string" && !hasAttributes) return children[0];
|
|
70
|
+
const elementObject = Object.create(null);
|
|
71
|
+
if (hasAttributes) for (const key in attributes) elementObject["$" + key] = attributes[key];
|
|
72
|
+
if (childrenLength === 0) return elementObject;
|
|
73
|
+
let hasElements = false;
|
|
74
|
+
let hasText = false;
|
|
75
|
+
let mixed = null;
|
|
76
|
+
for (let i = 0; i < childrenLength; i++) {
|
|
77
|
+
const child = children[i];
|
|
78
|
+
if (typeof child === "string") {
|
|
79
|
+
hasText = true;
|
|
80
|
+
if (mixed !== null) mixed.push(child);
|
|
81
|
+
else if (hasElements || hasAttributes) {
|
|
82
|
+
for (const propertyKey in elementObject) if (propertyKey.charCodeAt(0) !== DOLLAR) delete elementObject[propertyKey];
|
|
83
|
+
mixed = [];
|
|
84
|
+
for (let j = 0; j < i; j++) {
|
|
85
|
+
const previousChild = children[j];
|
|
86
|
+
if (typeof previousChild === "string") mixed.push(previousChild);
|
|
87
|
+
else mixed.push({ [previousChild.tagName]: convertNode(previousChild) });
|
|
88
|
+
}
|
|
89
|
+
mixed.push(child);
|
|
90
|
+
}
|
|
91
|
+
} else {
|
|
92
|
+
if (!hasElements && hasText && !hasAttributes) {
|
|
93
|
+
hasElements = true;
|
|
94
|
+
mixed = [];
|
|
95
|
+
for (let j = 0; j < i; j++) mixed.push(children[j]);
|
|
96
|
+
mixed.push({ [child.tagName]: convertNode(child) });
|
|
97
|
+
continue;
|
|
98
|
+
}
|
|
99
|
+
hasElements = true;
|
|
100
|
+
const tag = child.tagName;
|
|
101
|
+
const convertedValue = convertNode(child);
|
|
102
|
+
if (mixed !== null) mixed.push({ [tag]: convertedValue });
|
|
103
|
+
else if (!(tag in elementObject)) elementObject[tag] = convertedValue;
|
|
104
|
+
else if (!Array.isArray(elementObject[tag])) elementObject[tag] = [elementObject[tag], convertedValue];
|
|
105
|
+
else elementObject[tag].push(convertedValue);
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
if (mixed !== null) {
|
|
109
|
+
elementObject.$$ = mixed;
|
|
110
|
+
return elementObject;
|
|
111
|
+
}
|
|
112
|
+
if (hasElements) return elementObject;
|
|
113
|
+
let text = "";
|
|
114
|
+
for (let i = 0; i < childrenLength; i++) text += children[i];
|
|
115
|
+
return text;
|
|
116
|
+
}
|
|
117
|
+
/**
 * Produce the lossy form of one parsed item.
 *
 * - A string is returned unchanged.
 * - A node becomes a single-key object mapping its `tagName` to the value
 *   produced by `convertNode` — the same shape `lossy()` emits for each
 *   entry of a multi-root result.
 *
 * Used internally by `XmlParseStream` when `output: 'lossy'` is set.
 *
 * @internal
 */
function convertItemToLossy(item) {
  const isText = typeof item === "string";
  return isText ? item : { [item.tagName]: convertNode(item) };
}
|
|
132
|
+
/**
 * Convert XML/HTML text, or an already parsed node array, to the lossy format.
 *
 * A string `input` is parsed with a shallow copy of `options`; any other
 * input is used as the node array directly. Whitespace-only top-level
 * strings and nodes whose tag name starts with "?" are skipped.
 *
 * Returns the converted entry itself when exactly one top-level entry
 * remains, otherwise an array of converted entries (possibly empty).
 */
function lossy(input, options) {
  const dom = typeof input === "string" ? parse(input, { ...options }) : input;

  // Keep only meaningful top-level entries.
  const significant = [];
  for (const entry of dom) {
    if (typeof entry === "string") {
      if (entry.trim().length === 0) continue;
    } else if (entry.tagName[0] === "?") {
      continue;
    }
    significant.push(entry);
  }

  // Strings pass through; nodes become `{ tagName: convertedValue }`.
  const toLossy = (entry) => (typeof entry === "string" ? entry : { [entry.tagName]: convertNode(entry) });

  if (significant.length === 1) return toLossy(significant[0]);
  return significant.map((entry) => toLossy(entry));
}
|
|
155
|
+
//#endregion
|
|
156
|
+
export { convertItemToLossy, lossy };
|
|
157
|
+
|
|
158
|
+
//# sourceMappingURL=lossy.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"lossy.mjs","names":[],"sources":["../../src/converters/lossy.ts"],"sourcesContent":["/**\n * lossy — simplified lossy XML-to-JS object converter.\n *\n * Produces the most compact JS object representation possible. Element\n * ordering between different tag names is not preserved. The format rules are:\n *\n * - **Text-only element**: collapses to a plain string value.\n * `<name>text</name>` → `{ \"name\": \"text\" }`\n *\n * - **Empty/void element**: collapses to `null`.\n * `<br/>` → `{ \"br\": null }`\n *\n * - **Attributes**: prefixed with `$` on the element object.\n * `<a href=\"/\">link</a>` → `{ \"a\": { $href: \"/\", $$: [\"link\"] } }`\n *\n * - **Mixed content** (text + child elements): children go into an ordered\n * `$$` array preserving exact interleaving of text and element objects.\n * `<p>Hello <b>world</b></p>` → `{ \"p\": { $$: [\"Hello \", { \"b\": \"world\" }] } }`\n *\n * - **Element-only children** (no text): keyed object with tag names.\n * `<root><a>1</a><b>2</b></root>` → `{ \"root\": { \"a\": \"1\", \"b\": \"2\" } }`\n *\n * - **Repeated same-name siblings**: become an array.\n * Two `<item>` → `\"item\": [val1, val2]`; one `<item>` → `\"item\": val1`\n *\n * All marker keys are valid JS identifiers so you can use dot notation:\n * `node.$href`, `node.$$`, etc.\n *\n * @example\n * ```ts\n * import { lossy } from \"@eksml/xml/lossy\";\n *\n * const result = lossy(`\n * <thing>\n * <second with=\"attributes\">Text</second>\n * <third>More text</third>\n * <second another=\"attribute\">Even more text\n * <fourth>Nested text</fourth>\n * </second>\n * </thing>\n * `);\n * // {\n * // \"thing\": {\n * // \"second\": [\n * // { $with: \"attributes\", $$: [\"Text\"] },\n * // { $another: \"attribute\", $$: [\"Even more text\\n \", { \"fourth\": \"Nested text\" }] }\n * // ],\n * // \"third\": \"More text\"\n * // }\n * // }\n * ```\n */\n\nimport { parse, type TNode, type ParseOptions } from '#src/parser.ts';\n// 
@generated:char-codes:begin\nconst DOLLAR = 36; // $\n// @generated:char-codes:end\n\n/** Options for lossy. */\nexport interface LossyOptions extends ParseOptions {}\n\n/** The value of a converted element — null (empty), string, or an object with keys. */\nexport type LossyValue = null | string | LossyObject;\n\n/** An element converted to a keyed object. */\nexport interface LossyObject {\n [key: string]: LossyValue | LossyValue[] | LossyMixedEntry[];\n}\n\n/** An entry in a `$$` mixed-content array. */\nexport type LossyMixedEntry = string | { [tagName: string]: LossyValue };\n\n/**\n * Convert a single TNode into its simplified lossy value.\n *\n * Single-pass algorithm: starts optimistically building an element-only object,\n * and upgrades to mixed-content ($$ array) if text nodes are encountered\n * alongside element nodes (or attributes exist with text).\n */\nfunction convertNode(node: TNode): LossyValue {\n const children = node.children;\n const childrenLength = children ? children.length : 0;\n const attributes = node.attributes;\n const hasAttributes = attributes !== null;\n\n // --- Empty element ---\n if (childrenLength === 0 && !hasAttributes) {\n return null;\n }\n\n // --- Text-only element, no attributes ---\n if (\n childrenLength === 1 &&\n typeof children[0] === 'string' &&\n !hasAttributes\n ) {\n return children[0];\n }\n\n // --- Build object with attributes ---\n // Use null-prototype object to prevent __proto__ / constructor pollution\n const elementObject: LossyObject = Object.create(null);\n\n if (hasAttributes) {\n for (const key in attributes) {\n elementObject['$' + key] = attributes[key]!;\n }\n }\n\n if (childrenLength === 0) {\n // Empty element with attributes only\n return elementObject;\n }\n\n // --- Single-pass: build element-only object, upgrade to mixed if needed ---\n // Track whether we've seen elements and/or text so far.\n // When text appears alongside elements (or attrs), switch to $$ mode,\n // retroactively 
converting already-processed children.\n let hasElements = false;\n let hasText = false;\n let mixed: LossyMixedEntry[] | null = null;\n\n for (let i = 0; i < childrenLength; i++) {\n const child = children[i]!;\n if (typeof child === 'string') {\n hasText = true;\n if (mixed !== null) {\n // Already in mixed mode\n mixed.push(child);\n } else if (hasElements || hasAttributes) {\n // Upgrade to mixed mode — retroactively convert prior children.\n // Remove element keys that were speculatively added to elementObject.\n for (const propertyKey in elementObject) {\n if (propertyKey.charCodeAt(0) !== DOLLAR)\n delete elementObject[propertyKey]; // keep $-prefixed attrs\n }\n mixed = [];\n for (let j = 0; j < i; j++) {\n const previousChild = children[j]!;\n if (typeof previousChild === 'string') {\n mixed.push(previousChild);\n } else {\n mixed.push({ [previousChild.tagName]: convertNode(previousChild) });\n }\n }\n mixed.push(child);\n }\n // If !hasElements && !hasAttributes, we're still in potential text-only mode\n } else {\n if (!hasElements && hasText && !hasAttributes) {\n // First element after text-only so far — upgrade to mixed mode\n hasElements = true;\n mixed = [];\n for (let j = 0; j < i; j++) {\n mixed.push(children[j] as string);\n }\n mixed.push({ [child.tagName]: convertNode(child) });\n continue;\n }\n hasElements = true;\n const tag = child.tagName;\n const convertedValue = convertNode(child);\n if (mixed !== null) {\n mixed.push({ [tag]: convertedValue });\n } else {\n if (!(tag in elementObject)) {\n elementObject[tag] = convertedValue;\n } else if (!Array.isArray(elementObject[tag])) {\n elementObject[tag] = [\n elementObject[tag] as LossyValue,\n convertedValue,\n ];\n } else {\n (elementObject[tag] as LossyValue[]).push(convertedValue);\n }\n }\n }\n }\n\n // If we switched to mixed mode, attach and return\n if (mixed !== null) {\n elementObject.$$ = mixed;\n return elementObject;\n }\n\n // If we had elements, elementObject is already populated 
— return it\n if (hasElements) {\n return elementObject;\n }\n\n // --- Text-only, no attributes, multiple text nodes (edge case) ---\n let text = '';\n for (let i = 0; i < childrenLength; i++) {\n text += children[i] as string;\n }\n return text;\n}\n\n/**\n * Convert a single `TNode | string` item into its lossy representation.\n *\n * - `TNode` → `{ tagName: convertedValue }` (same shape as the multi-root\n * branch of `lossy()`)\n * - `string` → passed through as-is\n *\n * Used internally by `XmlParseStream` when `output: 'lossy'` is set.\n *\n * @internal\n */\nexport function convertItemToLossy(item: TNode | string): LossyValue {\n if (typeof item === 'string') return item;\n return { [item.tagName]: convertNode(item) } as LossyObject;\n}\n\n/**\n * Parse an XML/HTML string or convert a pre-parsed DOM tree into the most\n * simplified lossy JS object format.\n *\n * @param input - An XML/HTML string, or a pre-parsed `(TNode | string)[]` DOM array\n * @param options - Parsing options (only used when `input` is a string)\n * @returns A LossyValue representing the document. For a single root element\n * this is typically `{ rootTag: ... }`. For multiple top-level nodes\n * an array is returned.\n */\nexport function lossy(\n input: string,\n options?: LossyOptions,\n): LossyValue | LossyValue[];\nexport function lossy(input: (TNode | string)[]): LossyValue | LossyValue[];\nexport function lossy(\n input: string | (TNode | string)[],\n options?: LossyOptions,\n): LossyValue | LossyValue[] {\n const dom = typeof input === 'string' ? parse(input, { ...options }) : input;\n\n // Filter out whitespace-only top-level text and processing instructions\n // (e.g. 
<?xml version=\"1.0\"?>) which are metadata, not content.\n const nodes: (TNode | string)[] = [];\n for (let i = 0; i < dom.length; i++) {\n const node = dom[i]!;\n if (typeof node === 'string') {\n // Keep non-whitespace text at top level\n if (node.trim().length > 0) {\n nodes.push(node);\n }\n } else if (node.tagName[0] === '?') {\n // Skip processing instructions (<?xml?>, <?xsl?>, etc.)\n continue;\n } else {\n nodes.push(node);\n }\n }\n\n // Single root element — return as { rootTag: value }\n if (nodes.length === 1) {\n const node = nodes[0]!;\n if (typeof node === 'string') {\n return node;\n }\n return { [node.tagName]: convertNode(node) } as LossyObject;\n }\n\n // Multiple top-level nodes — return array\n const result: LossyValue[] = [];\n for (let i = 0; i < nodes.length; i++) {\n const node = nodes[i]!;\n if (typeof node === 'string') {\n result.push(node);\n } else {\n result.push({ [node.tagName]: convertNode(node) } as LossyObject);\n }\n }\n return result;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAuDA,MAAM,SAAS;;;;;;;;AAwBf,SAAS,YAAY,MAAyB;CAC5C,MAAM,WAAW,KAAK;CACtB,MAAM,iBAAiB,WAAW,SAAS,SAAS;CACpD,MAAM,aAAa,KAAK;CACxB,MAAM,gBAAgB,eAAe;AAGrC,KAAI,mBAAmB,KAAK,CAAC,cAC3B,QAAO;AAIT,KACE,mBAAmB,KACnB,OAAO,SAAS,OAAO,YACvB,CAAC,cAED,QAAO,SAAS;CAKlB,MAAM,gBAA6B,OAAO,OAAO,KAAK;AAEtD,KAAI,cACF,MAAK,MAAM,OAAO,WAChB,eAAc,MAAM,OAAO,WAAW;AAI1C,KAAI,mBAAmB,EAErB,QAAO;CAOT,IAAI,cAAc;CAClB,IAAI,UAAU;CACd,IAAI,QAAkC;AAEtC,MAAK,IAAI,IAAI,GAAG,IAAI,gBAAgB,KAAK;EACvC,MAAM,QAAQ,SAAS;AACvB,MAAI,OAAO,UAAU,UAAU;AAC7B,aAAU;AACV,OAAI,UAAU,KAEZ,OAAM,KAAK,MAAM;YACR,eAAe,eAAe;AAGvC,SAAK,MAAM,eAAe,cACxB,KAAI,YAAY,WAAW,EAAE,KAAK,OAChC,QAAO,cAAc;AAEzB,YAAQ,EAAE;AACV,SAAK,IAAI,IAAI,GAAG,IAAI,GAAG,KAAK;KAC1B,MAAM,gBAAgB,SAAS;AAC/B,SAAI,OAAO,kBAAkB,SAC3B,OAAM,KAAK,cAAc;SAEzB,OAAM,KAAK,GAAG,cAAc,UAAU,YAAY,cAAc,EAAE,CAAC;;AAGvE,UAAM,KAAK,MAAM;;SAGd;AACL,OAAI,CAAC,eAAe,WAAW,CAAC,eAAe;AAE7C,kBAAc;AACd,YAAQ,EAAE;AACV,SAAK,IAAI,IAAI,GAAG,IAAI,GAAG,IACrB,OA
AM,KAAK,SAAS,GAAa;AAEnC,UAAM,KAAK,GAAG,MAAM,UAAU,YAAY,MAAM,EAAE,CAAC;AACnD;;AAEF,iBAAc;GACd,MAAM,MAAM,MAAM;GAClB,MAAM,iBAAiB,YAAY,MAAM;AACzC,OAAI,UAAU,KACZ,OAAM,KAAK,GAAG,MAAM,gBAAgB,CAAC;YAEjC,EAAE,OAAO,eACX,eAAc,OAAO;YACZ,CAAC,MAAM,QAAQ,cAAc,KAAK,CAC3C,eAAc,OAAO,CACnB,cAAc,MACd,eACD;OAEA,eAAc,KAAsB,KAAK,eAAe;;;AAOjE,KAAI,UAAU,MAAM;AAClB,gBAAc,KAAK;AACnB,SAAO;;AAIT,KAAI,YACF,QAAO;CAIT,IAAI,OAAO;AACX,MAAK,IAAI,IAAI,GAAG,IAAI,gBAAgB,IAClC,SAAQ,SAAS;AAEnB,QAAO;;;;;;;;;;;;;AAcT,SAAgB,mBAAmB,MAAkC;AACnE,KAAI,OAAO,SAAS,SAAU,QAAO;AACrC,QAAO,GAAG,KAAK,UAAU,YAAY,KAAK,EAAE;;AAkB9C,SAAgB,MACd,OACA,SAC2B;CAC3B,MAAM,MAAM,OAAO,UAAU,WAAW,MAAM,OAAO,EAAE,GAAG,SAAS,CAAC,GAAG;CAIvE,MAAM,QAA4B,EAAE;AACpC,MAAK,IAAI,IAAI,GAAG,IAAI,IAAI,QAAQ,KAAK;EACnC,MAAM,OAAO,IAAI;AACjB,MAAI,OAAO,SAAS;OAEd,KAAK,MAAM,CAAC,SAAS,EACvB,OAAM,KAAK,KAAK;aAET,KAAK,QAAQ,OAAO,IAE7B;MAEA,OAAM,KAAK,KAAK;;AAKpB,KAAI,MAAM,WAAW,GAAG;EACtB,MAAM,OAAO,MAAM;AACnB,MAAI,OAAO,SAAS,SAClB,QAAO;AAET,SAAO,GAAG,KAAK,UAAU,YAAY,KAAK,EAAE;;CAI9C,MAAM,SAAuB,EAAE;AAC/B,MAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;EACrC,MAAM,OAAO,MAAM;AACnB,MAAI,OAAO,SAAS,SAClB,QAAO,KAAK,KAAK;MAEjB,QAAO,KAAK,GAAG,KAAK,UAAU,YAAY,KAAK,EAAE,CAAgB;;AAGrE,QAAO"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
//#region src/utilities/htmlConstants.ts
|
|
2
|
+
/**
 * Tag names of the standard HTML void elements — self-closing, never with
 * children. Default value of `selfClosingTags` when `html: true`.
 */
const HTML_VOID_ELEMENTS = [
  "area", "base", "br", "col", "embed", "hr", "img",
  "input", "link", "meta", "param", "source", "track", "wbr"
];

/**
 * Tag names of HTML elements whose content is raw text rather than markup.
 * Default value of `rawContentTags` when `html: true`.
 */
const HTML_RAW_CONTENT_TAGS = ["script", "style"];
|
|
27
|
+
//#endregion
|
|
28
|
+
export { HTML_VOID_ELEMENTS as n, HTML_RAW_CONTENT_TAGS as t };
|
|
29
|
+
|
|
30
|
+
//# sourceMappingURL=htmlConstants-D6fsKbZ-.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"htmlConstants-D6fsKbZ-.mjs","names":[],"sources":["../src/utilities/htmlConstants.ts"],"sourcesContent":["/**\n * Standard HTML void elements that are self-closing and never have children.\n * Used as the default for `selfClosingTags` when `html: true`.\n */\nexport const HTML_VOID_ELEMENTS = [\n 'area',\n 'base',\n 'br',\n 'col',\n 'embed',\n 'hr',\n 'img',\n 'input',\n 'link',\n 'meta',\n 'param',\n 'source',\n 'track',\n 'wbr',\n];\n\n/**\n * HTML elements whose content is raw text (not parsed as markup).\n * Used as the default for `rawContentTags` when `html: true`.\n */\nexport const HTML_RAW_CONTENT_TAGS = ['script', 'style'];\n"],"mappings":";;;;;AAIA,MAAa,qBAAqB;CAChC;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACD;;;;;AAMD,MAAa,wBAAwB,CAAC,UAAU,QAAQ"}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
//#region src/parser.d.ts
|
|
2
|
+
/**
 * A parsed XML node
 */
interface TNode {
  tagName: string;
  /**
   * Attribute map of the element; `null` when it has no attributes at all.
   * Each value is one of:
   * - a string — the attribute has a value (`<div id="test">` -> `{id: "test"}`)
   * - `null` — the attribute has no value (`<input disabled>` -> `{disabled: null}`)
   * - `""` — the attribute has an empty value (`<input value="">` -> `{value: ""}`)
   */
  attributes: Record<string, string | null> | null;
  /** Child entries: nested nodes and text strings. */
  children: Array<TNode | string>;
}
|
|
17
|
+
/**
 * Options for parsing XML
 */
interface ParseOptions {
  /** Starting position in the string */
  pos?: number;
  /**
   * Array of tag names that are self-closing (void elements) and don't need closing tags.
   * In XML mode (default), this defaults to `[]` — self-closing is detected only by `/>` syntax.
   * In HTML mode (`html: true`), this defaults to the standard HTML void elements.
   * Can be overridden explicitly regardless of mode.
   */
  selfClosingTags?: string[];
  /**
   * Array of tag names whose content should be treated as raw text, not parsed as XML/HTML.
   * The parser will scan for the matching `</tagName>` close tag and emit everything between
   * as a single text child node.
   *
   * In XML mode (default), this defaults to `[]`.
   * In HTML mode (`html: true`), this defaults to `["script", "style"]`.
   * Can be overridden explicitly regardless of mode.
   */
  rawContentTags?: string[];
  /**
   * Enable HTML parsing mode. When `true`, sets sensible defaults for:
   * - `selfClosingTags`: standard HTML void elements (area, base, br, col, embed, hr, img, input, link, meta, param, source, track, wbr)
   * - `rawContentTags`: elements whose content is raw text (script, style)
   *
   * These defaults can be overridden by explicitly passing `selfClosingTags` or `rawContentTags`.
   */
  html?: boolean;
  /** Keep XML comments in the output */
  keepComments?: boolean;
  /** Trim whitespace from text nodes and discard whitespace-only text nodes */
  trimWhitespace?: boolean;
  /**
   * Strict mode: throw on malformed XML instead of recovering silently.
   * Catches unclosed comments, CDATA sections, processing instructions,
   * close tags, and open tags that reach end-of-input without closing.
   */
  strict?: boolean;
  /**
   * Decode XML/HTML entities in text content and attribute values.
   * When enabled, named entities (`&amp;`, `&lt;`, etc.), decimal character
   * references (`&#228;`), and hex character references (`&#xE4;`) are decoded.
   *
   * In HTML mode (`html: true`), the full set of HTML named entities is
   * supported (e.g. `&nbsp;`, `&copy;`, `&mdash;`). In XML mode, only the
   * five standard XML entities plus numeric references are decoded.
   *
   * CDATA sections are never decoded regardless of this setting.
   *
   * Defaults to `false` — entities are preserved as-is in the output.
   */
  entities?: boolean;
  /** Attribute name to search for (used with attrValue) */
  attrName?: string;
  /** Attribute value to search for (regex pattern) */
  attrValue?: string;
  /** Filter function to apply to nodes */
  filter?: (node: TNode, index: number, depth: number, path: string) => boolean;
}
|
|
79
|
+
/**
 * Parser options for internal use only; a superset of `ParseOptions` that is
 * not part of the public API.
 */
interface InternalParseOptions extends ParseOptions {
  /** When true, parse a single node rather than a list of nodes. */
  parseNode?: boolean;
  /** When true, the returned object carries a `pos` property marking where parsing stopped. */
  setPos?: boolean;
}
|
|
86
|
+
/**
 * Parse XML/HTML into a DOM object, with minimal validation and fault tolerance.
 *
 * @param S - The XML string to parse
 * @param options - Parsing options
 * @returns Array of parsed nodes and text content
 */
declare function parse(S: string, options?: ParseOptions | InternalParseOptions): Array<TNode | string>;
|
|
93
|
+
//#endregion
|
|
94
|
+
export { TNode as n, parse as r, ParseOptions as t };
|
|
95
|
+
//# sourceMappingURL=parser-BfdEfWDg.d.mts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser-BfdEfWDg.d.mts","names":[],"sources":["../src/parser.ts"],"mappings":";;AAyBA;;UAAiB,KAAA;EACf,OAAA;EAAA;;;;;;;EAQA,UAAA,EAAY,MAAA;EACZ,QAAA,GAAW,KAAA;AAAA;;;;UAaI,YAAA;EA2Bf;EAzBA,GAAA;EA6BA;;;;;;EAtBA,eAAA;EAgDU;;;;;;AACX;;;EAvCC,cAAA;EA0CqC;;;;;AAwBvC;;EA1DE,IAAA;EA4DU;EA1DV,YAAA;EA2DE;EAzDF,cAAA;EAyDO;;;;;EAnDP,MAAA;EAmDO;;;;;;;;;;;;;EArCP,QAAA;;EAEA,QAAA;;EAEA,SAAA;;EAEA,MAAA,IAAU,IAAA,EAAM,KAAA,EAAO,KAAA,UAAe,KAAA,UAAe,IAAA;AAAA;;UAI7C,oBAAA,SAA6B,YAAA;;EAErC,MAAA;;EAEA,SAAA;AAAA;;;;;;;iBAoBc,KAAA,CACd,CAAA,UACA,OAAA,GAAU,YAAA,GAAe,oBAAA,IACvB,KAAA"}
|