@eksml/xml 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +588 -0
- package/dist/converters/fromLossless.d.mts +14 -0
- package/dist/converters/fromLossless.d.mts.map +1 -0
- package/dist/converters/fromLossless.mjs +35 -0
- package/dist/converters/fromLossless.mjs.map +1 -0
- package/dist/converters/fromLossy.d.mts +18 -0
- package/dist/converters/fromLossy.d.mts.map +1 -0
- package/dist/converters/fromLossy.mjs +91 -0
- package/dist/converters/fromLossy.mjs.map +1 -0
- package/dist/converters/lossless.d.mts +39 -0
- package/dist/converters/lossless.d.mts.map +1 -0
- package/dist/converters/lossless.mjs +74 -0
- package/dist/converters/lossless.mjs.map +1 -0
- package/dist/converters/lossy.d.mts +42 -0
- package/dist/converters/lossy.d.mts.map +1 -0
- package/dist/converters/lossy.mjs +158 -0
- package/dist/converters/lossy.mjs.map +1 -0
- package/dist/htmlConstants-D6fsKbZ-.mjs +30 -0
- package/dist/htmlConstants-D6fsKbZ-.mjs.map +1 -0
- package/dist/parser-BfdEfWDg.d.mts +95 -0
- package/dist/parser-BfdEfWDg.d.mts.map +1 -0
- package/dist/parser-CYq309aR.mjs +479 -0
- package/dist/parser-CYq309aR.mjs.map +1 -0
- package/dist/parser.d.mts +2 -0
- package/dist/parser.mjs +2 -0
- package/dist/sax.d.mts +64 -0
- package/dist/sax.d.mts.map +1 -0
- package/dist/sax.mjs +70 -0
- package/dist/sax.mjs.map +1 -0
- package/dist/saxEngine-BDnD7ruG.mjs +750 -0
- package/dist/saxEngine-BDnD7ruG.mjs.map +1 -0
- package/dist/utilities/index.d.mts +88 -0
- package/dist/utilities/index.d.mts.map +1 -0
- package/dist/utilities/index.mjs +87 -0
- package/dist/utilities/index.mjs.map +1 -0
- package/dist/writer.d.mts +58 -0
- package/dist/writer.d.mts.map +1 -0
- package/dist/writer.mjs +357 -0
- package/dist/writer.mjs.map +1 -0
- package/dist/xmlParseStream.d.mts +138 -0
- package/dist/xmlParseStream.d.mts.map +1 -0
- package/dist/xmlParseStream.mjs +313 -0
- package/dist/xmlParseStream.mjs.map +1 -0
- package/package.json +100 -0
- package/src/converters/fromLossless.ts +80 -0
- package/src/converters/fromLossy.ts +180 -0
- package/src/converters/lossless.ts +116 -0
- package/src/converters/lossy.ts +274 -0
- package/src/parser.ts +728 -0
- package/src/sax.ts +157 -0
- package/src/saxEngine.ts +1157 -0
- package/src/utilities/escapeRegExp.ts +19 -0
- package/src/utilities/filter.ts +63 -0
- package/src/utilities/getElementById.ts +21 -0
- package/src/utilities/getElementsByClassName.ts +22 -0
- package/src/utilities/htmlConstants.ts +26 -0
- package/src/utilities/index.ts +7 -0
- package/src/utilities/isElementNode.ts +19 -0
- package/src/utilities/isTextNode.ts +19 -0
- package/src/utilities/toContentString.ts +23 -0
- package/src/writer.ts +650 -0
- package/src/xmlParseStream.ts +597 -0
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* fromLossy — convert a lossy JS object back to a TNode DOM tree.
|
|
3
|
+
*
|
|
4
|
+
* This is the inverse of `lossy()`. Because the lossy format does not preserve
|
|
5
|
+
* sibling order between different tag names, the reconstruction is best-effort:
|
|
6
|
+
* elements appear in JS object key insertion order, and arrays expand in
|
|
7
|
+
* sequence. Mixed content (`$$` arrays) preserves interleaving exactly.
|
|
8
|
+
*
|
|
9
|
+
* Lossy format rules (reversed):
|
|
10
|
+
* - `null` → empty element (no attributes, no children)
|
|
11
|
+
* - `string` → text-only element (one string child)
|
|
12
|
+
* - `$`-prefixed keys → attributes (prefix stripped)
|
|
13
|
+
* - `$$` array → mixed content (strings become text, objects become elements)
|
|
14
|
+
* - Non-`$` keys → element children (arrays expand to repeated siblings)
|
|
15
|
+
* - Top-level single-key object → single root element
|
|
16
|
+
* - Top-level array → multiple root elements
|
|
17
|
+
*
|
|
18
|
+
* @example
|
|
19
|
+
* ```ts
|
|
20
|
+
* import { fromLossy } from "@eksml/xml/from-lossy";
|
|
21
|
+
* import { lossy } from "@eksml/xml/lossy";
|
|
22
|
+
* import { write } from "@eksml/xml/writer";
|
|
23
|
+
*
|
|
24
|
+
* const obj = lossy('<root><item>hello</item><item>world</item></root>');
|
|
25
|
+
* const dom = fromLossy(obj);
|
|
26
|
+
* const xml = write(dom);
|
|
27
|
+
* ```
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
import type { TNode } from '#src/parser.ts';
|
|
31
|
+
import type {
|
|
32
|
+
LossyValue,
|
|
33
|
+
LossyObject,
|
|
34
|
+
LossyMixedEntry,
|
|
35
|
+
} from '#src/converters/lossy.ts';
|
|
36
|
+
// @generated:char-codes:begin
|
|
37
|
+
const DOLLAR = 36; // $
|
|
38
|
+
// @generated:char-codes:end
|
|
39
|
+
|
|
40
|
+
/**
 * Rebuild a `(TNode | string)[]` DOM tree from a value produced by `lossy()`.
 *
 * Dispatch by input shape:
 * - array: every entry is handled in order; strings are emitted as top-level
 *   text, `null` entries are skipped, objects are expanded as
 *   `{ tagName: value }` roots
 * - bare string: returned as a single top-level text node
 * - bare `null`: yields an empty DOM (there is no tag name to build from)
 * - object: expanded as `{ tagName: value }` root(s)
 *
 * @param input - A lossy value or array of lossy values.
 * @returns A DOM array suitable for `write()` or further processing.
 */
export function fromLossy(
  input: LossyValue | LossyValue[],
): (TNode | string)[] {
  const output: (TNode | string)[] = [];

  // Single (non-array) value.
  if (!Array.isArray(input)) {
    if (typeof input === 'string') {
      output.push(input);
    } else if (input !== null) {
      // Single-root object: { rootTag: value }
      convertTopLevelObject(input, output);
    }
    return output;
  }

  // Array of top-level values.
  for (const entry of input) {
    if (entry === null) {
      // A top-level null carries no tag name — nothing to emit.
      continue;
    }
    if (typeof entry === 'string') {
      output.push(entry);
    } else {
      // Each object in the array is { tagName: value }
      convertTopLevelObject(entry, output);
    }
  }
  return output;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Convert a top-level `{ tagName: value }` object into TNode(s) and push
 * them onto the result array.
 *
 * Keys are visited in `Object.keys` order. A key whose value is an array is
 * expanded into one element per item (repeated siblings), the same way nested
 * element children are handled. Passing the array straight to
 * `convertElement` would keep only its first item and silently drop the rest.
 *
 * @param object - Lossy object whose keys are tag names.
 * @param result - Output array that receives the converted nodes (mutated).
 */
function convertTopLevelObject(
  object: LossyObject,
  result: (TNode | string)[],
): void {
  for (const tagName of Object.keys(object)) {
    const value = object[tagName]!;
    if (Array.isArray(value)) {
      // Repeated same-name roots: one element per array item.
      for (let i = 0; i < value.length; i++) {
        result.push(convertElement(tagName, value[i]!));
      }
    } else {
      result.push(convertElement(tagName, value));
    }
  }
}
|
|
97
|
+
|
|
98
|
+
/**
 * Convert a tag name + lossy value into a TNode.
 *
 * - `null` → element with no attributes and no children
 * - `string` → element with that string as its only child
 * - array → defensive fallback: callers expand repeated-sibling arrays
 *   themselves, so only the first item (or `null` when empty) is used
 * - object → `$$` supplies ordered mixed content, other `$`-prefixed keys
 *   become attributes (prefix stripped), remaining keys become child
 *   elements (arrays expand to repeated siblings)
 *
 * Inside a `$$` array, `null`/missing entries and empty objects are skipped,
 * and an entry object with several keys emits one element per key in key
 * order, so malformed hand-built input neither throws nor loses keys.
 *
 * @param tagName - Tag name of the element to build.
 * @param value - Lossy value describing the element's content.
 * @returns The reconstructed element node.
 */
function convertElement(
  tagName: string,
  value: LossyValue | LossyValue[] | LossyMixedEntry[],
): TNode {
  // null → empty element
  if (value === null) {
    return { tagName, attributes: null, children: [] };
  }

  // string → text-only element
  if (typeof value === 'string') {
    return { tagName, attributes: null, children: [value] };
  }

  // Arrays are expanded by the caller; if one arrives anyway, use its first item.
  if (Array.isArray(value)) {
    return convertElement(tagName, value[0] ?? null);
  }

  const objectValue = value as LossyObject;
  const objectKeys = Object.keys(objectValue);

  let attributes: Record<string, string | null> | null = null;
  const children: (TNode | string)[] = [];

  for (let i = 0; i < objectKeys.length; i++) {
    const key = objectKeys[i]!;

    if (key === '$$') {
      // Mixed content: strings are text, objects are { tagName: value }.
      const mixedArray = objectValue.$$ as LossyMixedEntry[];
      for (let j = 0; j < mixedArray.length; j++) {
        const mixedEntry = mixedArray[j];
        if (typeof mixedEntry === 'string') {
          children.push(mixedEntry);
          continue;
        }
        if (mixedEntry == null) {
          // null / hole — there is no tag name to build an element from.
          continue;
        }
        // Normally a single-key object; zero keys emits nothing, extra keys
        // are emitted in order instead of being dropped.
        const entryKeys = Object.keys(mixedEntry);
        for (let k = 0; k < entryKeys.length; k++) {
          const entryTagName = entryKeys[k]!;
          children.push(
            convertElement(
              entryTagName,
              (mixedEntry as LossyObject)[entryTagName]!,
            ),
          );
        }
      }
    } else if (key.charCodeAt(0) === DOLLAR) {
      // Attribute — strip the $ prefix. The attribute map is created lazily
      // and has a null prototype so names like "__proto__" stay plain keys.
      if (attributes === null) {
        attributes = Object.create(null) as Record<string, string | null>;
      }
      const attributeName = key.substring(1);
      const attributeValue = objectValue[key];
      attributes[attributeName] =
        attributeValue === null ? null : String(attributeValue);
    } else {
      // Element child(ren)
      const childValue = objectValue[key]!;
      if (Array.isArray(childValue)) {
        // Repeated siblings
        for (let j = 0; j < childValue.length; j++) {
          children.push(convertElement(key, childValue[j]!));
        }
      } else {
        children.push(convertElement(key, childValue as LossyValue));
      }
    }
  }

  return { tagName, attributes, children };
}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lossless — XML-to-JSON converter producing an order-preserving format.
|
|
3
|
+
*
|
|
4
|
+
* Each element becomes a single-key object `{ tagName: children[] }`.
|
|
5
|
+
* Text nodes become `{ $text: "..." }`.
|
|
6
|
+
* Attributes become `{ $attr: { ... } }` as the first entry in the children array.
|
|
7
|
+
* Comments (when kept) become `{ $comment: "..." }`.
|
|
8
|
+
*
|
|
9
|
+
* All marker keys are valid JS identifiers so you can use dot notation:
|
|
10
|
+
* `entry.$attr.id`, `entry.$text`, `entry.$comment`.
|
|
11
|
+
*
|
|
12
|
+
* The format preserves element order, mixed content, and attributes losslessly,
|
|
13
|
+
* and is fully JSON-serializable.
|
|
14
|
+
*
|
|
15
|
+
* @example
|
|
16
|
+
* ```ts
|
|
17
|
+
* import { lossless } from "@eksml/xml/lossless";
|
|
18
|
+
*
|
|
19
|
+
* const result = lossless('<root attr="1"><item>hell<b>o</b></item></root>');
|
|
20
|
+
* // [
|
|
21
|
+
* // { "root": [
|
|
22
|
+
* // { $attr: { "attr": "1" } },
|
|
23
|
+
* // { "item": [
|
|
24
|
+
* // { $text: "hell" },
|
|
25
|
+
* // { "b": [{ $text: "o" }] }
|
|
26
|
+
* // ]}
|
|
27
|
+
* // ]}
|
|
28
|
+
* // ]
|
|
29
|
+
* ```
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
import { parse, type TNode, type ParseOptions } from '#src/parser.ts';
|
|
33
|
+
|
|
34
|
+
/** A single entry in the JSON output array. */
|
|
35
|
+
export type LosslessEntry =
|
|
36
|
+
| { [tagName: string]: LosslessEntry[] }
|
|
37
|
+
| { $text: string }
|
|
38
|
+
| { $attr: Record<string, string | null> }
|
|
39
|
+
| { $comment: string };
|
|
40
|
+
|
|
41
|
+
/** Options for lossless. */
|
|
42
|
+
export interface LosslessOptions extends ParseOptions {}
|
|
43
|
+
|
|
44
|
+
/**
 * Convert an element node into its lossless entry `{ tagName: entries[] }`.
 *
 * When `node.attributes` is not `null`, the entry list starts with
 * `{ $attr: attributes }` (the same object, not a copy). The child nodes
 * follow in document order: strings go through `convertString`, elements
 * recurse.
 */
function convertNode(node: TNode): LosslessEntry {
  // Attributes, when present, always occupy the first slot.
  const entries: LosslessEntry[] =
    node.attributes === null ? [] : [{ $attr: node.attributes }];

  for (const child of node.children) {
    entries.push(
      typeof child === 'string' ? convertString(child) : convertNode(child),
    );
  }

  return { [node.tagName]: entries };
}
|
|
64
|
+
|
|
65
|
+
/**
 * Classify a string DOM item as a comment or as plain text.
 *
 * A string of the form `<!--` + body + `-->` becomes `{ $comment: body }`;
 * anything else becomes `{ $text: text }`.
 *
 * The string must be at least 7 characters long (`<!--` plus `-->`) to count
 * as a comment. Shorter strings such as `<!-->` and `<!--->` satisfy both the
 * prefix and the suffix test because the two overlap; slicing them would
 * produce a bogus comment body, so they are kept as text.
 */
function convertString(text: string): LosslessEntry {
  const open = '<!--';
  const close = '-->';
  if (
    text.length >= open.length + close.length &&
    text.startsWith(open) &&
    text.endsWith(close)
  ) {
    return { $comment: text.slice(open.length, -close.length) };
  }
  return { $text: text };
}
|
|
72
|
+
|
|
73
|
+
/**
 * Map one DOM item (`TNode | string`) to its lossless entry.
 *
 * - string → result of `convertString` (`{ $text }` or `{ $comment }`)
 * - `TNode` → result of `convertNode` (`{ tagName: [ ...entries ] }`)
 *
 * Used internally by `XmlParseStream` when `output: 'lossless'` is set.
 *
 * @internal
 */
export function convertItemToLossless(item: TNode | string): LosslessEntry {
  return typeof item === 'string' ? convertString(item) : convertNode(item);
}
|
|
87
|
+
|
|
88
|
+
/**
 * Produce the order-preserving lossless structure for a document.
 *
 * A string input is parsed first with `parse()`, receiving a shallow copy of
 * `options`. A pre-parsed DOM array is used as-is and `options` is not
 * consulted. Every top-level item is then converted in order.
 *
 * @param input - An XML/HTML string, or a pre-parsed `(TNode | string)[]` DOM array
 * @param options - Parsing options (only used when `input` is a string)
 * @returns Array of top-level JSON entries
 */
export function lossless(
  input: string,
  options?: LosslessOptions,
): LosslessEntry[];
export function lossless(input: (TNode | string)[]): LosslessEntry[];
export function lossless(
  input: string | (TNode | string)[],
  options?: LosslessOptions,
): LosslessEntry[] {
  let dom: (TNode | string)[];
  if (typeof input === 'string') {
    dom = parse(input, { ...options });
  } else {
    dom = input;
  }

  const entries: LosslessEntry[] = [];
  for (const item of dom) {
    entries.push(
      typeof item === 'string' ? convertString(item) : convertNode(item),
    );
  }
  return entries;
}
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lossy — simplified lossy XML-to-JS object converter.
|
|
3
|
+
*
|
|
4
|
+
* Produces the most compact JS object representation possible. Element
|
|
5
|
+
* ordering between different tag names is not preserved. The format rules are:
|
|
6
|
+
*
|
|
7
|
+
* - **Text-only element**: collapses to a plain string value.
|
|
8
|
+
* `<name>text</name>` → `{ "name": "text" }`
|
|
9
|
+
*
|
|
10
|
+
* - **Empty/void element**: collapses to `null`.
|
|
11
|
+
* `<br/>` → `{ "br": null }`
|
|
12
|
+
*
|
|
13
|
+
* - **Attributes**: prefixed with `$` on the element object.
|
|
14
|
+
* `<a href="/">link</a>` → `{ "a": { $href: "/", $$: ["link"] } }`
|
|
15
|
+
*
|
|
16
|
+
* - **Mixed content** (text + child elements): children go into an ordered
|
|
17
|
+
* `$$` array preserving exact interleaving of text and element objects.
|
|
18
|
+
* `<p>Hello <b>world</b></p>` → `{ "p": { $$: ["Hello ", { "b": "world" }] } }`
|
|
19
|
+
*
|
|
20
|
+
* - **Element-only children** (no text): keyed object with tag names.
|
|
21
|
+
* `<root><a>1</a><b>2</b></root>` → `{ "root": { "a": "1", "b": "2" } }`
|
|
22
|
+
*
|
|
23
|
+
* - **Repeated same-name siblings**: become an array.
|
|
24
|
+
* Two `<item>` → `"item": [val1, val2]`; one `<item>` → `"item": val1`
|
|
25
|
+
*
|
|
26
|
+
* All marker keys are valid JS identifiers so you can use dot notation:
|
|
27
|
+
* `node.$href`, `node.$$`, etc.
|
|
28
|
+
*
|
|
29
|
+
* @example
|
|
30
|
+
* ```ts
|
|
31
|
+
* import { lossy } from "@eksml/xml/lossy";
|
|
32
|
+
*
|
|
33
|
+
* const result = lossy(`
|
|
34
|
+
* <thing>
|
|
35
|
+
* <second with="attributes">Text</second>
|
|
36
|
+
* <third>More text</third>
|
|
37
|
+
* <second another="attribute">Even more text
|
|
38
|
+
* <fourth>Nested text</fourth>
|
|
39
|
+
* </second>
|
|
40
|
+
* </thing>
|
|
41
|
+
* `);
|
|
42
|
+
* // {
|
|
43
|
+
* // "thing": {
|
|
44
|
+
* // "second": [
|
|
45
|
+
* // { $with: "attributes", $$: ["Text"] },
|
|
46
|
+
* // { $another: "attribute", $$: ["Even more text\n ", { "fourth": "Nested text" }] }
|
|
47
|
+
* // ],
|
|
48
|
+
* // "third": "More text"
|
|
49
|
+
* // }
|
|
50
|
+
* // }
|
|
51
|
+
* ```
|
|
52
|
+
*/
|
|
53
|
+
|
|
54
|
+
import { parse, type TNode, type ParseOptions } from '#src/parser.ts';
|
|
55
|
+
// @generated:char-codes:begin
|
|
56
|
+
const DOLLAR = 36; // $
|
|
57
|
+
// @generated:char-codes:end
|
|
58
|
+
|
|
59
|
+
/** Options for lossy. */
|
|
60
|
+
export interface LossyOptions extends ParseOptions {}
|
|
61
|
+
|
|
62
|
+
/** The value of a converted element — null (empty), string, or an object with keys. */
|
|
63
|
+
export type LossyValue = null | string | LossyObject;
|
|
64
|
+
|
|
65
|
+
/** An element converted to a keyed object. */
|
|
66
|
+
export interface LossyObject {
|
|
67
|
+
[key: string]: LossyValue | LossyValue[] | LossyMixedEntry[];
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
/** An entry in a `$$` mixed-content array. */
|
|
71
|
+
export type LossyMixedEntry = string | { [tagName: string]: LossyValue };
|
|
72
|
+
|
|
73
|
+
/**
 * Convert a single TNode into its simplified lossy value.
 *
 * Result by shape:
 * - no children, no attributes → `null`
 * - exactly one string child, no attributes → that string
 * - attributes only → object of `$`-prefixed attribute keys
 * - element children only → keyed object; a repeated tag name becomes an array
 * - text together with elements and/or attributes → attribute keys plus a
 *   `$$` array holding every child in original order
 * - several string children, no attributes → the strings concatenated
 *
 * Single pass: the keyed (element-only) object is built optimistically. Once
 * mixed content is detected, the speculative element keys are removed and the
 * `$$` array is seeded from the values already converted. Each child element
 * is converted exactly once, so the cost stays linear in the tree size even
 * when text follows elements at every nesting level.
 */
function convertNode(node: TNode): LossyValue {
  const children = node.children;
  const childrenLength = children ? children.length : 0;
  const attributes = node.attributes;
  const hasAttributes = attributes !== null;

  // --- Empty element ---
  if (childrenLength === 0 && !hasAttributes) {
    return null;
  }

  // --- Text-only element, no attributes ---
  if (
    childrenLength === 1 &&
    typeof children[0] === 'string' &&
    !hasAttributes
  ) {
    return children[0];
  }

  // --- Build object with attributes ---
  // Use null-prototype object to prevent __proto__ / constructor pollution
  const elementObject: LossyObject = Object.create(null);

  if (hasAttributes) {
    for (const key in attributes) {
      elementObject['$' + key] = attributes[key]!;
    }
  }

  if (childrenLength === 0) {
    // Empty element with attributes only
    return elementObject;
  }

  let hasElements = false;
  let hasText = false;
  let mixed: LossyMixedEntry[] | null = null;
  // Values of the element children seen while still in keyed mode, in
  // document order; reused (not recomputed) if we upgrade to mixed mode.
  const convertedElements: LossyValue[] = [];

  for (let i = 0; i < childrenLength; i++) {
    const child = children[i]!;

    // Convert the child (once) and decide whether it forces mixed mode.
    let convertedValue: LossyValue = null;
    let startsMixed: boolean;
    if (typeof child === 'string') {
      hasText = true;
      // Text next to earlier elements or any attributes → mixed.
      startsMixed = hasElements || hasAttributes;
    } else {
      // While still in keyed mode, earlier text implies there were no
      // attributes and no earlier elements (either would already have
      // triggered the upgrade), so "text seen" alone is the trigger.
      startsMixed = hasText;
      hasElements = true;
      convertedValue = convertNode(child);
    }

    if (mixed === null && startsMixed) {
      // Upgrade: drop speculative element keys, keep $-prefixed attributes.
      for (const propertyKey in elementObject) {
        if (propertyKey.charCodeAt(0) !== DOLLAR) {
          delete elementObject[propertyKey];
        }
      }
      // Replay the earlier children in order from the cache.
      mixed = [];
      let cursor = 0;
      for (let j = 0; j < i; j++) {
        const previousChild = children[j]!;
        if (typeof previousChild === 'string') {
          mixed.push(previousChild);
        } else {
          mixed.push({
            [previousChild.tagName]: convertedElements[cursor++] as LossyValue,
          });
        }
      }
    }

    if (mixed !== null) {
      mixed.push(
        typeof child === 'string' ? child : { [child.tagName]: convertedValue },
      );
      continue;
    }

    if (typeof child === 'string') {
      // Text-only so far (no attributes, no elements): nothing to record yet.
      continue;
    }

    // Keyed mode: record the value under its tag name, promoting to an array
    // on the second occurrence of the same tag.
    convertedElements.push(convertedValue);
    const tag = child.tagName;
    if (!(tag in elementObject)) {
      elementObject[tag] = convertedValue;
    } else if (!Array.isArray(elementObject[tag])) {
      elementObject[tag] = [elementObject[tag] as LossyValue, convertedValue];
    } else {
      (elementObject[tag] as LossyValue[]).push(convertedValue);
    }
  }

  // If we switched to mixed mode, attach and return
  if (mixed !== null) {
    elementObject.$$ = mixed;
    return elementObject;
  }

  // Element-only content: the keyed object is already complete.
  if (hasElements) {
    return elementObject;
  }

  // --- Text-only, no attributes, multiple text nodes (edge case) ---
  let text = '';
  for (let i = 0; i < childrenLength; i++) {
    text += children[i] as string;
  }
  return text;
}
|
|
198
|
+
|
|
199
|
+
/**
 * Map one DOM item (`TNode | string`) to its lossy representation.
 *
 * - string → returned unchanged
 * - `TNode` → `{ tagName: convertedValue }`, the same wrapper shape `lossy()`
 *   uses for root elements
 *
 * Used internally by `XmlParseStream` when `output: 'lossy'` is set.
 *
 * @internal
 */
export function convertItemToLossy(item: TNode | string): LossyValue {
  if (typeof item !== 'string') {
    const wrapped = { [item.tagName]: convertNode(item) };
    return wrapped as LossyObject;
  }
  return item;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Produce the simplified lossy JS object format for a document.
 *
 * A string input is parsed first with `parse()`, receiving a shallow copy of
 * `options`. A pre-parsed DOM array is used as-is. Top-level whitespace-only
 * text and nodes whose tag name starts with `?` (processing instructions such
 * as `<?xml ...?>`) are dropped before conversion.
 *
 * @param input - An XML/HTML string, or a pre-parsed `(TNode | string)[]` DOM array
 * @param options - Parsing options (only used when `input` is a string)
 * @returns When exactly one top-level node remains: that string, or
 *          `{ rootTag: value }` for an element. Otherwise an array with one
 *          entry per remaining node (empty when nothing remains).
 */
export function lossy(
  input: string,
  options?: LossyOptions,
): LossyValue | LossyValue[];
export function lossy(input: (TNode | string)[]): LossyValue | LossyValue[];
export function lossy(
  input: string | (TNode | string)[],
  options?: LossyOptions,
): LossyValue | LossyValue[] {
  const dom = typeof input === 'string' ? parse(input, { ...options }) : input;

  // Keep only content nodes: non-blank text and non-processing-instruction elements.
  const kept: (TNode | string)[] = [];
  for (const candidate of dom) {
    if (typeof candidate === 'string') {
      if (candidate.trim().length > 0) {
        kept.push(candidate);
      }
    } else if (candidate.tagName[0] !== '?') {
      kept.push(candidate);
    }
  }

  // Text passes through; elements are wrapped as { tagName: value }.
  const toLossy = (item: TNode | string): LossyValue =>
    typeof item === 'string'
      ? item
      : ({ [item.tagName]: convertNode(item) } as LossyObject);

  // A single remaining node is returned bare rather than in an array.
  if (kept.length === 1) {
    return toLossy(kept[0]!);
  }

  // Zero or multiple top-level nodes — return an array.
  const result: LossyValue[] = [];
  for (const item of kept) {
    result.push(toLossy(item));
  }
  return result;
}
|