xml-to-html-converter 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -30
- package/dist/index.d.ts +9 -84
- package/dist/index.js +97 -160
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -10,12 +10,40 @@ A zero-dependency Node.js package for converting XML to HTML. Currently in pre-1
|
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
13
|
+
## v0.1.x: XML Node Extraction & Scaffolding
|
|
14
|
+
|
|
15
|
+
Version `0.1.x` is focused entirely on parsing raw XML into a structured tree of nodes. The `scaffold` function walks an XML string and produces an array of `XmlNode` objects, each carrying its role, its raw source text, and its position in the document, both globally across the full document and locally within its parent.
|
|
16
|
+
|
|
17
|
+
```ts
|
|
18
|
+
interface XmlNode {
|
|
19
|
+
role: XmlNodeRole;
|
|
20
|
+
raw: string;
|
|
21
|
+
globalIndex: number;
|
|
22
|
+
localIndex: number;
|
|
23
|
+
children?: XmlNode[];
|
|
24
|
+
malformed?: true;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
type XmlNodeRole =
|
|
28
|
+
| "closeTag"
|
|
29
|
+
| "comment"
|
|
30
|
+
| "doctype"
|
|
31
|
+
| "openTag"
|
|
32
|
+
| "processingInstruction"
|
|
33
|
+
| "selfTag"
|
|
34
|
+
| "textLeaf";
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
This scaffold is the foundation everything else will be built on. No transformation, no HTML output, no opinions about content: just an accurate, traversable representation of what the XML says.
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
13
41
|
> **Where I am right now**
|
|
14
42
|
>
|
|
15
|
-
> `v0.x` is building the scaffold
|
|
43
|
+
> `v0.x` is building the scaffold: a structural tree of every node in your XML document, each carrying its raw source string and its exact position in the document. This scaffold is what the HTML converter will walk when it's built.
|
|
16
44
|
>
|
|
17
|
-
> - **`scaffold(xml)`** reads any XML string and returns a nested
|
|
18
|
-
> - Every
|
|
45
|
+
> - **`scaffold(xml)`** reads any XML string and returns a nested node tree
|
|
46
|
+
> - Every node knows its `role`, its `raw` source string, its `globalIndex` in the document, and its `localIndex` within its parent
|
|
19
47
|
> - Broken XML never causes `scaffold` to throw: malformed nodes are flagged in place with `malformed: true` and the tree is built regardless
|
|
20
48
|
>
|
|
21
49
|
> `v1.0.0` is when this package becomes what it says it is: a full XML-to-HTML converter. Everything before that is the work to get there.
|
|
@@ -45,7 +73,7 @@ const tree = scaffold(`
|
|
|
45
73
|
`);
|
|
46
74
|
```
|
|
47
75
|
|
|
48
|
-
`scaffold` returns a flat array of root-level
|
|
76
|
+
`scaffold` returns a flat array of root-level nodes. Each `openTag` node carries its children nested inside it:
|
|
49
77
|
|
|
50
78
|
```json
|
|
51
79
|
[
|
|
@@ -58,25 +86,25 @@ const tree = scaffold(`
|
|
|
58
86
|
{
|
|
59
87
|
"role": "openTag",
|
|
60
88
|
"raw": "<bookstore>",
|
|
61
|
-
"globalIndex":
|
|
62
|
-
"localIndex":
|
|
89
|
+
"globalIndex": 1,
|
|
90
|
+
"localIndex": 1,
|
|
63
91
|
"children": [
|
|
64
92
|
{
|
|
65
93
|
"role": "openTag",
|
|
66
94
|
"raw": "<book category=\"cooking\">",
|
|
67
|
-
"globalIndex":
|
|
68
|
-
"localIndex":
|
|
95
|
+
"globalIndex": 2,
|
|
96
|
+
"localIndex": 0,
|
|
69
97
|
"children": [
|
|
70
98
|
{
|
|
71
99
|
"role": "openTag",
|
|
72
100
|
"raw": "<title lang=\"en\">",
|
|
73
|
-
"globalIndex":
|
|
74
|
-
"localIndex":
|
|
101
|
+
"globalIndex": 3,
|
|
102
|
+
"localIndex": 0,
|
|
75
103
|
"children": [
|
|
76
104
|
{
|
|
77
105
|
"role": "textLeaf",
|
|
78
106
|
"raw": "Everyday Italian",
|
|
79
|
-
"globalIndex":
|
|
107
|
+
"globalIndex": 4,
|
|
80
108
|
"localIndex": 0
|
|
81
109
|
}
|
|
82
110
|
]
|
|
@@ -90,31 +118,32 @@ const tree = scaffold(`
|
|
|
90
118
|
|
|
91
119
|
---
|
|
92
120
|
|
|
93
|
-
##
|
|
121
|
+
## Node Shape
|
|
94
122
|
|
|
95
|
-
Every
|
|
123
|
+
Every node in the tree has the following fields:
|
|
96
124
|
|
|
97
|
-
| Field | Type
|
|
98
|
-
| ------------- |
|
|
99
|
-
| `role` | `
|
|
100
|
-
| `raw` | `string`
|
|
101
|
-
| `globalIndex` | `number`
|
|
102
|
-
| `localIndex` | `number`
|
|
103
|
-
| `children` | `
|
|
104
|
-
| `malformed` | `true`
|
|
125
|
+
| Field | Type | Description |
|
|
126
|
+
| ------------- | ------------- | --------------------------------------------------- |
|
|
127
|
+
| `role` | `XmlNodeRole` | What kind of node this is |
|
|
128
|
+
| `raw` | `string` | The exact source string, untouched |
|
|
129
|
+
| `globalIndex` | `number` | Position in the entire document (never resets) |
|
|
130
|
+
| `localIndex` | `number` | Position within the parent's children array |
|
|
131
|
+
| `children` | `XmlNode[]` | Present only on `openTag` - the nested nodes inside |
|
|
132
|
+
| `malformed` | `true` | Present only when the structure is broken |
|
|
105
133
|
|
|
106
134
|
---
|
|
107
135
|
|
|
108
|
-
##
|
|
136
|
+
## Node Roles
|
|
109
137
|
|
|
110
|
-
| Role | Has children | Description
|
|
111
|
-
| ----------------------- | ------------ |
|
|
112
|
-
| `openTag` | yes | An opening tag, e.g. `<book category="web">`
|
|
113
|
-
| `selfTag` | no | A self-closing tag, e.g. `<br/>`
|
|
114
|
-
| `closeTag` | no | Only appears when stray (no matching open)
|
|
115
|
-
| `processingInstruction` | no | e.g. `<?xml version="1.0"?>`
|
|
116
|
-
| `comment` | no | e.g. `<!-- a comment -->`
|
|
117
|
-
| `textLeaf` | no | Text content between tags
|
|
138
|
+
| Role | Has children | Description |
|
|
139
|
+
| ----------------------- | ------------ | --------------------------------------------------- |
|
|
140
|
+
| `openTag` | yes | An opening tag, e.g. `<book category="web">` |
|
|
141
|
+
| `selfTag` | no | A self-closing tag, e.g. `<br/>` |
|
|
142
|
+
| `closeTag` | no | Only appears when stray (no matching open) |
|
|
143
|
+
| `processingInstruction` | no | e.g. `<?xml version="1.0"?>` |
|
|
144
|
+
| `comment` | no | e.g. `<!-- a comment -->` |
|
|
145
|
+
| `textLeaf` | no | Text content between tags, including CDATA sections |
|
|
146
|
+
| `doctype` | no | e.g. `<!DOCTYPE html>` or `<!DOCTYPE root [...]>` |
|
|
118
147
|
|
|
119
148
|
---
|
|
120
149
|
|
|
@@ -171,6 +200,27 @@ const tree = scaffold("<root><unclosed><valid>text</valid></root>");
|
|
|
171
200
|
|
|
172
201
|
---
|
|
173
202
|
|
|
203
|
+
## Exports
|
|
204
|
+
|
|
205
|
+
```ts
|
|
206
|
+
import { scaffold, isMalformed } from "xml-to-html-converter";
|
|
207
|
+
import type {
|
|
208
|
+
XmlNode,
|
|
209
|
+
XmlNodeRole,
|
|
210
|
+
MalformedXmlNode,
|
|
211
|
+
} from "xml-to-html-converter";
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
| Export | Kind | Description |
|
|
215
|
+
| ------------------ | -------- | --------------------------------------------------- |
|
|
216
|
+
| `scaffold` | function | Parses an XML string and returns a node tree |
|
|
217
|
+
| `isMalformed` | function | Type guard, narrows `XmlNode` to `MalformedXmlNode` |
|
|
218
|
+
| `XmlNode` | type | The shape of every node in the tree |
|
|
219
|
+
| `XmlNodeRole` | type | Union of all valid role strings |
|
|
220
|
+
| `MalformedXmlNode` | type | `XmlNode` narrowed to `{ malformed: true }` |
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
174
224
|
## Requirements
|
|
175
225
|
|
|
176
226
|
Node.js `>=20.0.0`
|
package/dist/index.d.ts
CHANGED
|
@@ -1,88 +1,13 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
}
|
|
5
|
-
interface ElementNode {
|
|
6
|
-
type: 'element';
|
|
7
|
-
tag: string;
|
|
8
|
-
attributes: Record<string, string>;
|
|
9
|
-
children: Node[];
|
|
10
|
-
malformed?: true;
|
|
11
|
-
}
|
|
12
|
-
interface TextNode {
|
|
13
|
-
type: 'text';
|
|
14
|
-
value: string;
|
|
15
|
-
}
|
|
16
|
-
interface CommentNode {
|
|
17
|
-
type: 'comment';
|
|
18
|
-
value: string;
|
|
19
|
-
}
|
|
20
|
-
interface CDataNode {
|
|
21
|
-
type: 'cdata';
|
|
22
|
-
value: string;
|
|
23
|
-
}
|
|
24
|
-
interface ProcessingInstructionNode {
|
|
25
|
-
type: 'processing-instruction';
|
|
26
|
-
target: string;
|
|
27
|
-
attributes: Record<string, string>;
|
|
28
|
-
}
|
|
29
|
-
interface MalformedNode {
|
|
30
|
-
type: 'malformed';
|
|
31
|
-
raw: string;
|
|
32
|
-
malformed: true;
|
|
33
|
-
}
|
|
34
|
-
type Node = ElementNode | TextNode | CommentNode | CDataNode | ProcessingInstructionNode | MalformedNode;
|
|
35
|
-
|
|
36
|
-
declare function parse(xml: string): DocumentNode;
|
|
37
|
-
|
|
38
|
-
declare const TokenType: {
|
|
39
|
-
readonly PROCESSING_INSTRUCTION: "processing-instruction";
|
|
40
|
-
readonly ELEMENT_OPEN: "element-open";
|
|
41
|
-
readonly ELEMENT_CLOSE: "element-close";
|
|
42
|
-
readonly SELF_CLOSING: "self-closing";
|
|
43
|
-
readonly TEXT: "text";
|
|
44
|
-
readonly COMMENT: "comment";
|
|
45
|
-
readonly CDATA: "cdata";
|
|
46
|
-
readonly MALFORMED: "malformed";
|
|
47
|
-
};
|
|
48
|
-
interface ProcessingInstructionToken {
|
|
49
|
-
type: typeof TokenType.PROCESSING_INSTRUCTION;
|
|
50
|
-
target: string;
|
|
51
|
-
attributes: Record<string, string>;
|
|
52
|
-
}
|
|
53
|
-
interface ElementOpenToken {
|
|
54
|
-
type: typeof TokenType.ELEMENT_OPEN;
|
|
55
|
-
tag: string;
|
|
56
|
-
attributes: Record<string, string>;
|
|
57
|
-
}
|
|
58
|
-
interface ElementCloseToken {
|
|
59
|
-
type: typeof TokenType.ELEMENT_CLOSE;
|
|
60
|
-
tag: string;
|
|
61
|
-
}
|
|
62
|
-
interface SelfClosingToken {
|
|
63
|
-
type: typeof TokenType.SELF_CLOSING;
|
|
64
|
-
tag: string;
|
|
65
|
-
attributes: Record<string, string>;
|
|
66
|
-
}
|
|
67
|
-
interface TextToken {
|
|
68
|
-
type: typeof TokenType.TEXT;
|
|
69
|
-
value: string;
|
|
70
|
-
}
|
|
71
|
-
interface CommentToken {
|
|
72
|
-
type: typeof TokenType.COMMENT;
|
|
73
|
-
value: string;
|
|
74
|
-
}
|
|
75
|
-
interface CDataToken {
|
|
76
|
-
type: typeof TokenType.CDATA;
|
|
77
|
-
value: string;
|
|
78
|
-
}
|
|
79
|
-
interface MalformedToken {
|
|
80
|
-
type: typeof TokenType.MALFORMED;
|
|
1
|
+
type XmlNodeRole = "openTag" | "closeTag" | "selfTag" | "processingInstruction" | "comment" | "textLeaf";
|
|
2
|
+
interface XmlNode {
|
|
3
|
+
role: XmlNodeRole;
|
|
81
4
|
raw: string;
|
|
5
|
+
globalIndex: number;
|
|
6
|
+
localIndex: number;
|
|
7
|
+
children?: XmlNode[];
|
|
8
|
+
malformed?: true;
|
|
82
9
|
}
|
|
83
|
-
type Token = ProcessingInstructionToken | ElementOpenToken | ElementCloseToken | SelfClosingToken | TextToken | CommentToken | CDataToken | MalformedToken;
|
|
84
|
-
type ContentToken = Exclude<Token, ElementOpenToken | ElementCloseToken>;
|
|
85
10
|
|
|
86
|
-
declare function
|
|
11
|
+
declare function scaffold(xml: string): XmlNode[];
|
|
87
12
|
|
|
88
|
-
export { type
|
|
13
|
+
export { type XmlNode, type XmlNodeRole, scaffold };
|
package/dist/index.js
CHANGED
|
@@ -1,179 +1,116 @@
|
|
|
1
|
-
// src/modules/
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
SELF_CLOSING: "self-closing",
|
|
7
|
-
TEXT: "text",
|
|
8
|
-
COMMENT: "comment",
|
|
9
|
-
CDATA: "cdata",
|
|
10
|
-
MALFORMED: "malformed"
|
|
11
|
-
};
|
|
12
|
-
|
|
13
|
-
// src/modules/tokenizer/tokenizer.ts
|
|
14
|
-
var WHITESPACE = /\s/;
|
|
15
|
-
function parseAttributes(raw) {
|
|
16
|
-
const attributes = {};
|
|
17
|
-
const pattern = /(\S+?)\s*=\s*(["'])([^"']*)\2/g;
|
|
18
|
-
let match;
|
|
19
|
-
while ((match = pattern.exec(raw)) !== null) {
|
|
20
|
-
attributes[match[1]] = match[3];
|
|
21
|
-
}
|
|
22
|
-
return attributes;
|
|
1
|
+
// src/modules/scaffold/scaffold.ts
|
|
2
|
+
function scaffold(xml) {
|
|
3
|
+
const counter = { value: 0 };
|
|
4
|
+
const { xmlNodes } = collectXmlNodes(xml, 0, null, counter);
|
|
5
|
+
return xmlNodes;
|
|
23
6
|
}
|
|
24
|
-
function
|
|
25
|
-
|
|
26
|
-
const end2 = xml.indexOf("<", position);
|
|
27
|
-
const value = xml.slice(position, end2 === -1 ? xml.length : end2);
|
|
28
|
-
return {
|
|
29
|
-
token: value.trim() ? { type: TokenType.TEXT, value } : null,
|
|
30
|
-
end: end2 === -1 ? xml.length : end2
|
|
31
|
-
};
|
|
32
|
-
}
|
|
33
|
-
const next = xml[position + 1];
|
|
34
|
-
if (next === "?") {
|
|
35
|
-
const closeIndex = xml.indexOf("?>", position);
|
|
36
|
-
if (closeIndex === -1)
|
|
37
|
-
return {
|
|
38
|
-
token: { type: TokenType.MALFORMED, raw: xml.slice(position) },
|
|
39
|
-
end: xml.length
|
|
40
|
-
};
|
|
41
|
-
const end2 = closeIndex + 2;
|
|
42
|
-
const inner2 = xml.slice(position + 2, end2 - 2).trim();
|
|
43
|
-
const space2 = inner2.search(WHITESPACE);
|
|
44
|
-
return {
|
|
45
|
-
token: {
|
|
46
|
-
type: TokenType.PROCESSING_INSTRUCTION,
|
|
47
|
-
target: space2 === -1 ? inner2 : inner2.slice(0, space2),
|
|
48
|
-
attributes: parseAttributes(inner2)
|
|
49
|
-
},
|
|
50
|
-
end: end2
|
|
51
|
-
};
|
|
52
|
-
}
|
|
53
|
-
if (next === "!" && xml[position + 2] === "-") {
|
|
54
|
-
const closeIndex = xml.indexOf("-->", position);
|
|
55
|
-
if (closeIndex === -1)
|
|
56
|
-
return {
|
|
57
|
-
token: { type: TokenType.MALFORMED, raw: xml.slice(position) },
|
|
58
|
-
end: xml.length
|
|
59
|
-
};
|
|
60
|
-
const end2 = closeIndex + 3;
|
|
61
|
-
return {
|
|
62
|
-
token: {
|
|
63
|
-
type: TokenType.COMMENT,
|
|
64
|
-
value: xml.slice(position + 4, end2 - 3)
|
|
65
|
-
},
|
|
66
|
-
end: end2
|
|
67
|
-
};
|
|
68
|
-
}
|
|
69
|
-
if (next === "!" && xml[position + 2] === "[") {
|
|
70
|
-
const closeIndex = xml.indexOf("]]>", position);
|
|
71
|
-
if (closeIndex === -1)
|
|
72
|
-
return {
|
|
73
|
-
token: { type: TokenType.MALFORMED, raw: xml.slice(position) },
|
|
74
|
-
end: xml.length
|
|
75
|
-
};
|
|
76
|
-
const end2 = closeIndex + 3;
|
|
77
|
-
return {
|
|
78
|
-
token: { type: TokenType.CDATA, value: xml.slice(position + 9, end2 - 3) },
|
|
79
|
-
end: end2
|
|
80
|
-
};
|
|
81
|
-
}
|
|
82
|
-
const end = xml.indexOf(">", position) + 1;
|
|
83
|
-
if (!end)
|
|
84
|
-
return {
|
|
85
|
-
token: { type: TokenType.MALFORMED, raw: xml.slice(position) },
|
|
86
|
-
end: xml.length
|
|
87
|
-
};
|
|
88
|
-
const raw = xml.slice(position + 1, end - 1).trim();
|
|
89
|
-
if (raw[0] === "/")
|
|
90
|
-
return {
|
|
91
|
-
token: { type: TokenType.ELEMENT_CLOSE, tag: raw.slice(1).trim() },
|
|
92
|
-
end
|
|
93
|
-
};
|
|
94
|
-
const selfClosing = raw[raw.length - 1] === "/";
|
|
95
|
-
const inner = selfClosing ? raw.slice(0, -1).trim() : raw;
|
|
96
|
-
const space = inner.search(WHITESPACE);
|
|
97
|
-
const tag = space === -1 ? inner : inner.slice(0, space);
|
|
98
|
-
const type = selfClosing ? TokenType.SELF_CLOSING : TokenType.ELEMENT_OPEN;
|
|
99
|
-
return { token: { type, tag, attributes: parseAttributes(inner) }, end };
|
|
100
|
-
}
|
|
101
|
-
function tokenize(xml) {
|
|
102
|
-
const tokens = [];
|
|
103
|
-
let position = 0;
|
|
7
|
+
function collectXmlNodes(xml, position, parentTag, counter) {
|
|
8
|
+
const xmlNodes = [];
|
|
104
9
|
while (position < xml.length) {
|
|
105
|
-
const
|
|
106
|
-
if (
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
const tokens = tokenize(xml);
|
|
115
|
-
const cursor = { position: 0 };
|
|
116
|
-
const { children } = collectChildren(tokens, cursor, null);
|
|
117
|
-
return { type: "document", children };
|
|
118
|
-
}
|
|
119
|
-
function collectChildren(tokens, cursor, parentTag) {
|
|
120
|
-
const children = [];
|
|
121
|
-
while (cursor.position < tokens.length) {
|
|
122
|
-
const token = tokens[cursor.position];
|
|
123
|
-
cursor.position++;
|
|
124
|
-
if (token.type === TokenType.ELEMENT_CLOSE) {
|
|
125
|
-
if (token.tag === parentTag) return { children, closed: true };
|
|
126
|
-
children.push({
|
|
127
|
-
type: "malformed",
|
|
128
|
-
raw: `</${token.tag}>`,
|
|
10
|
+
const xmlNodeData = extractXmlNodes(xml, position);
|
|
11
|
+
if (xmlNodeData.role === "closeTag") {
|
|
12
|
+
if (xmlNodeData.tag === parentTag)
|
|
13
|
+
return { xmlNodes, position: xmlNodeData.end, closed: true };
|
|
14
|
+
xmlNodes.push({
|
|
15
|
+
role: "closeTag",
|
|
16
|
+
raw: xmlNodeData.raw,
|
|
17
|
+
globalIndex: counter.value++,
|
|
18
|
+
localIndex: xmlNodes.length,
|
|
129
19
|
malformed: true
|
|
130
20
|
});
|
|
21
|
+
position = xmlNodeData.end;
|
|
131
22
|
continue;
|
|
132
23
|
}
|
|
133
|
-
if (
|
|
134
|
-
const
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
24
|
+
if (xmlNodeData.role === "openTag" && !xmlNodeData.malformed) {
|
|
25
|
+
const globalIndex = counter.value++;
|
|
26
|
+
const localIndex = xmlNodes.length;
|
|
27
|
+
const nested = collectXmlNodes(
|
|
28
|
+
xml,
|
|
29
|
+
xmlNodeData.end,
|
|
30
|
+
xmlNodeData.tag,
|
|
31
|
+
counter
|
|
138
32
|
);
|
|
139
|
-
const
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
33
|
+
const xmlNode2 = {
|
|
34
|
+
role: "openTag",
|
|
35
|
+
raw: xmlNodeData.raw,
|
|
36
|
+
globalIndex,
|
|
37
|
+
localIndex,
|
|
38
|
+
children: nested.xmlNodes
|
|
144
39
|
};
|
|
145
|
-
if (!closed)
|
|
146
|
-
|
|
40
|
+
if (!nested.closed) xmlNode2.malformed = true;
|
|
41
|
+
xmlNodes.push(xmlNode2);
|
|
42
|
+
position = nested.position;
|
|
147
43
|
continue;
|
|
148
44
|
}
|
|
149
|
-
|
|
45
|
+
const xmlNode = {
|
|
46
|
+
role: xmlNodeData.role,
|
|
47
|
+
raw: xmlNodeData.raw,
|
|
48
|
+
globalIndex: counter.value++,
|
|
49
|
+
localIndex: xmlNodes.length
|
|
50
|
+
};
|
|
51
|
+
if (xmlNodeData.malformed) xmlNode.malformed = true;
|
|
52
|
+
if (xmlNodeData.role === "openTag") xmlNode.children = [];
|
|
53
|
+
xmlNodes.push(xmlNode);
|
|
54
|
+
position = xmlNodeData.end;
|
|
150
55
|
}
|
|
151
|
-
return {
|
|
56
|
+
return { xmlNodes, position, closed: parentTag === null };
|
|
152
57
|
}
|
|
153
|
-
function
|
|
154
|
-
if (
|
|
155
|
-
|
|
156
|
-
if (token.type === TokenType.COMMENT)
|
|
157
|
-
return { type: "comment", value: token.value };
|
|
158
|
-
if (token.type === TokenType.CDATA)
|
|
159
|
-
return { type: "cdata", value: token.value };
|
|
160
|
-
if (token.type === TokenType.SELF_CLOSING)
|
|
58
|
+
function extractXmlNodes(xml, position) {
|
|
59
|
+
if (xml[position] !== "<") {
|
|
60
|
+
const end2 = xml.indexOf("<", position);
|
|
161
61
|
return {
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
62
|
+
raw: xml.slice(position, end2 === -1 ? xml.length : end2),
|
|
63
|
+
role: "textLeaf",
|
|
64
|
+
tag: "",
|
|
65
|
+
end: end2 === -1 ? xml.length : end2
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
if (xml[position + 1] === "?") {
|
|
69
|
+
const end2 = xml.indexOf("?>", position + 2);
|
|
70
|
+
return end2 === -1 ? {
|
|
71
|
+
raw: xml.slice(position),
|
|
72
|
+
role: "processingInstruction",
|
|
73
|
+
tag: "",
|
|
74
|
+
end: xml.length
|
|
75
|
+
} : {
|
|
76
|
+
raw: xml.slice(position, end2 + 2),
|
|
77
|
+
role: "processingInstruction",
|
|
78
|
+
tag: "",
|
|
79
|
+
end: end2 + 2
|
|
166
80
|
};
|
|
167
|
-
|
|
81
|
+
}
|
|
82
|
+
if (xml[position + 1] === "!" && xml[position + 2] === "-") {
|
|
83
|
+
const end2 = xml.indexOf("-->", position + 4);
|
|
84
|
+
return end2 === -1 ? { raw: xml.slice(position), role: "comment", tag: "", end: xml.length } : {
|
|
85
|
+
raw: xml.slice(position, end2 + 3),
|
|
86
|
+
role: "comment",
|
|
87
|
+
tag: "",
|
|
88
|
+
end: end2 + 3
|
|
89
|
+
};
|
|
90
|
+
}
|
|
91
|
+
const closeAt = xml.indexOf(">", position);
|
|
92
|
+
if (closeAt === -1)
|
|
168
93
|
return {
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
94
|
+
raw: xml.slice(position),
|
|
95
|
+
role: "openTag",
|
|
96
|
+
tag: "",
|
|
97
|
+
end: xml.length,
|
|
98
|
+
malformed: true
|
|
172
99
|
};
|
|
173
|
-
|
|
100
|
+
const raw = xml.slice(position, closeAt + 1);
|
|
101
|
+
const end = closeAt + 1;
|
|
102
|
+
const inner = xml.slice(position + 1, closeAt).trim();
|
|
103
|
+
if (inner.startsWith("/")) {
|
|
104
|
+
const tag2 = inner.slice(1).trim().split(/\s/)[0] ?? "";
|
|
105
|
+
return { raw, role: "closeTag", tag: tag2, end };
|
|
106
|
+
}
|
|
107
|
+
if (inner.endsWith("/")) {
|
|
108
|
+
const tag2 = inner.slice(0, -1).trim().split(/\s/)[0] ?? "";
|
|
109
|
+
return { raw, role: "selfTag", tag: tag2, end };
|
|
110
|
+
}
|
|
111
|
+
const tag = inner.split(/\s/)[0] ?? "";
|
|
112
|
+
return { raw, role: "openTag", tag, end };
|
|
174
113
|
}
|
|
175
114
|
export {
|
|
176
|
-
|
|
177
|
-
parse,
|
|
178
|
-
tokenize
|
|
115
|
+
scaffold
|
|
179
116
|
};
|