xml-to-html-converter 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -30
- package/dist/index.d.ts +11 -82
- package/dist/index.js +139 -157
- package/package.json +4 -2
package/README.md
CHANGED
|
@@ -10,12 +10,40 @@ A zero-dependency Node.js package for converting XML to HTML. Currently in pre-1
|
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
13
|
+
## v0.1.x: XML Node Extraction & Scaffolding
|
|
14
|
+
|
|
15
|
+
Version `0.1.x` is focused entirely on parsing raw XML into a structured tree of nodes. The `scaffold` function walks an XML string and produces an array of `XmlNode` objects, each carrying its role, its raw source text, and its position in the document, both globally across the full document and locally within its parent.
|
|
16
|
+
|
|
17
|
+
```ts
|
|
18
|
+
interface XmlNode {
|
|
19
|
+
role: XmlNodeRole;
|
|
20
|
+
raw: string;
|
|
21
|
+
globalIndex: number;
|
|
22
|
+
localIndex: number;
|
|
23
|
+
children?: XmlNode[];
|
|
24
|
+
malformed?: true;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
type XmlNodeRole =
|
|
28
|
+
| "closeTag"
|
|
29
|
+
| "comment"
|
|
30
|
+
| "doctype"
|
|
31
|
+
| "openTag"
|
|
32
|
+
| "processingInstruction"
|
|
33
|
+
| "selfTag"
|
|
34
|
+
| "textLeaf";
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
This scaffold is the foundation everything else will be built on. No transformation, no HTML output, no opinions about content, just an accurate, traversable representation of what the XML says.
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
13
41
|
> **Where I am right now**
|
|
14
42
|
>
|
|
15
|
-
> `v0.x` is building the scaffold
|
|
43
|
+
> `v0.x` is building the scaffold: a structural tree of every node in your XML document, each carrying its raw source string and its exact position in the document. This scaffold is what the HTML converter will walk when it's built.
|
|
16
44
|
>
|
|
17
|
-
> - **`scaffold(xml)`** reads any XML string and returns a nested
|
|
18
|
-
> - Every
|
|
45
|
+
> - **`scaffold(xml)`** reads any XML string and returns a nested node tree
|
|
46
|
+
> - Every node knows its `role`, its `raw` source string, its `globalIndex` in the document, and its `localIndex` within its parent
|
|
19
47
|
> - Broken XML is never thrown - malformed nodes are flagged with `malformed: true` in place and the tree is built regardless
|
|
20
48
|
>
|
|
21
49
|
> `v1.0.0` is when this package becomes what it says it is: a full XML-to-HTML converter. Everything before that is the work to get there.
|
|
@@ -45,7 +73,7 @@ const tree = scaffold(`
|
|
|
45
73
|
`);
|
|
46
74
|
```
|
|
47
75
|
|
|
48
|
-
`scaffold` returns a flat array of root-level
|
|
76
|
+
`scaffold` returns a flat array of root-level nodes. Each `openTag` node carries its children nested inside it:
|
|
49
77
|
|
|
50
78
|
```json
|
|
51
79
|
[
|
|
@@ -58,25 +86,25 @@ const tree = scaffold(`
|
|
|
58
86
|
{
|
|
59
87
|
"role": "openTag",
|
|
60
88
|
"raw": "<bookstore>",
|
|
61
|
-
"globalIndex":
|
|
62
|
-
"localIndex":
|
|
89
|
+
"globalIndex": 1,
|
|
90
|
+
"localIndex": 1,
|
|
63
91
|
"children": [
|
|
64
92
|
{
|
|
65
93
|
"role": "openTag",
|
|
66
94
|
"raw": "<book category=\"cooking\">",
|
|
67
|
-
"globalIndex":
|
|
68
|
-
"localIndex":
|
|
95
|
+
"globalIndex": 2,
|
|
96
|
+
"localIndex": 0,
|
|
69
97
|
"children": [
|
|
70
98
|
{
|
|
71
99
|
"role": "openTag",
|
|
72
100
|
"raw": "<title lang=\"en\">",
|
|
73
|
-
"globalIndex":
|
|
74
|
-
"localIndex":
|
|
101
|
+
"globalIndex": 3,
|
|
102
|
+
"localIndex": 0,
|
|
75
103
|
"children": [
|
|
76
104
|
{
|
|
77
105
|
"role": "textLeaf",
|
|
78
106
|
"raw": "Everyday Italian",
|
|
79
|
-
"globalIndex":
|
|
107
|
+
"globalIndex": 4,
|
|
80
108
|
"localIndex": 0
|
|
81
109
|
}
|
|
82
110
|
]
|
|
@@ -90,31 +118,32 @@ const tree = scaffold(`
|
|
|
90
118
|
|
|
91
119
|
---
|
|
92
120
|
|
|
93
|
-
##
|
|
121
|
+
## Node Shape
|
|
94
122
|
|
|
95
|
-
Every
|
|
123
|
+
Every node in the tree has the following fields:
|
|
96
124
|
|
|
97
|
-
| Field | Type
|
|
98
|
-
| ------------- |
|
|
99
|
-
| `role` | `
|
|
100
|
-
| `raw` | `string`
|
|
101
|
-
| `globalIndex` | `number`
|
|
102
|
-
| `localIndex` | `number`
|
|
103
|
-
| `children` | `
|
|
104
|
-
| `malformed` | `true`
|
|
125
|
+
| Field | Type | Description |
|
|
126
|
+
| ------------- | ------------- | --------------------------------------------------- |
|
|
127
|
+
| `role` | `XmlNodeRole` | What kind of node this is |
|
|
128
|
+
| `raw` | `string` | The exact source string, untouched |
|
|
129
|
+
| `globalIndex` | `number` | Position in the entire document (never resets) |
|
|
130
|
+
| `localIndex` | `number` | Position within the parent's children array |
|
|
131
|
+
| `children` | `XmlNode[]` | Present only on `openTag` - the nested nodes inside |
|
|
132
|
+
| `malformed` | `true` | Present only when the structure is broken |
|
|
105
133
|
|
|
106
134
|
---
|
|
107
135
|
|
|
108
|
-
##
|
|
136
|
+
## Node Roles
|
|
109
137
|
|
|
110
|
-
| Role | Has children | Description
|
|
111
|
-
| ----------------------- | ------------ |
|
|
112
|
-
| `openTag` | yes | An opening tag, e.g. `<book category="web">`
|
|
113
|
-
| `selfTag` | no | A self-closing tag, e.g. `<br/>`
|
|
114
|
-
| `closeTag` | no | Only appears when stray (no matching open)
|
|
115
|
-
| `processingInstruction` | no | e.g. `<?xml version="1.0"?>`
|
|
116
|
-
| `comment` | no | e.g. `<!-- a comment -->`
|
|
117
|
-
| `textLeaf` | no | Text content between tags
|
|
138
|
+
| Role | Has children | Description |
|
|
139
|
+
| ----------------------- | ------------ | --------------------------------------------------- |
|
|
140
|
+
| `openTag` | yes | An opening tag, e.g. `<book category="web">` |
|
|
141
|
+
| `selfTag` | no | A self-closing tag, e.g. `<br/>` |
|
|
142
|
+
| `closeTag` | no | Only appears when stray (no matching open) |
|
|
143
|
+
| `processingInstruction` | no | e.g. `<?xml version="1.0"?>` |
|
|
144
|
+
| `comment` | no | e.g. `<!-- a comment -->` |
|
|
145
|
+
| `textLeaf` | no | Text content between tags, including CDATA sections |
|
|
146
|
+
| `doctype` | no | e.g. `<!DOCTYPE html>` or `<!DOCTYPE root [...]>` |
|
|
118
147
|
|
|
119
148
|
---
|
|
120
149
|
|
|
@@ -171,6 +200,27 @@ const tree = scaffold("<root><unclosed><valid>text</valid></root>");
|
|
|
171
200
|
|
|
172
201
|
---
|
|
173
202
|
|
|
203
|
+
## Exports
|
|
204
|
+
|
|
205
|
+
```ts
|
|
206
|
+
import { scaffold, isMalformed } from "xml-to-html-converter";
|
|
207
|
+
import type {
|
|
208
|
+
XmlNode,
|
|
209
|
+
XmlNodeRole,
|
|
210
|
+
MalformedXmlNode,
|
|
211
|
+
} from "xml-to-html-converter";
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
| Export | Kind | Description |
|
|
215
|
+
| ------------------ | -------- | --------------------------------------------------- |
|
|
216
|
+
| `scaffold` | function | Parses an XML string and returns a node tree |
|
|
217
|
+
| `isMalformed` | function | Type guard, narrows `XmlNode` to `MalformedXmlNode` |
|
|
218
|
+
| `XmlNode` | type | The shape of every node in the tree |
|
|
219
|
+
| `XmlNodeRole` | type | Union of all valid role strings |
|
|
220
|
+
| `MalformedXmlNode` | type | `XmlNode` narrowed to `{ malformed: true }` |
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
174
224
|
## Requirements
|
|
175
225
|
|
|
176
226
|
Node.js `>=20.0.0`
|
package/dist/index.d.ts
CHANGED
|
@@ -1,88 +1,17 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
attributes: Record<string, string>;
|
|
9
|
-
children: Node[];
|
|
1
|
+
/**
 * The kind of construct a scaffolded node represents.
 * Every `XmlNode.role` is exactly one of these strings.
 */
type XmlNodeRole =
  | "closeTag"
  | "comment"
  | "doctype"
  | "openTag"
  | "processingInstruction"
  | "selfTag"
  | "textLeaf";
|
|
2
|
+
/** One node of the tree returned by `scaffold`. */
interface XmlNode {
  /** What kind of node this is. */
  role: XmlNodeRole;
  /** The exact source text of the node, untouched. */
  raw: string;
  /** Position in the entire document; the counter never resets between parents. */
  globalIndex: number;
  /** Position within the parent's `children` array. */
  localIndex: number;
  /** Nested nodes; present only on `openTag` nodes. */
  children?: XmlNode[];
  /** Present (and `true`) only when the structure is broken. */
  malformed?: true;
}
|
|
12
|
-
|
|
13
|
-
type: 'text';
|
|
14
|
-
value: string;
|
|
15
|
-
}
|
|
16
|
-
interface CommentNode {
|
|
17
|
-
type: 'comment';
|
|
18
|
-
value: string;
|
|
19
|
-
}
|
|
20
|
-
interface CDataNode {
|
|
21
|
-
type: 'cdata';
|
|
22
|
-
value: string;
|
|
23
|
-
}
|
|
24
|
-
interface ProcessingInstructionNode {
|
|
25
|
-
type: 'processing-instruction';
|
|
26
|
-
target: string;
|
|
27
|
-
attributes: Record<string, string>;
|
|
28
|
-
}
|
|
29
|
-
interface MalformedNode {
|
|
30
|
-
type: 'malformed';
|
|
31
|
-
raw: string;
|
|
10
|
+
/** An `XmlNode` whose `malformed` flag is known to be set; the type `isMalformed` narrows to. */
type MalformedXmlNode = XmlNode & { malformed: true };
|
|
48
|
-
|
|
49
|
-
type: typeof TokenType.PROCESSING_INSTRUCTION;
|
|
50
|
-
target: string;
|
|
51
|
-
attributes: Record<string, string>;
|
|
52
|
-
}
|
|
53
|
-
interface ElementOpenToken {
|
|
54
|
-
type: typeof TokenType.ELEMENT_OPEN;
|
|
55
|
-
tag: string;
|
|
56
|
-
attributes: Record<string, string>;
|
|
57
|
-
}
|
|
58
|
-
interface ElementCloseToken {
|
|
59
|
-
type: typeof TokenType.ELEMENT_CLOSE;
|
|
60
|
-
tag: string;
|
|
61
|
-
}
|
|
62
|
-
interface SelfClosingToken {
|
|
63
|
-
type: typeof TokenType.SELF_CLOSING;
|
|
64
|
-
tag: string;
|
|
65
|
-
attributes: Record<string, string>;
|
|
66
|
-
}
|
|
67
|
-
interface TextToken {
|
|
68
|
-
type: typeof TokenType.TEXT;
|
|
69
|
-
value: string;
|
|
70
|
-
}
|
|
71
|
-
interface CommentToken {
|
|
72
|
-
type: typeof TokenType.COMMENT;
|
|
73
|
-
value: string;
|
|
74
|
-
}
|
|
75
|
-
interface CDataToken {
|
|
76
|
-
type: typeof TokenType.CDATA;
|
|
77
|
-
value: string;
|
|
78
|
-
}
|
|
79
|
-
interface MalformedToken {
|
|
80
|
-
type: typeof TokenType.MALFORMED;
|
|
81
|
-
raw: string;
|
|
82
|
-
}
|
|
83
|
-
type Token = ProcessingInstructionToken | ElementOpenToken | ElementCloseToken | SelfClosingToken | TextToken | CommentToken | CDataToken | MalformedToken;
|
|
84
|
-
type ContentToken = Exclude<Token, ElementOpenToken | ElementCloseToken>;
|
|
13
|
+
/** Type guard: returns true when `node.malformed` is `true`, narrowing `node` to `MalformedXmlNode`. */
declare function isMalformed(node: XmlNode): node is MalformedXmlNode;
|
|
85
14
|
|
|
86
|
-
declare function
|
|
15
|
+
/** Parses an XML string and returns the array of root-level nodes; `openTag` nodes carry their nested nodes in `children`. */
declare function scaffold(xml: string): XmlNode[];
|
|
87
16
|
|
|
88
|
-
export { type
|
|
17
|
+
export { type MalformedXmlNode, type XmlNode, type XmlNodeRole, isMalformed, scaffold };
|
package/dist/index.js
CHANGED
|
@@ -1,179 +1,161 @@
|
|
|
1
|
-
// src/modules/
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
1
|
+
// src/modules/scaffold/scaffold.ts
|
|
2
|
+
/**
 * Parses an XML string into a tree of nodes.
 *
 * @param {string} xml - The XML source text.
 * @returns {object[]} The root-level nodes, collected from offset 0 with no parent tag.
 */
function scaffold(xml) {
  // One mutable counter object is shared by every recursive call so that
  // globalIndex keeps increasing across the whole document.
  const indexCounter = { value: 0 };
  const rootResult = collectXmlNodes(xml, 0, null, indexCounter);
  return rootResult.xmlNodes;
}
|
|
7
|
+
/**
 * Collects sibling nodes starting at `position` until the close tag matching
 * `parentTag` is met or the input ends, recursing into well-formed open tags.
 *
 * @param {string} xml - The XML source text.
 * @param {number} position - Offset at which to start reading.
 * @param {string|null} parentTag - Tag name whose close tag ends this level; null at the root.
 * @param {{ value: number }} counter - Shared counter supplying each node's globalIndex.
 * @returns {{ xmlNodes: object[], position: number, closed: boolean }}
 *   The collected nodes, the offset where reading stopped, and whether this
 *   level ended properly (matching close tag found, or end of input at the root).
 */
function collectXmlNodes(xml, position, parentTag, counter) {
  const xmlNodes = [];
  let cursor = position;

  while (cursor < xml.length) {
    const piece = extractXmlNodes(xml, cursor);

    if (piece.role === "textLeaf" && piece.raw.trim() === "") {
      // Whitespace-only text between nodes is dropped and consumes no index.
      cursor = piece.end;
    } else if (piece.role === "closeTag") {
      if (piece.tag === parentTag) {
        // This close tag ends the current level; it is consumed, not recorded.
        return { xmlNodes, position: piece.end, closed: true };
      }
      // Any other close tag is stray: record it in place, flagged malformed.
      xmlNodes.push({
        role: "closeTag",
        raw: piece.raw,
        globalIndex: counter.value++,
        localIndex: xmlNodes.length,
        malformed: true
      });
      cursor = piece.end;
    } else if (piece.role === "openTag" && !piece.malformed) {
      // Indices are taken before recursing so the parent is numbered ahead of its children.
      const globalIndex = counter.value++;
      const localIndex = xmlNodes.length;
      const {
        xmlNodes: children,
        position: resumeAt,
        closed
      } = collectXmlNodes(xml, piece.end, piece.tag, counter);
      const element = {
        role: "openTag",
        raw: piece.raw,
        globalIndex,
        localIndex,
        children
      };
      // Input ran out before the matching close tag was found.
      if (!closed) element.malformed = true;
      xmlNodes.push(element);
      cursor = resumeAt;
    } else {
      // Leaves, plus open tags already flagged malformed by the extractor.
      const leaf = {
        role: piece.role,
        raw: piece.raw,
        globalIndex: counter.value++,
        localIndex: xmlNodes.length
      };
      if (piece.malformed) leaf.malformed = true;
      if (piece.role === "openTag") leaf.children = [];
      xmlNodes.push(leaf);
      cursor = piece.end;
    }
  }

  // End of input closes only the root level; any named parent is left unclosed.
  return { xmlNodes, position: cursor, closed: parentTag === null };
}
|
|
62
|
+
/**
 * Finds the `>` that ends a tag, ignoring any `>` inside single- or
 * double-quoted runs.
 *
 * @param {string} xml - The XML source text.
 * @param {number} position - Offset at which to start scanning.
 * @returns {number} Index of the closing `>`, or -1 when none is found
 *   (including when a quote is opened and never closed).
 */
function findTagClose(xml, position) {
  for (let index = position; index < xml.length; index += 1) {
    const char = xml[index];
    if (char === ">") return index;
    if (char !== '"' && char !== "'") continue;

    // Jump to the matching quote; the loop increment then steps past it.
    const matchingQuote = xml.indexOf(char, index + 1);
    if (matchingQuote === -1) return -1;
    index = matchingQuote;
  }
  return -1;
}
|
|
24
|
-
function
|
|
76
|
+
/**
 * Reads exactly one node from `xml` starting at `position`.
 *
 * Recognised, in order: text up to the next `<`, processing instructions
 * (`<?...?>`), CDATA-style sections (`<![...]]>`, reported as `textLeaf`),
 * comments (`<!--...-->`), `<!DOCTYPE ...>` with an optional `[...]` internal
 * subset, and finally close, self-closing and open tags.
 *
 * A construct whose terminator is missing consumes the rest of the input and
 * is returned with `malformed: true`.
 *
 * @param {string} xml - The XML source text.
 * @param {number} position - Offset of the first character of the node.
 * @returns {{ raw: string, role: string, tag: string, end: number, malformed?: true }}
 *   `raw` is the exact source slice, `tag` is the tag name (empty for
 *   non-tag roles) and `end` is the offset just past the node.
 */
function extractXmlNodes(xml, position) {
  // Node that ends at `end` (exclusive) and carries no tag name.
  const terminatedAt = (role, end) => ({
    raw: xml.slice(position, end),
    role,
    tag: "",
    end
  });
  // Node whose terminator never appears: it takes everything up to the end of input.
  const unterminated = (role) => ({
    raw: xml.slice(position),
    role,
    tag: "",
    end: xml.length,
    malformed: true
  });
  // Node that runs until a fixed terminator string.
  const readDelimited = (role, terminator, searchFrom) => {
    const closeAt = xml.indexOf(terminator, searchFrom);
    return closeAt === -1
      ? unterminated(role)
      : terminatedAt(role, closeAt + terminator.length);
  };
  // Tag name: first whitespace-delimited word of the tag's inner text.
  const firstWord = (text) => text.trim().split(/\s/)[0];

  if (xml[position] !== "<") {
    const nextMarkup = xml.indexOf("<", position);
    return terminatedAt("textLeaf", nextMarkup === -1 ? xml.length : nextMarkup);
  }

  if (xml[position + 1] === "?") {
    return readDelimited("processingInstruction", "?>", position + 2);
  }

  if (xml[position + 1] === "!" && xml[position + 2] === "[") {
    return readDelimited("textLeaf", "]]>", position + 3);
  }

  if (
    xml[position + 1] === "!" &&
    xml[position + 2] === "-" &&
    xml[position + 3] === "-"
  ) {
    return readDelimited("comment", "-->", position + 4);
  }

  if (xml.startsWith("<!DOCTYPE", position)) {
    const bracketOpen = xml.indexOf("[", position);
    const firstClose = xml.indexOf(">", position);
    const hasInternalSubset = bracketOpen !== -1 && bracketOpen < firstClose;

    if (hasInternalSubset) {
      // The XML grammar allows whitespace between the subset's `]` and the
      // final `>`, so match `]`, optional whitespace, `>` rather than a
      // literal "]>". A fresh regex per call keeps lastIndex state local.
      const subsetClose = /\]\s*>/g;
      subsetClose.lastIndex = bracketOpen;
      const hit = subsetClose.exec(xml);
      return hit === null
        ? unterminated("doctype")
        : terminatedAt("doctype", hit.index + hit[0].length);
    }

    return firstClose === -1
      ? unterminated("doctype")
      : terminatedAt("doctype", firstClose + 1);
  }

  const closeAt = findTagClose(xml, position + 1);
  if (closeAt === -1) return unterminated("openTag");

  const end = closeAt + 1;
  const raw = xml.slice(position, end);
  const inner = xml.slice(position + 1, closeAt).trim();

  if (inner.startsWith("/")) {
    return { raw, role: "closeTag", tag: firstWord(inner.slice(1)), end };
  }
  if (inner.endsWith("/")) {
    return { raw, role: "selfTag", tag: firstWord(inner.slice(0, -1)), end };
  }
  return { raw, role: "openTag", tag: firstWord(inner), end };
}
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
return { type: "comment", value: token.value };
|
|
158
|
-
if (token.type === TokenType.CDATA)
|
|
159
|
-
return { type: "cdata", value: token.value };
|
|
160
|
-
if (token.type === TokenType.SELF_CLOSING)
|
|
161
|
-
return {
|
|
162
|
-
type: "element",
|
|
163
|
-
tag: token.tag,
|
|
164
|
-
attributes: token.attributes,
|
|
165
|
-
children: []
|
|
166
|
-
};
|
|
167
|
-
if (token.type === TokenType.PROCESSING_INSTRUCTION)
|
|
168
|
-
return {
|
|
169
|
-
type: "processing-instruction",
|
|
170
|
-
target: token.target,
|
|
171
|
-
attributes: token.attributes
|
|
172
|
-
};
|
|
173
|
-
return { type: "malformed", raw: token.raw, malformed: true };
|
|
153
|
+
|
|
154
|
+
// src/modules/scaffold/types.ts
|
|
155
|
+
/**
 * Reports whether a node carries the malformed flag.
 *
 * @param {object} node - A node produced by `scaffold`.
 * @returns {boolean} True only when `node.malformed` is strictly `true`.
 */
function isMalformed(node) {
  const { malformed } = node;
  return malformed === true;
}
|
|
175
158
|
export {
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
tokenize
|
|
159
|
+
isMalformed,
|
|
160
|
+
scaffold
|
|
179
161
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "xml-to-html-converter",
|
|
3
|
-
"version": "0.1.5",
|
|
3
|
+
"version": "0.1.7",
|
|
4
4
|
"description": "Zero dependency XML to HTML converter for Node environments",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -19,8 +19,10 @@
|
|
|
19
19
|
},
|
|
20
20
|
"scripts": {
|
|
21
21
|
"build": "tsup",
|
|
22
|
+
"typecheck": "tsc --noEmit",
|
|
22
23
|
"test": "vitest run",
|
|
23
|
-
"coverage": "vitest run --coverage"
|
|
24
|
+
"coverage": "vitest run --coverage",
|
|
25
|
+
"prepublishOnly": "npm run build && npm test"
|
|
24
26
|
},
|
|
25
27
|
"keywords": [
|
|
26
28
|
"xml",
|