xml-to-html-converter 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -9
- package/dist/index.d.ts +8 -1
- package/dist/index.js +50 -7
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -15,9 +15,17 @@ A zero-dependency Node.js package for converting XML to HTML. Currently in pre-1
|
|
|
15
15
|
Version `0.1.x` is focused entirely on parsing raw XML into a structured tree of nodes. The `scaffold` function walks an XML string and produces an array of `XmlNode` objects, each carrying its role, its raw source text, and its position in the document, both globally across the full document and locally within its parent.
|
|
16
16
|
|
|
17
17
|
```ts
|
|
18
|
+
interface XmlAttribute {
|
|
19
|
+
name: string;
|
|
20
|
+
value: string;
|
|
21
|
+
}
|
|
22
|
+
|
|
18
23
|
interface XmlNode {
|
|
19
24
|
role: XmlNodeRole;
|
|
20
25
|
raw: string;
|
|
26
|
+
xmlTag?: string;
|
|
27
|
+
xmlInner?: string;
|
|
28
|
+
xmlAttributes?: XmlAttribute[];
|
|
21
29
|
globalIndex: number;
|
|
22
30
|
localIndex: number;
|
|
23
31
|
children?: XmlNode[];
|
|
@@ -44,6 +52,7 @@ This scaffold is the foundation everything else will be built on. No transformat
|
|
|
44
52
|
>
|
|
45
53
|
> - **`scaffold(xml)`** reads any XML string and returns a nested node tree
|
|
46
54
|
> - Every node knows its `role`, its `raw` source string, its `globalIndex` in the document, and its `localIndex` within its parent
|
|
55
|
+
> - Tag nodes (`openTag`, `selfTag`) also carry `xmlTag` (the parsed tag name) and, when the tag has attributes, `xmlInner` (the raw attribute string) and `xmlAttributes` (the structured attribute array); `closeTag` nodes carry `xmlTag` as well
|
|
47
56
|
> - Broken XML never causes a throw - malformed nodes are flagged with `malformed: true` in place and the tree is built regardless
|
|
48
57
|
>
|
|
49
58
|
> `v1.0.0` is when this package becomes what it says it is: a full XML-to-HTML converter. Everything before that is the work to get there.
|
|
@@ -92,12 +101,18 @@ const tree = scaffold(`
|
|
|
92
101
|
{
|
|
93
102
|
"role": "openTag",
|
|
94
103
|
"raw": "<book category=\"cooking\">",
|
|
104
|
+
"xmlTag": "book",
|
|
105
|
+
"xmlInner": "category=\"cooking\"",
|
|
106
|
+
"xmlAttributes": [{ "name": "category", "value": "cooking" }],
|
|
95
107
|
"globalIndex": 2,
|
|
96
108
|
"localIndex": 0,
|
|
97
109
|
"children": [
|
|
98
110
|
{
|
|
99
111
|
"role": "openTag",
|
|
100
112
|
"raw": "<title lang=\"en\">",
|
|
113
|
+
"xmlTag": "title",
|
|
114
|
+
"xmlInner": "lang=\"en\"",
|
|
115
|
+
"xmlAttributes": [{ "name": "lang", "value": "en" }],
|
|
101
116
|
"globalIndex": 3,
|
|
102
117
|
"localIndex": 0,
|
|
103
118
|
"children": [
|
|
@@ -122,14 +137,17 @@ const tree = scaffold(`
|
|
|
122
137
|
|
|
123
138
|
Every node in the tree has the following fields:
|
|
124
139
|
|
|
125
|
-
| Field
|
|
126
|
-
|
|
|
127
|
-
| `role`
|
|
128
|
-
| `raw`
|
|
129
|
-
| `
|
|
130
|
-
| `
|
|
131
|
-
| `
|
|
132
|
-
| `
|
|
140
|
+
| Field | Type | Description |
|
|
141
|
+
| --------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
|
|
142
|
+
| `role` | `XmlNodeRole` | What kind of node this is |
|
|
143
|
+
| `raw` | `string` | The exact source string, untouched |
|
|
144
|
+
| `xmlTag` | `string` | Tag name only, e.g. `"book"` or `"env:Envelope"`. Present on `openTag`, `selfTag`, `closeTag` |
|
|
145
|
+
| `xmlInner` | `string` | Everything after the tag name inside the brackets, verbatim. Present on `openTag` and `selfTag` when attributes exist |
|
|
146
|
+
| `xmlAttributes` | `XmlAttribute[]` | Parsed array of `{ name, value }` attribute objects. Present on `openTag` and `selfTag` when attributes exist |
|
|
147
|
+
| `globalIndex` | `number` | Position in the entire document (never resets) |
|
|
148
|
+
| `localIndex` | `number` | Position within the parent's children array |
|
|
149
|
+
| `children` | `XmlNode[]` | Present only on `openTag` - the nested nodes inside |
|
|
150
|
+
| `malformed` | `true` | Present only when the structure is broken |
|
|
133
151
|
|
|
134
152
|
---
|
|
135
153
|
|
|
@@ -151,11 +169,12 @@ Every node in the tree has the following fields:
|
|
|
151
169
|
|
|
152
170
|
`scaffold` never throws. No matter what the input looks like, it always returns a complete tree. Malformed structures are flagged with `malformed: true` in place and the walk continues.
|
|
153
171
|
|
|
154
|
-
|
|
172
|
+
Four cases are handled:
|
|
155
173
|
|
|
156
174
|
- **Unclosed tags** - opens but never closes, gets `malformed: true`, children are still collected
|
|
157
175
|
- **Stray closing tags** - a `</tag>` with no matching open surfaces as a `closeTag` token with `malformed: true`
|
|
158
176
|
- **Unclosed brackets** - a `<` with no matching `>` captures the remainder as a malformed token
|
|
177
|
+
- **Excessive nesting** - to prevent a stack overflow, recursion stops once a document is nested beyond 500 levels; the deepest open tag is flagged `malformed: true`
|
|
159
178
|
|
|
160
179
|
```js
|
|
161
180
|
const tree = scaffold("<root><unclosed><valid>text</valid></root>");
|
|
@@ -207,6 +226,7 @@ import { scaffold, isMalformed } from "xml-to-html-converter";
|
|
|
207
226
|
import type {
|
|
208
227
|
XmlNode,
|
|
209
228
|
XmlNodeRole,
|
|
229
|
+
XmlAttribute,
|
|
210
230
|
MalformedXmlNode,
|
|
211
231
|
} from "xml-to-html-converter";
|
|
212
232
|
```
|
|
@@ -217,6 +237,7 @@ import type {
|
|
|
217
237
|
| `isMalformed` | function | Type guard, narrows `XmlNode` to `MalformedXmlNode` |
|
|
218
238
|
| `XmlNode` | type | The shape of every node in the tree |
|
|
219
239
|
| `XmlNodeRole` | type | Union of all valid role strings |
|
|
240
|
+
| `XmlAttribute` | type | Shape of a parsed attribute `{ name, value }` |
|
|
220
241
|
| `MalformedXmlNode` | type | `XmlNode` narrowed to `{ malformed: true }` |
|
|
221
242
|
|
|
222
243
|
---
|
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,14 @@
|
|
|
1
|
+
interface XmlAttribute {
|
|
2
|
+
name: string;
|
|
3
|
+
value: string;
|
|
4
|
+
}
|
|
1
5
|
type XmlNodeRole = "closeTag" | "comment" | "doctype" | "openTag" | "processingInstruction" | "selfTag" | "textLeaf";
|
|
2
6
|
interface XmlNode {
|
|
3
7
|
role: XmlNodeRole;
|
|
4
8
|
raw: string;
|
|
9
|
+
xmlTag?: string;
|
|
10
|
+
xmlInner?: string;
|
|
11
|
+
xmlAttributes?: XmlAttribute[];
|
|
5
12
|
globalIndex: number;
|
|
6
13
|
localIndex: number;
|
|
7
14
|
children?: XmlNode[];
|
|
@@ -14,4 +21,4 @@ declare function isMalformed(node: XmlNode): node is MalformedXmlNode;
|
|
|
14
21
|
|
|
15
22
|
declare function scaffold(xml: string): XmlNode[];
|
|
16
23
|
|
|
17
|
-
export { type MalformedXmlNode, type XmlNode, type XmlNodeRole, isMalformed, scaffold };
|
|
24
|
+
export { type MalformedXmlNode, type XmlAttribute, type XmlNode, type XmlNodeRole, isMalformed, scaffold };
|
package/dist/index.js
CHANGED
|
@@ -1,10 +1,38 @@
|
|
|
1
1
|
// src/modules/scaffold/scaffold.ts
|
|
2
|
+
/**
 * Parse the raw attribute portion of a tag (everything after the tag name)
 * into a list of `{ name, value }` pairs.
 *
 * Only attributes of the form `name="value"` or `name='value'` are recorded;
 * whitespace is allowed around the `=`. Parsing is best-effort and never
 * throws: a token that is not a well-formed attribute (a name with no value,
 * an unquoted value, or a value with no name) is skipped, and scanning
 * continues with the next token so later valid attributes are still returned.
 * A quoted value that is never closed runs to the end of the string.
 *
 * @param {string} xmlInner - Raw text following the tag name inside the brackets.
 * @returns {{ name: string, value: string }[] | undefined} The parsed
 *   attributes in source order, or `undefined` when none were found.
 */
function parseXmlAttributes(xmlInner) {
  const WHITESPACE = /\s/;
  const s = xmlInner.trim();
  const attributes = [];
  let i = 0;

  const skipWhitespace = () => {
    while (i < s.length && WHITESPACE.test(s[i])) i++;
  };

  while (i < s.length) {
    skipWhitespace();
    if (i >= s.length) break;

    // The name runs up to the first "=" or whitespace character.
    const nameStart = i;
    while (i < s.length && s[i] !== "=" && !WHITESPACE.test(s[i])) i++;
    const name = s.slice(nameStart, i);

    skipWhitespace();
    if (s[i] !== "=") {
      // Name without a value (e.g. `disabled`). The cursor already sits on
      // the next token, so drop this one and keep scanning. `name` is
      // guaranteed non-empty here, so the loop always makes progress.
      continue;
    }
    i++; // consume "="
    skipWhitespace();

    const quote = s[i];
    if (quote !== '"' && quote !== "'") {
      // Unquoted value (e.g. `a=1`): skip to the next whitespace and resume.
      while (i < s.length && !WHITESPACE.test(s[i])) i++;
      continue;
    }
    i++; // consume opening quote

    const valueStart = i;
    while (i < s.length && s[i] !== quote) i++;
    const value = s.slice(valueStart, i);
    i++; // consume closing quote (harmlessly overshoots if it was missing)

    // A value with no name (`="x"`) is consumed above but not recorded.
    if (name) attributes.push({ name, value });
  }

  return attributes.length > 0 ? attributes : void 0;
}
|
|
28
|
+
var MAX_DEPTH = 500;
|
|
2
29
|
function scaffold(xml) {
|
|
3
30
|
const counter = { value: 0 };
|
|
4
|
-
const { xmlNodes } = collectXmlNodes(xml, 0, null, counter);
|
|
31
|
+
const { xmlNodes } = collectXmlNodes(xml, 0, null, counter, 0);
|
|
5
32
|
return xmlNodes;
|
|
6
33
|
}
|
|
7
|
-
function collectXmlNodes(xml, position, parentTag, counter) {
|
|
34
|
+
function collectXmlNodes(xml, position, parentTag, counter, depth) {
|
|
35
|
+
if (depth > MAX_DEPTH) return { xmlNodes: [], position, closed: false };
|
|
8
36
|
const xmlNodes = [];
|
|
9
37
|
while (position < xml.length) {
|
|
10
38
|
const xmlNodeData = extractXmlNodes(xml, position);
|
|
@@ -18,6 +46,8 @@ function collectXmlNodes(xml, position, parentTag, counter) {
|
|
|
18
46
|
xmlNodes.push({
|
|
19
47
|
role: "closeTag",
|
|
20
48
|
raw: xmlNodeData.raw,
|
|
49
|
+
xmlTag: xmlNodeData.tag || void 0,
|
|
50
|
+
xmlInner: xmlNodeData.xmlInner,
|
|
21
51
|
globalIndex: counter.value++,
|
|
22
52
|
localIndex: xmlNodes.length,
|
|
23
53
|
malformed: true
|
|
@@ -32,11 +62,15 @@ function collectXmlNodes(xml, position, parentTag, counter) {
|
|
|
32
62
|
xml,
|
|
33
63
|
xmlNodeData.end,
|
|
34
64
|
xmlNodeData.tag,
|
|
35
|
-
counter
|
|
65
|
+
counter,
|
|
66
|
+
depth + 1
|
|
36
67
|
);
|
|
37
68
|
const xmlNode2 = {
|
|
38
69
|
role: "openTag",
|
|
39
70
|
raw: xmlNodeData.raw,
|
|
71
|
+
xmlTag: xmlNodeData.tag || void 0,
|
|
72
|
+
xmlInner: xmlNodeData.xmlInner,
|
|
73
|
+
xmlAttributes: xmlNodeData.xmlAttributes,
|
|
40
74
|
globalIndex,
|
|
41
75
|
localIndex,
|
|
42
76
|
children: nested.xmlNodes
|
|
@@ -49,6 +83,9 @@ function collectXmlNodes(xml, position, parentTag, counter) {
|
|
|
49
83
|
const xmlNode = {
|
|
50
84
|
role: xmlNodeData.role,
|
|
51
85
|
raw: xmlNodeData.raw,
|
|
86
|
+
xmlTag: xmlNodeData.tag || void 0,
|
|
87
|
+
xmlInner: xmlNodeData.xmlInner,
|
|
88
|
+
xmlAttributes: xmlNodeData.xmlAttributes,
|
|
52
89
|
globalIndex: counter.value++,
|
|
53
90
|
localIndex: xmlNodes.length
|
|
54
91
|
};
|
|
@@ -141,14 +178,20 @@ function extractXmlNodes(xml, position) {
|
|
|
141
178
|
const inner = xml.slice(position + 1, closeAt).trim();
|
|
142
179
|
if (inner.startsWith("/")) {
|
|
143
180
|
const tag2 = inner.slice(1).trim().split(/\s/)[0] ?? "";
|
|
144
|
-
|
|
181
|
+
const xmlInner2 = inner.slice(1).trim().slice(tag2.length).trim() || void 0;
|
|
182
|
+
return { raw, role: "closeTag", tag: tag2, xmlInner: xmlInner2, end };
|
|
145
183
|
}
|
|
146
184
|
if (inner.endsWith("/")) {
|
|
147
|
-
const
|
|
148
|
-
|
|
185
|
+
const trimmed = inner.slice(0, -1).trim();
|
|
186
|
+
const tag2 = trimmed.split(/\s/)[0] ?? "";
|
|
187
|
+
const xmlInner2 = trimmed.slice(tag2.length).trim() || void 0;
|
|
188
|
+
const xmlAttributes2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
|
|
189
|
+
return { raw, role: "selfTag", tag: tag2, xmlInner: xmlInner2, xmlAttributes: xmlAttributes2, end };
|
|
149
190
|
}
|
|
150
191
|
const tag = inner.split(/\s/)[0] ?? "";
|
|
151
|
-
|
|
192
|
+
const xmlInner = inner.slice(tag.length).trim() || void 0;
|
|
193
|
+
const xmlAttributes = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
|
|
194
|
+
return { raw, role: "openTag", tag, xmlInner, xmlAttributes, end };
|
|
152
195
|
}
|
|
153
196
|
|
|
154
197
|
// src/modules/scaffold/types.ts
|