xml-to-html-converter 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -15,9 +15,17 @@ A zero-dependency Node.js package for converting XML to HTML. Currently in pre-1
15
15
  Version `0.1.x` is focused entirely on parsing raw XML into a structured tree of nodes. The `scaffold` function walks an XML string and produces an array of `XmlNode` objects, each carrying its role, its raw source text, and its position in the document, both globally across the full document and locally within its parent.
16
16
 
17
17
  ```ts
18
+ interface XmlAttribute {
19
+ name: string;
20
+ value: string;
21
+ }
22
+
18
23
  interface XmlNode {
19
24
  role: XmlNodeRole;
20
25
  raw: string;
26
+ xmlTag?: string;
27
+ xmlInner?: string;
28
+ xmlAttributes?: XmlAttribute[];
21
29
  globalIndex: number;
22
30
  localIndex: number;
23
31
  children?: XmlNode[];
@@ -44,6 +52,7 @@ This scaffold is the foundation everything else will be built on. No transformat
44
52
  >
45
53
  > - **`scaffold(xml)`** reads any XML string and returns a nested node tree
46
54
  > - Every node knows its `role`, its `raw` source string, its `globalIndex` in the document, and its `localIndex` within its parent
55
+ > - Tag nodes also carry `xmlTag`, the parsed tag name (`openTag`, `selfTag`, `closeTag`); when attributes exist, `openTag` and `selfTag` nodes additionally carry `xmlInner` (the raw attribute string) and `xmlAttributes` (the structured attribute array)
47
56
  > - Broken XML never causes `scaffold` to throw - malformed nodes are flagged with `malformed: true` in place and the tree is built regardless
48
57
  >
49
58
  > `v1.0.0` is when this package becomes what it says it is: a full XML-to-HTML converter. Everything before that is the work to get there.
@@ -92,12 +101,18 @@ const tree = scaffold(`
92
101
  {
93
102
  "role": "openTag",
94
103
  "raw": "<book category=\"cooking\">",
104
+ "xmlTag": "book",
105
+ "xmlInner": "category=\"cooking\"",
106
+ "xmlAttributes": [{ "name": "category", "value": "cooking" }],
95
107
  "globalIndex": 2,
96
108
  "localIndex": 0,
97
109
  "children": [
98
110
  {
99
111
  "role": "openTag",
100
112
  "raw": "<title lang=\"en\">",
113
+ "xmlTag": "title",
114
+ "xmlInner": "lang=\"en\"",
115
+ "xmlAttributes": [{ "name": "lang", "value": "en" }],
101
116
  "globalIndex": 3,
102
117
  "localIndex": 0,
103
118
  "children": [
@@ -122,14 +137,17 @@ const tree = scaffold(`
122
137
 
123
138
  Every node in the tree has the following fields:
124
139
 
125
- | Field | Type | Description |
126
- | ------------- | ------------- | --------------------------------------------------- |
127
- | `role` | `XmlNodeRole` | What kind of node this is |
128
- | `raw` | `string` | The exact source string, untouched |
129
- | `globalIndex` | `number` | Position in the entire document (never resets) |
130
- | `localIndex` | `number` | Position within the parent's children array |
131
- | `children` | `XmlNode[]` | Present only on `openTag` - the nested nodes inside |
132
- | `malformed` | `true` | Present only when the structure is broken |
140
+ | Field | Type | Description |
141
+ | --------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------- |
142
+ | `role` | `XmlNodeRole` | What kind of node this is |
143
+ | `raw` | `string` | The exact source string, untouched |
144
+ | `xmlTag` | `string` | Tag name only, e.g. `"book"` or `"env:Envelope"`. Present on `openTag`, `selfTag`, `closeTag` |
145
+ | `xmlInner` | `string` | Everything after the tag name inside the brackets, verbatim. Present on `openTag` and `selfTag` when attributes exist |
146
+ | `xmlAttributes` | `XmlAttribute[]` | Parsed array of `{ name, value }` attribute objects. Present on `openTag` and `selfTag` when attributes exist |
147
+ | `globalIndex` | `number` | Position in the entire document (never resets) |
148
+ | `localIndex` | `number` | Position within the parent's children array |
149
+ | `children` | `XmlNode[]` | Present only on `openTag` - the nested nodes inside |
150
+ | `malformed` | `true` | Present only when the structure is broken |
133
151
 
134
152
  ---
135
153
 
@@ -151,11 +169,12 @@ Every node in the tree has the following fields:
151
169
 
152
170
  `scaffold` never throws. No matter what the input looks like, it always returns a complete tree. Malformed structures are flagged with `malformed: true` in place and the walk continues.
153
171
 
154
- Three cases are handled:
172
+ Four cases are handled:
155
173
 
156
174
  - **Unclosed tags** - opens but never closes, gets `malformed: true`, children are still collected
157
175
  - **Stray closing tags** - a `</tag>` with no matching open surfaces as a `closeTag` token with `malformed: true`
158
176
  - **Unclosed brackets** - a `<` with no matching `>` captures the remainder as a malformed token
177
+ - **Excessive nesting** - once a document nests beyond 500 levels the parser stops descending and flags the deepest open tag `malformed: true`, which prevents a stack overflow
159
178
 
160
179
  ```js
161
180
  const tree = scaffold("<root><unclosed><valid>text</valid></root>");
@@ -207,6 +226,7 @@ import { scaffold, isMalformed } from "xml-to-html-converter";
207
226
  import type {
208
227
  XmlNode,
209
228
  XmlNodeRole,
229
+ XmlAttribute,
210
230
  MalformedXmlNode,
211
231
  } from "xml-to-html-converter";
212
232
  ```
@@ -217,6 +237,7 @@ import type {
217
237
  | `isMalformed` | function | Type guard, narrows `XmlNode` to `MalformedXmlNode` |
218
238
  | `XmlNode` | type | The shape of every node in the tree |
219
239
  | `XmlNodeRole` | type | Union of all valid role strings |
240
+ | `XmlAttribute` | type | Shape of a parsed attribute `{ name, value }` |
220
241
  | `MalformedXmlNode` | type | `XmlNode` narrowed to `{ malformed: true }` |
221
242
 
222
243
  ---
package/dist/index.d.ts CHANGED
@@ -1,7 +1,14 @@
1
+ interface XmlAttribute {
2
+ name: string;
3
+ value: string;
4
+ }
1
5
  type XmlNodeRole = "closeTag" | "comment" | "doctype" | "openTag" | "processingInstruction" | "selfTag" | "textLeaf";
2
6
  interface XmlNode {
3
7
  role: XmlNodeRole;
4
8
  raw: string;
9
+ xmlTag?: string;
10
+ xmlInner?: string;
11
+ xmlAttributes?: XmlAttribute[];
5
12
  globalIndex: number;
6
13
  localIndex: number;
7
14
  children?: XmlNode[];
@@ -14,4 +21,4 @@ declare function isMalformed(node: XmlNode): node is MalformedXmlNode;
14
21
 
15
22
  declare function scaffold(xml: string): XmlNode[];
16
23
 
17
- export { type MalformedXmlNode, type XmlNode, type XmlNodeRole, isMalformed, scaffold };
24
+ export { type MalformedXmlNode, type XmlAttribute, type XmlNode, type XmlNodeRole, isMalformed, scaffold };
package/dist/index.js CHANGED
@@ -1,10 +1,38 @@
1
1
  // src/modules/scaffold/scaffold.ts
2
// Single-character whitespace matcher, hoisted so the scanner below does not
// re-evaluate a regex literal for every character it inspects.
const XML_ATTRIBUTE_WHITESPACE = /\s/;

/**
 * Parses the attribute portion of a tag (the text that follows the tag name)
 * into an ordered list of `{ name, value }` pairs.
 *
 * The scan is deliberately lenient and never throws:
 * - whitespace is allowed around `=`;
 * - values may be wrapped in single or double quotes and are returned without
 *   the quotes, otherwise untouched;
 * - a value whose closing quote is missing runs to the end of the input and is
 *   still recorded;
 * - parsing stops at the first attribute that has no name, no `=`, or no
 *   opening quote; attributes parsed before that point are kept, the rest of
 *   the input is ignored.
 *
 * @param {string} xmlInner Attribute text, e.g. `category="cooking" lang='en'`.
 * @returns {{ name: string, value: string }[] | undefined} The parsed
 *   attributes in source order, or `undefined` when none could be parsed.
 */
function parseXmlAttributes(xmlInner) {
  const source = xmlInner.trim();
  const attributes = [];
  let i = 0;

  const skipWhitespace = () => {
    while (i < source.length && XML_ATTRIBUTE_WHITESPACE.test(source[i])) i++;
  };

  while (i < source.length) {
    skipWhitespace();
    if (i >= source.length) break;

    // The name runs up to the first "=" or whitespace, so it never needs trimming.
    const nameStart = i;
    while (
      i < source.length &&
      source[i] !== "=" &&
      !XML_ATTRIBUTE_WHITESPACE.test(source[i])
    ) {
      i++;
    }
    const name = source.slice(nameStart, i);
    if (!name) break;

    skipWhitespace();
    if (source[i] !== "=") break;
    i++;
    skipWhitespace();

    const quote = source[i];
    if (quote !== '"' && quote !== "'") break;
    i++;

    // A missing closing quote means the value extends to the end of the input.
    const closingQuote = source.indexOf(quote, i);
    const valueEnd = closingQuote === -1 ? source.length : closingQuote;
    attributes.push({ name, value: source.slice(i, valueEnd) });

    // Step past the closing quote (or past the end, which terminates the loop).
    i = valueEnd + 1;
  }

  return attributes.length > 0 ? attributes : undefined;
}
28
+ var MAX_DEPTH = 500;
2
29
  function scaffold(xml) {
3
30
  const counter = { value: 0 };
4
- const { xmlNodes } = collectXmlNodes(xml, 0, null, counter);
31
+ const { xmlNodes } = collectXmlNodes(xml, 0, null, counter, 0);
5
32
  return xmlNodes;
6
33
  }
7
- function collectXmlNodes(xml, position, parentTag, counter) {
34
+ function collectXmlNodes(xml, position, parentTag, counter, depth) {
35
+ if (depth > MAX_DEPTH) return { xmlNodes: [], position, closed: false };
8
36
  const xmlNodes = [];
9
37
  while (position < xml.length) {
10
38
  const xmlNodeData = extractXmlNodes(xml, position);
@@ -18,6 +46,8 @@ function collectXmlNodes(xml, position, parentTag, counter) {
18
46
  xmlNodes.push({
19
47
  role: "closeTag",
20
48
  raw: xmlNodeData.raw,
49
+ xmlTag: xmlNodeData.tag || void 0,
50
+ xmlInner: xmlNodeData.xmlInner,
21
51
  globalIndex: counter.value++,
22
52
  localIndex: xmlNodes.length,
23
53
  malformed: true
@@ -32,11 +62,15 @@ function collectXmlNodes(xml, position, parentTag, counter) {
32
62
  xml,
33
63
  xmlNodeData.end,
34
64
  xmlNodeData.tag,
35
- counter
65
+ counter,
66
+ depth + 1
36
67
  );
37
68
  const xmlNode2 = {
38
69
  role: "openTag",
39
70
  raw: xmlNodeData.raw,
71
+ xmlTag: xmlNodeData.tag || void 0,
72
+ xmlInner: xmlNodeData.xmlInner,
73
+ xmlAttributes: xmlNodeData.xmlAttributes,
40
74
  globalIndex,
41
75
  localIndex,
42
76
  children: nested.xmlNodes
@@ -49,6 +83,9 @@ function collectXmlNodes(xml, position, parentTag, counter) {
49
83
  const xmlNode = {
50
84
  role: xmlNodeData.role,
51
85
  raw: xmlNodeData.raw,
86
+ xmlTag: xmlNodeData.tag || void 0,
87
+ xmlInner: xmlNodeData.xmlInner,
88
+ xmlAttributes: xmlNodeData.xmlAttributes,
52
89
  globalIndex: counter.value++,
53
90
  localIndex: xmlNodes.length
54
91
  };
@@ -141,14 +178,20 @@ function extractXmlNodes(xml, position) {
141
178
  const inner = xml.slice(position + 1, closeAt).trim();
142
179
  if (inner.startsWith("/")) {
143
180
  const tag2 = inner.slice(1).trim().split(/\s/)[0] ?? "";
144
- return { raw, role: "closeTag", tag: tag2, end };
181
+ const xmlInner2 = inner.slice(1).trim().slice(tag2.length).trim() || void 0;
182
+ return { raw, role: "closeTag", tag: tag2, xmlInner: xmlInner2, end };
145
183
  }
146
184
  if (inner.endsWith("/")) {
147
- const tag2 = inner.slice(0, -1).trim().split(/\s/)[0] ?? "";
148
- return { raw, role: "selfTag", tag: tag2, end };
185
+ const trimmed = inner.slice(0, -1).trim();
186
+ const tag2 = trimmed.split(/\s/)[0] ?? "";
187
+ const xmlInner2 = trimmed.slice(tag2.length).trim() || void 0;
188
+ const xmlAttributes2 = xmlInner2 ? parseXmlAttributes(xmlInner2) : void 0;
189
+ return { raw, role: "selfTag", tag: tag2, xmlInner: xmlInner2, xmlAttributes: xmlAttributes2, end };
149
190
  }
150
191
  const tag = inner.split(/\s/)[0] ?? "";
151
- return { raw, role: "openTag", tag, end };
192
+ const xmlInner = inner.slice(tag.length).trim() || void 0;
193
+ const xmlAttributes = xmlInner ? parseXmlAttributes(xmlInner) : void 0;
194
+ return { raw, role: "openTag", tag, xmlInner, xmlAttributes, end };
152
195
  }
153
196
 
154
197
  // src/modules/scaffold/types.ts
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xml-to-html-converter",
3
- "version": "0.1.7",
3
+ "version": "0.1.8",
4
4
  "description": "Zero dependency XML to HTML converter for Node environments",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",