@nodable/flexible-xml-parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +0 -0
- package/LICENSE +21 -0
- package/README.md +284 -0
- package/lib/fxp.d.cts +652 -0
- package/package.json +80 -0
- package/src/AttributeProcessor.js +107 -0
- package/src/AutoCloseHandler.js +257 -0
- package/src/CharsSymbol.js +16 -0
- package/src/DocTypeReader.js +522 -0
- package/src/InputSource/BufferSource.js +228 -0
- package/src/InputSource/FeedableSource.js +340 -0
- package/src/InputSource/StreamSource.js +49 -0
- package/src/InputSource/StringSource.js +225 -0
- package/src/OptionsBuilder.js +400 -0
- package/src/ParseError.js +91 -0
- package/src/StopNodeProcessor.js +573 -0
- package/src/XMLParser.js +293 -0
- package/src/Xml2JsParser.js +573 -0
- package/src/XmlPartReader.js +183 -0
- package/src/XmlSpecialTagsReader.js +82 -0
- package/src/fxp.d.ts +619 -0
- package/src/fxp.js +8 -0
- package/src/util.js +58 -0
package/CHANGELOG.md
ADDED
|
File without changes
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Amit Gupta
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
# Flexible XML Parser
|
|
2
|
+
|
|
3
|
+
A flexible, high-performance XML parser for Node.js with pluggable output builders, a composable value parser chain, and multiple input modes.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Multiple input modes** — string, Buffer, Uint8Array, Node.js streams, and an incremental feed/end API. Additional input sources can be added easily.
|
|
8
|
+
- **Pluggable output builders** — swap `CompactObjBuilder` for `NodeTreeBuilder`, `OrderedKeyValueBuilder`, or your own subclass of `BaseOutputBuilder`
|
|
9
|
+
- **Composable value parser chain** — built-in parsers for entities, numbers, booleans, trim, and currency; custom parsers receive full context
|
|
10
|
+
- **Path-expression stop nodes** — capture raw content inside matched tags (e.g. `<script>`, `<style>`) without further XML parsing; configurable enclosure skipping for nested quotes and comments
|
|
11
|
+
- **Entity expansion control** — built-in XML entities, optional HTML entities, external/registered entities, DocType-declared entities; all with DoS-prevention limits
|
|
12
|
+
- **Auto-close for lenient HTML parsing** — configurable recovery from unclosed tags and mismatched close tags; collect parse errors without throwing
|
|
13
|
+
- **DoS protection** — configurable limits on nesting depth, attributes per tag, entity count, entity size, and total expansion length
|
|
14
|
+
- **Security** — prototype-pollution prevention; reserved names throw; dangerous names are sanitised by default
|
|
15
|
+
- **TypeScript definitions** — complete dual-mode types (`fxp.d.ts` for ESM, `fxp.d.cts` for CJS)
|
|
16
|
+
- **ES Modules + CommonJS** — `"type": "module"` source with a bundled CJS output
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
npm install @nodable/flexible-xml-parser
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Quick Start
|
|
25
|
+
|
|
26
|
+
```javascript
|
|
27
|
+
import XMLParser from '@nodable/flexible-xml-parser';
|
|
28
|
+
|
|
29
|
+
const parser = new XMLParser();
|
|
30
|
+
const result = parser.parse('<root><count>3</count><active>true</active></root>');
|
|
31
|
+
// { root: { count: 3, active: true } }
|
|
32
|
+
|
|
33
|
+
// Enable attributes
|
|
34
|
+
const parser2 = new XMLParser({ skip: { attributes: false } });
|
|
35
|
+
parser2.parse('<item id="1">hello</item>');
|
|
36
|
+
// { item: { '@_id': 1, '#text': 'hello' } }
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Input modes
|
|
40
|
+
|
|
41
|
+
```javascript
|
|
42
|
+
// String or Buffer
|
|
43
|
+
parser.parse('<root/>');
|
|
44
|
+
parser.parse(Buffer.from('<root/>'));
|
|
45
|
+
|
|
46
|
+
// Typed array
|
|
47
|
+
parser.parseBytesArr(new Uint8Array([...]));
|
|
48
|
+
|
|
49
|
+
// Node.js Readable stream — memory stays proportional to the largest token,
|
|
50
|
+
// not the total document size
|
|
51
|
+
const result = await parser.parseStream(fs.createReadStream('large.xml'));
|
|
52
|
+
|
|
53
|
+
// Incremental feed — useful for WebSocket / chunked HTTP
|
|
54
|
+
parser.feed('<root>');
|
|
55
|
+
parser.feed('<item>1</item>');
|
|
56
|
+
const result = parser.end();
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Options
|
|
60
|
+
|
|
61
|
+
```javascript
|
|
62
|
+
new XMLParser({
|
|
63
|
+
// What to exclude from output
|
|
64
|
+
skip: {
|
|
65
|
+
attributes: true, // set false to parse attributes (default: true)
|
|
66
|
+
nsPrefix: false, // strip ns:tag → tag (default: false)
|
|
67
|
+
declaration: false,
|
|
68
|
+
pi: false,
|
|
69
|
+
cdata: false,
|
|
70
|
+
comment: false,
|
|
71
|
+
},
|
|
72
|
+
|
|
73
|
+
// Property names for special nodes
|
|
74
|
+
nameFor: {
|
|
75
|
+
text: '#text', // mixed-content text property
|
|
76
|
+
cdata: '', // '' = merge CDATA into text; '#cdata' = separate key
|
|
77
|
+
comment: '', // '' = omit; '#comment' = capture
|
|
78
|
+
},
|
|
79
|
+
|
|
80
|
+
// Attribute representation
|
|
81
|
+
attributes: {
|
|
82
|
+
prefix: '@_',
|
|
83
|
+
suffix: '',
|
|
84
|
+
groupBy: '', // group all attributes under this key; '' = inline
|
|
85
|
+
booleanType: false, // allow valueless attributes (treated as true)
|
|
86
|
+
valueParsers: ['entity', 'number', 'boolean'],
|
|
87
|
+
},
|
|
88
|
+
|
|
89
|
+
// Tag value options
|
|
90
|
+
tags: {
|
|
91
|
+
unpaired: [], // self-closing tags without / (e.g. ['br', 'img'])
|
|
92
|
+
stopNodes: [], // paths whose content is captured raw (see below)
|
|
93
|
+
valueParsers: ['entity', 'number', 'boolean'],
|
|
94
|
+
},
|
|
95
|
+
|
|
96
|
+
numberParseOptions: { hex: true, leadingZeros: true, eNotation: true },
|
|
97
|
+
|
|
98
|
+
// Entity sources and security limits
|
|
99
|
+
entityParseOptions: {
|
|
100
|
+
default: true, // built-in XML entities (lt, gt, amp, …)
|
|
101
|
+
html: false, // HTML named entities (&nbsp;, &copy;, …)
|
|
102
|
+
external: true, // entities added via parser.addEntity()
|
|
103
|
+
docType: false, // entities declared in DOCTYPE internal subset
|
|
104
|
+
maxEntityCount: 100,
|
|
105
|
+
maxEntitySize: 10000,
|
|
106
|
+
maxTotalExpansions: 1000,
|
|
107
|
+
maxExpandedLength: 100000,
|
|
108
|
+
},
|
|
109
|
+
|
|
110
|
+
// DoS prevention
|
|
111
|
+
limits: {
|
|
112
|
+
maxNestedTags: null, // max tag nesting depth
|
|
113
|
+
maxAttributesPerTag: null, // max attributes on a single tag
|
|
114
|
+
},
|
|
115
|
+
|
|
116
|
+
// Lenient HTML-mode recovery
|
|
117
|
+
autoClose: null, // null = strict; 'html' = recover from unclosed/mismatched tags
|
|
118
|
+
|
|
119
|
+
// Pluggable output builder (default: CompactObjBuilder)
|
|
120
|
+
OutputBuilder: null,
|
|
121
|
+
});
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Value parsers
|
|
125
|
+
|
|
126
|
+
Built-in chain names: `'entity'`, `'number'`, `'boolean'`, `'trim'`, `'currency'`.
|
|
127
|
+
|
|
128
|
+
```javascript
|
|
129
|
+
// Disable entity expansion
|
|
130
|
+
new XMLParser({ tags: { valueParsers: ['number', 'boolean'] } });
|
|
131
|
+
|
|
132
|
+
// HTML entities + trim whitespace
|
|
133
|
+
new XMLParser({
|
|
134
|
+
tags: { valueParsers: ['entity', 'trim', 'number', 'boolean'] },
|
|
135
|
+
entityParseOptions: { html: true },
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
// All values as raw strings
|
|
139
|
+
new XMLParser({ tags: { valueParsers: [] }, attributes: { valueParsers: [] } });
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
Custom parsers receive `(val, context)`, where `context` carries `{ elementName, elementValue, elementType, matcher, isLeafNode }`:
|
|
143
|
+
|
|
144
|
+
```javascript
|
|
145
|
+
class PriceParser {
|
|
146
|
+
parse(val, context) {
|
|
147
|
+
return context.elementName === 'price' ? parseFloat(val) : val;
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
new XMLParser({
|
|
152
|
+
tags: { valueParsers: ['entity', new PriceParser(), 'boolean'] },
|
|
153
|
+
});
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
Register a reusable custom parser by name via `CompactObjBuilder`:
|
|
157
|
+
|
|
158
|
+
```javascript
|
|
159
|
+
import { CompactObjBuilder } from '@nodable/flexible-xml-parser';
|
|
160
|
+
|
|
161
|
+
const builder = new CompactObjBuilder();
|
|
162
|
+
builder.registerValueParser('price', new PriceParser());
|
|
163
|
+
|
|
164
|
+
new XMLParser({
|
|
165
|
+
tags: { valueParsers: ['entity', 'price', 'boolean'] },
|
|
166
|
+
OutputBuilder: builder,
|
|
167
|
+
});
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Stop nodes
|
|
171
|
+
|
|
172
|
+
Stop nodes capture raw content without further XML parsing — useful for `<script>`, `<style>`, or embedded HTML fragments.
|
|
173
|
+
|
|
174
|
+
```javascript
|
|
175
|
+
import { xmlEnclosures, quoteEnclosures } from '@nodable/flexible-xml-parser';
|
|
176
|
+
|
|
177
|
+
new XMLParser({
|
|
178
|
+
tags: {
|
|
179
|
+
stopNodes: [
|
|
180
|
+
'..script', // plain — first </script> ends collection
|
|
181
|
+
{ expression: 'body..pre', skipEnclosures: [...xmlEnclosures] },
|
|
182
|
+
{ expression: 'head..style', skipEnclosures: [...xmlEnclosures, ...quoteEnclosures] },
|
|
183
|
+
],
|
|
184
|
+
},
|
|
185
|
+
onStopNode(tagDetail, rawContent, matcher) {
|
|
186
|
+
console.log(tagDetail.name, rawContent);
|
|
187
|
+
},
|
|
188
|
+
});
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
`xmlEnclosures` covers XML comments and CDATA; `quoteEnclosures` covers single-quote, double-quote, and template literals.
|
|
192
|
+
|
|
193
|
+
## Pluggable output builders
|
|
194
|
+
|
|
195
|
+
```javascript
|
|
196
|
+
import XMLParser, { CompactObjBuilder, BaseOutputBuilder, ElementType } from '@nodable/flexible-xml-parser';
|
|
197
|
+
|
|
198
|
+
// CompactObjBuilder — default JS object output with extra options
|
|
199
|
+
const builder = new CompactObjBuilder({
|
|
200
|
+
alwaysArray: ['item'], // tag names or path expressions always wrapped in []
|
|
201
|
+
forceArray: (matcher) => ..., // function-based array forcing
|
|
202
|
+
forceTextNode: false, // always emit nameFor.text even for text-only tags
|
|
203
|
+
textJoint: '', // join string when text spans multiple text nodes
|
|
204
|
+
});
|
|
205
|
+
|
|
206
|
+
new XMLParser({ OutputBuilder: builder });
|
|
207
|
+
|
|
208
|
+
// Custom builder by extending BaseOutputBuilder
|
|
209
|
+
class MyBuilder extends BaseOutputBuilder {
|
|
210
|
+
addElement(tag, matcher) { /* … */ }
|
|
211
|
+
closeElement(matcher) { /* … */ }
|
|
212
|
+
addValue(text, matcher) { /* … */ }
|
|
213
|
+
getOutput() { return this.result; }
|
|
214
|
+
}
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
## Auto-close (lenient HTML parsing)
|
|
218
|
+
|
|
219
|
+
```javascript
|
|
220
|
+
// 'html' preset: recover from unclosed tags and mismatched close tags
|
|
221
|
+
const parser = new XMLParser({ autoClose: 'html' });
|
|
222
|
+
const result = parser.parse('<div><p>text<br></div>');
|
|
223
|
+
|
|
224
|
+
const errors = parser.getParseErrors();
|
|
225
|
+
// [{ type: 'unclosed-eof', tag: 'p', line: 1, col: … }, …]
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
Fine-grained control:
|
|
229
|
+
|
|
230
|
+
```javascript
|
|
231
|
+
new XMLParser({
|
|
232
|
+
autoClose: {
|
|
233
|
+
onEof: 'closeAll', // 'throw' | 'closeAll'
|
|
234
|
+
onMismatch: 'recover', // 'throw' | 'recover' | 'discard'
|
|
235
|
+
collectErrors: true,
|
|
236
|
+
},
|
|
237
|
+
});
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
## Error handling
|
|
241
|
+
|
|
242
|
+
```javascript
|
|
243
|
+
import XMLParser, { ParseError, ErrorCode } from '@nodable/flexible-xml-parser';
|
|
244
|
+
|
|
245
|
+
try {
|
|
246
|
+
parser.parse(xml);
|
|
247
|
+
} catch (e) {
|
|
248
|
+
if (e instanceof ParseError) {
|
|
249
|
+
console.error(e.code, e.line, e.col, e.message);
|
|
250
|
+
// e.g. 'MISMATCHED_CLOSE_TAG' 4 12 'Expected </div>, got </span>'
|
|
251
|
+
} else {
|
|
252
|
+
throw e;
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
All error codes are available on the `ErrorCode` constant for exhaustive matching without string literals.
|
|
258
|
+
|
|
259
|
+
## Custom entities
|
|
260
|
+
|
|
261
|
+
```javascript
|
|
262
|
+
parser.addEntity('copy', '©');
|
|
263
|
+
parser.addEntity('trade', '™');
|
|
264
|
+
// requires entityParseOptions.external: true (default)
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
## TypeScript
|
|
268
|
+
|
|
269
|
+
```typescript
|
|
270
|
+
import XMLParser, { X2jOptions, CompactObjBuilder, BaseOutputBuilder, ElementType } from '@nodable/flexible-xml-parser';
|
|
271
|
+
|
|
272
|
+
const options: X2jOptions = {
|
|
273
|
+
skip: { attributes: false, nsPrefix: true },
|
|
274
|
+
nameFor: { cdata: '#cdata' },
|
|
275
|
+
tags: { valueParsers: ['entity', 'trim', 'number', 'boolean'] },
|
|
276
|
+
limits: { maxNestedTags: 100 },
|
|
277
|
+
};
|
|
278
|
+
|
|
279
|
+
const parser = new XMLParser(options);
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
## License
|
|
283
|
+
|
|
284
|
+
MIT — [Amit Gupta](https://nodable.com)
|