@nodable/flexible-xml-parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
File without changes
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Amit Gupta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,284 @@
1
+ # Flexible XML Parser
2
+
3
+ A flexible, high-performance XML parser for Node.js with pluggable output builders, a composable value parser chain, and multiple input modes.
4
+
5
+ ## Features
6
+
7
+ - **Multiple input modes** — string, Buffer, Uint8Array, Node.js streams, and incremental feed/end API. Additional input sources can be added easily.
8
+ - **Pluggable output builders** — swap `CompactObjBuilder` for `NodeTreeBuilder`, `OrderedKeyValueBuilder`, or your own subclass of `BaseOutputBuilder`
9
+ - **Composable value parser chain** — built-in parsers for entities, numbers, booleans, trim, and currency; custom parsers receive full context
10
+ - **Path-expression stop nodes** — capture raw content inside matched tags (e.g. `<script>`, `<style>`) without further XML parsing; configurable enclosure skipping for nested quotes and comments
11
+ - **Entity expansion control** — built-in XML entities, optional HTML entities, external/registered entities, DocType-declared entities; all with DoS-prevention limits
12
+ - **Auto-close for lenient HTML parsing** — configurable recovery from unclosed tags and mismatched close tags; collect parse errors without throwing
13
+ - **DoS protection** — configurable limits on nesting depth, attributes per tag, entity count, entity size, and total expansion length
14
+ - **Security** — prototype-pollution prevention; reserved names throw; dangerous names are sanitised by default
15
+ - **TypeScript definitions** — complete dual-mode types (`fxp.d.ts` for ESM, `fxp.d.cts` for CJS)
16
+ - **ES Modules + CommonJS** — `"type": "module"` source with a bundled CJS output
17
+
18
+ ## Installation
19
+
20
+ ```bash
21
+ npm install @nodable/flexible-xml-parser
22
+ ```
23
+
24
+ ## Quick Start
25
+
26
+ ```javascript
27
+ import XMLParser from '@nodable/flexible-xml-parser';
28
+
29
+ const parser = new XMLParser();
30
+ const result = parser.parse('<root><count>3</count><active>true</active></root>');
31
+ // { root: { count: 3, active: true } }
32
+
33
+ // Enable attributes
34
+ const parser2 = new XMLParser({ skip: { attributes: false } });
35
+ parser2.parse('<item id="1">hello</item>');
36
+ // { item: { '@_id': 1, '#text': 'hello' } }
37
+ ```
38
+
39
+ ## Input modes
40
+
41
+ ```javascript
42
+ // String or Buffer
43
+ parser.parse('<root/>');
44
+ parser.parse(Buffer.from('<root/>'));
45
+
46
+ // Typed array
47
+ parser.parseBytesArr(new Uint8Array([...]));
48
+
49
+ // Node.js Readable stream — memory stays proportional to the largest token,
50
+ // not the total document size
51
+ const result = await parser.parseStream(fs.createReadStream('large.xml'));
52
+
53
+ // Incremental feed — useful for WebSocket / chunked HTTP
54
+ parser.feed('<root>');
55
+ parser.feed('<item>1</item>');
56
+ const fedResult = parser.end();
57
+ ```
58
+
59
+ ## Options
60
+
61
+ ```javascript
62
+ new XMLParser({
63
+ // What to exclude from output
64
+ skip: {
65
+ attributes: true, // set false to parse attributes (default: true)
66
+ nsPrefix: false, // strip ns:tag → tag (default: false)
67
+ declaration: false,
68
+ pi: false,
69
+ cdata: false,
70
+ comment: false,
71
+ },
72
+
73
+ // Property names for special nodes
74
+ nameFor: {
75
+ text: '#text', // mixed-content text property
76
+ cdata: '', // '' = merge CDATA into text; '#cdata' = separate key
77
+ comment: '', // '' = omit; '#comment' = capture
78
+ },
79
+
80
+ // Attribute representation
81
+ attributes: {
82
+ prefix: '@_',
83
+ suffix: '',
84
+ groupBy: '', // group all attributes under this key; '' = inline
85
+ booleanType: false, // allow valueless attributes (treated as true)
86
+ valueParsers: ['entity', 'number', 'boolean'],
87
+ },
88
+
89
+ // Tag value options
90
+ tags: {
91
+ unpaired: [], // self-closing tags without / (e.g. ['br', 'img'])
92
+ stopNodes: [], // paths whose content is captured raw (see below)
93
+ valueParsers: ['entity', 'number', 'boolean'],
94
+ },
95
+
96
+ numberParseOptions: { hex: true, leadingZeros: true, eNotation: true },
97
+
98
+ // Entity sources and security limits
99
+ entityParseOptions: {
100
+ default: true, // built-in XML entities (lt, gt, amp, …)
101
+ html: false, // HTML named entities (&nbsp;, &copy;, …)
102
+ external: true, // entities added via parser.addEntity()
103
+ docType: false, // entities declared in DOCTYPE internal subset
104
+ maxEntityCount: 100,
105
+ maxEntitySize: 10000,
106
+ maxTotalExpansions: 1000,
107
+ maxExpandedLength: 100000,
108
+ },
109
+
110
+ // DoS prevention
111
+ limits: {
112
+ maxNestedTags: null, // max tag nesting depth
113
+ maxAttributesPerTag: null, // max attributes on a single tag
114
+ },
115
+
116
+ // Lenient HTML-mode recovery
117
+ autoClose: null, // null = strict; 'html' = recover from unclosed/mismatched tags
118
+
119
+ // Pluggable output builder (default: CompactObjBuilder)
120
+ OutputBuilder: null,
121
+ });
122
+ ```
123
+
124
+ ## Value parsers
125
+
126
+ Built-in chain names: `'entity'`, `'number'`, `'boolean'`, `'trim'`, `'currency'`.
127
+
128
+ ```javascript
129
+ // Disable entity expansion
130
+ new XMLParser({ tags: { valueParsers: ['number', 'boolean'] } });
131
+
132
+ // HTML entities + trim whitespace
133
+ new XMLParser({
134
+ tags: { valueParsers: ['entity', 'trim', 'number', 'boolean'] },
135
+ entityParseOptions: { html: true },
136
+ });
137
+
138
+ // All values as raw strings
139
+ new XMLParser({ tags: { valueParsers: [] }, attributes: { valueParsers: [] } });
140
+ ```
141
+
142
+ Custom parsers receive `(val, context)` where context carries `{ elementName, elementValue, elementType, matcher, isLeafNode }`:
143
+
144
+ ```javascript
145
+ class PriceParser {
146
+ parse(val, context) {
147
+ return context.elementName === 'price' ? parseFloat(val) : val;
148
+ }
149
+ }
150
+
151
+ new XMLParser({
152
+ tags: { valueParsers: ['entity', new PriceParser(), 'boolean'] },
153
+ });
154
+ ```
155
+
156
+ Register a reusable custom parser by name via `CompactObjBuilder`:
157
+
158
+ ```javascript
159
+ import { CompactObjBuilder } from '@nodable/flexible-xml-parser';
160
+
161
+ const builder = new CompactObjBuilder();
162
+ builder.registerValueParser('price', new PriceParser());
163
+
164
+ new XMLParser({
165
+ tags: { valueParsers: ['entity', 'price', 'boolean'] },
166
+ OutputBuilder: builder,
167
+ });
168
+ ```
169
+
170
+ ## Stop nodes
171
+
172
+ Stop nodes capture raw content without further XML parsing — useful for `<script>`, `<style>`, or embedded HTML fragments.
173
+
174
+ ```javascript
175
+ import { xmlEnclosures, quoteEnclosures } from '@nodable/flexible-xml-parser';
176
+
177
+ new XMLParser({
178
+ tags: {
179
+ stopNodes: [
180
+ '..script', // plain — first </script> ends collection
181
+ { expression: 'body..pre', skipEnclosures: [...xmlEnclosures] },
182
+ { expression: 'head..style', skipEnclosures: [...xmlEnclosures, ...quoteEnclosures] },
183
+ ],
184
+ },
185
+ onStopNode(tagDetail, rawContent, matcher) {
186
+ console.log(tagDetail.name, rawContent);
187
+ },
188
+ });
189
+ ```
190
+
191
+ `xmlEnclosures` covers XML comments and CDATA sections; `quoteEnclosures` covers single-quoted, double-quoted, and template-literal strings.
192
+
193
+ ## Pluggable output builders
194
+
195
+ ```javascript
196
+ import XMLParser, { CompactObjBuilder, BaseOutputBuilder, ElementType } from '@nodable/flexible-xml-parser';
197
+
198
+ // CompactObjBuilder — default JS object output with extra options
199
+ const builder = new CompactObjBuilder({
200
+ alwaysArray: ['item'], // tag names or path expressions always wrapped in []
201
+ forceArray: (matcher) => ..., // function-based array forcing
202
+ forceTextNode: false, // always emit nameFor.text even for text-only tags
203
+ textJoint: '', // join string when text spans multiple text nodes
204
+ });
205
+
206
+ new XMLParser({ OutputBuilder: builder });
207
+
208
+ // Custom builder by extending BaseOutputBuilder
209
+ class MyBuilder extends BaseOutputBuilder {
210
+ addElement(tag, matcher) { /* … */ }
211
+ closeElement(matcher) { /* … */ }
212
+ addValue(text, matcher) { /* … */ }
213
+ getOutput() { return this.result; }
214
+ }
215
+ ```
216
+
217
+ ## Auto-close (lenient HTML parsing)
218
+
219
+ ```javascript
220
+ // 'html' preset: recover from unclosed tags and mismatched close tags
221
+ const parser = new XMLParser({ autoClose: 'html' });
222
+ const result = parser.parse('<div><p>text<br></div>');
223
+
224
+ const errors = parser.getParseErrors();
225
+ // [{ type: 'unclosed-eof', tag: 'p', line: 1, col: … }, …]
226
+ ```
227
+
228
+ Fine-grained control:
229
+
230
+ ```javascript
231
+ new XMLParser({
232
+ autoClose: {
233
+ onEof: 'closeAll', // 'throw' | 'closeAll'
234
+ onMismatch: 'recover', // 'throw' | 'recover' | 'discard'
235
+ collectErrors: true,
236
+ },
237
+ });
238
+ ```
239
+
240
+ ## Error handling
241
+
242
+ ```javascript
243
+ import XMLParser, { ParseError, ErrorCode } from '@nodable/flexible-xml-parser';
244
+
245
+ try {
246
+ parser.parse(xml);
247
+ } catch (e) {
248
+ if (e instanceof ParseError) {
249
+ console.error(e.code, e.line, e.col, e.message);
250
+ // e.g. 'MISMATCHED_CLOSE_TAG' 4 12 'Expected </div>, got </span>'
251
+ } else {
252
+ throw e;
253
+ }
254
+ }
255
+ ```
256
+
257
+ All error codes are available on the `ErrorCode` constant for exhaustive matching without string literals.
258
+
259
+ ## Custom entities
260
+
261
+ ```javascript
262
+ parser.addEntity('copy', '©');
263
+ parser.addEntity('trade', '™');
264
+ // requires entityParseOptions.external: true (default)
265
+ ```
266
+
267
+ ## TypeScript
268
+
269
+ ```typescript
270
+ import XMLParser, { X2jOptions, CompactObjBuilder, BaseOutputBuilder, ElementType } from '@nodable/flexible-xml-parser';
271
+
272
+ const options: X2jOptions = {
273
+ skip: { attributes: false, nsPrefix: true },
274
+ nameFor: { cdata: '#cdata' },
275
+ tags: { valueParsers: ['entity', 'trim', 'number', 'boolean'] },
276
+ limits: { maxNestedTags: 100 },
277
+ };
278
+
279
+ const parser = new XMLParser(options);
280
+ ```
281
+
282
+ ## License
283
+
284
+ MIT — [Amit Gupta](https://nodable.com)