@nodable/flexible-xml-parser 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +0 -0
- package/LICENSE +21 -0
- package/README.md +284 -0
- package/lib/fxp.d.cts +652 -0
- package/package.json +80 -0
- package/src/AttributeProcessor.js +107 -0
- package/src/AutoCloseHandler.js +257 -0
- package/src/CharsSymbol.js +16 -0
- package/src/DocTypeReader.js +522 -0
- package/src/InputSource/BufferSource.js +228 -0
- package/src/InputSource/FeedableSource.js +340 -0
- package/src/InputSource/StreamSource.js +49 -0
- package/src/InputSource/StringSource.js +225 -0
- package/src/OptionsBuilder.js +400 -0
- package/src/ParseError.js +91 -0
- package/src/StopNodeProcessor.js +573 -0
- package/src/XMLParser.js +293 -0
- package/src/Xml2JsParser.js +573 -0
- package/src/XmlPartReader.js +183 -0
- package/src/XmlSpecialTagsReader.js +82 -0
- package/src/fxp.d.ts +619 -0
- package/src/fxp.js +8 -0
- package/src/util.js +58 -0
package/src/fxp.d.ts
ADDED
|
@@ -0,0 +1,619 @@
|
|
|
1
|
+
import { BaseOutputBuilderFactory } from "@nodable/base-output-builder"
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Flex XML Parser — TypeScript Definitions
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Object form of a skip-tag entry — allows per-node control of nested depth
|
|
9
|
+
* tracking and enclosure skipping when scanning for the closing tag.
|
|
10
|
+
*
|
|
11
|
+
* ```ts
|
|
12
|
+
* import XMLParser, { xmlEnclosures } from '@nodable/flexible-xml-parser';
|
|
13
|
+
*
|
|
14
|
+
* const parser = new XMLParser({
|
|
15
|
+
* skip: {
|
|
16
|
+
* tags: [
|
|
17
|
+
* "..secret",
|
|
18
|
+
* { expression: "root.internal", nested: true, skipEnclosures: [...xmlEnclosures] },
|
|
19
|
+
* ]
|
|
20
|
+
* }
|
|
21
|
+
* });
|
|
22
|
+
* ```
|
|
23
|
+
*/
|
|
24
|
+
export interface SkipTagEntry {
  /** Path expression selecting the tag to skip; same syntax as a plain-string skip-tag entry. */
  expression: string;

  /**
   * Set to `true` to track nested open tags that share the same name, so the
   * skip finishes only once the outermost closing tag has been found.
   * Default: false.
   */
  nested?: boolean;

  /**
   * Enclosure pairs that are stepped over while scanning for the closing tag.
   * Pairs are checked in array order and the first `open` match wins.
   * Default: `[]` (plain first-match, no enclosure awareness).
   */
  skipEnclosures?: Array<Enclosure>;
}
|
|
39
|
+
|
|
40
|
+
/** Switches that decide which kinds of node are left out of the output. */
export interface SkipOptions {
  /** Leave the XML declaration `<?xml ... ?>` out of the output. Default: false */
  declaration?: boolean;

  /** Leave processing instructions (other than the declaration) out of the output. Default: false */
  pi?: boolean;

  /** Leave every attribute out of the output. Default: true */
  attributes?: boolean;

  /** Drop CDATA sections from the output entirely. Default: false */
  cdata?: boolean;

  /** Drop comments from the output entirely. Default: false */
  comment?: boolean;

  /**
   * Remove namespace prefixes from tag and attribute names.
   * For example `ns:tag` becomes `tag`, and `xmlns:*` attributes are dropped.
   * Default: false
   */
  nsPrefix?: boolean;

  /**
   * Tag paths whose whole subtree is silently dropped from the output.
   * The parser moves past the closing tag with the same raw-collection
   * mechanism used for stop nodes, then discards the content without calling
   * any output builder method.
   *
   * An entry is one of:
   *  - a plain string path expression — equivalent to
   *    `{ expression, nested: false, skipEnclosures: [] }`; the very first
   *    `</tagName>` ends collection.
   *  - a `SkipTagEntry` object with optional `nested` and `skipEnclosures`.
   *
   * Supports path-expression-matcher syntax. Default: []
   *
   * @example
   * import { xmlEnclosures } from '@nodable/flexible-xml-parser';
   *
   * skip: {
   *   tags: [
   *     "..secret",
   *     { expression: "root.internal", nested: true, skipEnclosures: [...xmlEnclosures] },
   *   ]
   * }
   */
  tags?: (string | SkipTagEntry)[];
}
|
|
82
|
+
|
|
83
|
+
/** Property names under which special node kinds are stored in the output. */
export interface NameForOptions {
  /**
   * Key that holds mixed text content, used when a tag contains both text
   * and child elements.
   * Default: '#text'
   */
  text?: string;

  /**
   * Key that holds CDATA sections.
   * The default, an empty string, merges CDATA content into the tag's text value.
   */
  cdata?: string;

  /**
   * Key that holds XML comments.
   * The default, an empty string, omits comments from the output.
   * Set it to e.g. '#comment' to capture them.
   */
  comment?: string;
}
|
|
101
|
+
|
|
102
|
+
/** Controls how attributes are parsed and how they are named in the output. */
export interface AttributeOptions {
  /** Accept boolean (valueless) attributes and treat them as `true`. Default: false */
  booleanType?: boolean;

  /** Property name under which all attributes are grouped. Empty string = inline with tag. Default: '' */
  groupBy?: string;

  /** Text prepended to each attribute name in the output. Default: '@_' */
  prefix?: string;

  /** Text appended to each attribute name in the output. Default: '' */
  suffix?: string;
}
|
|
112
|
+
|
|
113
|
+
/**
|
|
114
|
+
* An open/close pair that defines a region the stop-node processor should skip
|
|
115
|
+
* when scanning for the closing tag. Anything between `open` and `close` is
|
|
116
|
+
* treated as opaque text — closing-tag detection and depth tracking are
|
|
117
|
+
* suspended until `close` is found.
|
|
118
|
+
*
|
|
119
|
+
* @example
|
|
120
|
+
* { open: '<!--', close: '-->' } // XML comment
|
|
121
|
+
* { open: '"', close: '"' } // double-quoted string
|
|
122
|
+
*/
|
|
123
|
+
export interface Enclosure {
  /** Marker that begins the skipped region, e.g. `'<!--'`. */
  open: string;

  /** Marker that ends the skipped region, e.g. `'-->'`. */
  close: string;
}
|
|
127
|
+
|
|
128
|
+
/**
|
|
129
|
+
* Object form of a stop-node entry — allows per-node control of which
|
|
130
|
+
* enclosures the processor should skip when scanning for the closing tag.
|
|
131
|
+
*
|
|
132
|
+
* ```ts
|
|
133
|
+
* import XMLParser, { xmlEnclosures, quoteEnclosures } from '@nodable/flexible-xml-parser';
|
|
134
|
+
*
|
|
135
|
+
* const parser = new XMLParser({
|
|
136
|
+
* tags: {
|
|
137
|
+
* stopNodes: [
|
|
138
|
+
* "..script", // plain — no enclosures
|
|
139
|
+
* { expression: "body..pre", skipEnclosures: [...xmlEnclosures] },
|
|
140
|
+
* { expression: "head..style", skipEnclosures: [...xmlEnclosures, ...quoteEnclosures] },
|
|
141
|
+
* ]
|
|
142
|
+
* }
|
|
143
|
+
* });
|
|
144
|
+
* ```
|
|
145
|
+
*/
|
|
146
|
+
export interface StopNodeEntry {
  /** Path expression (same syntax as string stop-node entries). */
  expression: string;

  /**
   * When true, nested same-name open tags are tracked and the stop node ends
   * only when the outermost closing tag is found. Default: false.
   */
  nested?: boolean;

  /**
   * Enclosure pairs to skip while scanning for the closing tag.
   * Checked in array order — first open match wins.
   *
   * Optional: omitting it is the same as passing `[]` (plain first-match,
   * no enclosure awareness), so `{ expression, nested: true }` is a valid
   * entry — matching the optional `skipEnclosures` on `SkipTagEntry`.
   */
  skipEnclosures?: Enclosure[];
}
|
|
161
|
+
|
|
162
|
+
/** Tag-level parsing options. */
export interface TagOptions {
  /** Names of tags that never have a closing tag (e.g. ['br', 'img', 'hr']). Default: [] */
  unpaired?: Array<string>;

  /**
   * Tag paths whose content is captured raw, without further XML parsing.
   *
   * An entry is one of:
   *  - a plain string path expression — equivalent to
   *    `{ expression, skipEnclosures: [] }`; the very first `</tagName>` ends
   *    collection (no depth tracking, no enclosure skipping).
   *  - a `StopNodeEntry` object with an explicit `skipEnclosures` array.
   *    When `skipEnclosures` is non-empty, depth tracking is enabled and
   *    anything between an enclosure's open/close markers is skipped (so false
   *    closing tags inside comments, CDATA, string literals, etc. are ignored).
   *
   * NOTE(review): `StopNodeEntry` also declares a separate `nested` flag that
   * is documented as controlling depth tracking — confirm which of the two
   * actually enables it.
   *
   * Supports path-expression-matcher syntax. Default: []
   *
   * @example
   * import { xmlEnclosures, quoteEnclosures } from '@nodable/flexible-xml-parser';
   *
   * stopNodes: [
   *   "..script",                                                      // plain
   *   { expression: "body..pre",   skipEnclosures: [...xmlEnclosures] },
   *   { expression: "head..style", skipEnclosures: [...xmlEnclosures, ...quoteEnclosures] },
   * ]
   */
  stopNodes?: (string | StopNodeEntry)[];
}
|
|
189
|
+
|
|
190
|
+
/**
|
|
191
|
+
* Options for DOCTYPE reading — controls whether entities are collected
|
|
192
|
+
* and enforces read-time security limits.
|
|
193
|
+
*/
|
|
194
|
+
export interface DoctypeOptions {
  /**
   * Whether entities declared in the DOCTYPE internal subset are collected
   * and forwarded to the output builder for replacement.
   * The DOCTYPE block is always read so that it is consumed; this flag only
   * controls forwarding.
   */
  enabled?: boolean;

  /**
   * Upper bound on the number of entities that a DOCTYPE internal subset may declare.
   * Enforced by DocTypeReader at declaration time.
   * Default: 100
   */
  maxEntityCount?: number;

  /**
   * Upper bound, in bytes, on each entity definition value in the DOCTYPE.
   * Enforced by DocTypeReader at declaration time.
   * Default: 10000
   */
  maxEntitySize?: number;
}
|
|
216
|
+
|
|
217
|
+
// ─── Error handling ────────────────────────────────────────────────────────────
|
|
218
|
+
|
|
219
|
+
/**
|
|
220
|
+
* All error codes thrown by the parser.
|
|
221
|
+
* Use with `instanceof ParseError` and `err.code === ErrorCode.XXX` for
|
|
222
|
+
* precise error handling without string-matching against messages.
|
|
223
|
+
*/
|
|
224
|
+
export declare const ErrorCode: Readonly<{
  // ── Input type errors ──
  INVALID_INPUT: 'INVALID_INPUT';
  INVALID_STREAM: 'INVALID_STREAM';

  // ── Streaming / feed API ──
  ALREADY_STREAMING: 'ALREADY_STREAMING';
  NOT_STREAMING: 'NOT_STREAMING';
  DATA_MUST_BE_STRING: 'DATA_MUST_BE_STRING';

  // ── Tag structure ──
  UNEXPECTED_END: 'UNEXPECTED_END';
  UNEXPECTED_CLOSE_TAG: 'UNEXPECTED_CLOSE_TAG';
  MISMATCHED_CLOSE_TAG: 'MISMATCHED_CLOSE_TAG';
  UNEXPECTED_TRAILING_DATA: 'UNEXPECTED_TRAILING_DATA';
  INVALID_TAG: 'INVALID_TAG';
  UNCLOSED_QUOTE: 'UNCLOSED_QUOTE';

  // ── Namespace ──
  MULTIPLE_NAMESPACES: 'MULTIPLE_NAMESPACES';

  // ── Security ──
  SECURITY_PROTOTYPE_POLLUTION: 'SECURITY_PROTOTYPE_POLLUTION';
  SECURITY_RESERVED_OPTION: 'SECURITY_RESERVED_OPTION';
  SECURITY_RESTRICTED_NAME: 'SECURITY_RESTRICTED_NAME';

  // ── Limits (DoS prevention) ──
  LIMIT_MAX_NESTED_TAGS: 'LIMIT_MAX_NESTED_TAGS';
  LIMIT_MAX_ATTRIBUTES: 'LIMIT_MAX_ATTRIBUTES';

  // ── Entity limits ──
  ENTITY_MAX_COUNT: 'ENTITY_MAX_COUNT';
  ENTITY_MAX_SIZE: 'ENTITY_MAX_SIZE';
  ENTITY_MAX_EXPANSIONS: 'ENTITY_MAX_EXPANSIONS';
  ENTITY_MAX_EXPANDED_LENGTH: 'ENTITY_MAX_EXPANDED_LENGTH';

  // ── Entity registration ──
  ENTITY_INVALID_KEY: 'ENTITY_INVALID_KEY';
  ENTITY_INVALID_VALUE: 'ENTITY_INVALID_VALUE';
}>;
|
|
264
|
+
|
|
265
|
+
/** Union of every string literal held by `ErrorCode`. */
export type ErrorCodeValue = (typeof ErrorCode)[keyof typeof ErrorCode];
|
|
266
|
+
|
|
267
|
+
/**
|
|
268
|
+
* Structured error class thrown by all parser error paths.
|
|
269
|
+
*
|
|
270
|
+
* Always catch with `instanceof ParseError` to distinguish library errors
|
|
271
|
+
* from unexpected runtime errors:
|
|
272
|
+
*
|
|
273
|
+
* ```ts
|
|
274
|
+
* try {
|
|
275
|
+
* parser.parse(xml);
|
|
276
|
+
* } catch (e) {
|
|
277
|
+
* if (e instanceof ParseError) {
|
|
278
|
+
* console.error(e.code, e.line, e.col, e.message);
|
|
279
|
+
* } else {
|
|
280
|
+
* throw e; // unexpected runtime error
|
|
281
|
+
* }
|
|
282
|
+
* }
|
|
283
|
+
* ```
|
|
284
|
+
*/
|
|
285
|
+
export declare class ParseError extends Error {
  readonly name: 'ParseError';

  /** Machine-readable error code; always one of the `ErrorCode` values. */
  readonly code: ErrorCodeValue;

  /**
   * Line number (1-based) at which the error occurred.
   * `undefined` when position information is not available for this error type.
   */
  readonly line: number | undefined;

  /**
   * Column (1-based) at which the error occurred.
   * `undefined` when position information is not available for this error type.
   */
  readonly col: number | undefined;

  /**
   * Character offset (0-based) from the start of the document.
   * `undefined` when position information is not available for this error type.
   */
  readonly index: number | undefined;

  /**
   * @param message  - Human-readable description of the failure.
   * @param code     - One of the `ErrorCode` values.
   * @param position - Optional location of the failure; each field may be omitted.
   */
  constructor(
    message: string,
    code: ErrorCodeValue,
    position?: { line?: number; col?: number; index?: number }
  );

  /** Returns a formatted string: `ParseError [CODE] at line N, col M: message` */
  toString(): string;
}
|
|
318
|
+
|
|
319
|
+
// ─── Limits ────────────────────────────────────────────────────────────────────
|
|
320
|
+
|
|
321
|
+
/**
|
|
322
|
+
* Structural limits that guard against resource-exhaustion and DoS attacks.
|
|
323
|
+
* All properties default to `null` (no limit enforced).
|
|
324
|
+
*
|
|
325
|
+
* Errors thrown when limits are exceeded are always `ParseError` instances
|
|
326
|
+
* with codes `LIMIT_MAX_NESTED_TAGS` or `LIMIT_MAX_ATTRIBUTES` respectively,
|
|
327
|
+
* and carry `line`, `col`, and `index` position information.
|
|
328
|
+
*/
|
|
329
|
+
export interface LimitsOptions {
  /**
   * Deepest tag nesting that is accepted.
   *
   * A `ParseError` with code `LIMIT_MAX_NESTED_TAGS` is thrown when a tag
   * would open at a depth greater than this value.
   *
   * Guards against stack-overflow attacks that use pathologically deep XML
   * such as `<a><a><a>...</a></a></a>` (1 million levels deep).
   *
   * Must be a positive integer (`>= 1`) or `null`.
   * Default: `null` (unlimited)
   *
   * @example
   * // Reject XML deeper than 100 tags
   * new XMLParser({ limits: { maxNestedTags: 100 } });
   */
  maxNestedTags?: number | null;

  /**
   * Largest number of attributes accepted on a single tag.
   *
   * A `ParseError` with code `LIMIT_MAX_ATTRIBUTES` is thrown when a tag has
   * more attributes than this value. Only enforced when `skip.attributes`
   * is `false` (attributes are being parsed).
   *
   * Guards against attacks that use thousands of attributes to exhaust
   * memory or CPU during attribute parsing.
   *
   * Must be a non-negative integer (`>= 0`) or `null`.
   * `0` means no attributes are permitted on any tag.
   * Default: `null` (unlimited)
   *
   * @example
   * // Reject any tag with more than 50 attributes
   * new XMLParser({ skip: { attributes: false }, limits: { maxAttributesPerTag: 50 } });
   */
  maxAttributesPerTag?: number | null;
}
|
|
368
|
+
|
|
369
|
+
/**
|
|
370
|
+
* Buffer options for the feed()/end() and parseStream() input APIs.
|
|
371
|
+
* Passed as `feedable` inside XMLParser options.
|
|
372
|
+
*/
|
|
373
|
+
export interface FeedableOptions {
  /**
   * Largest number of characters the buffer may hold at any one time.
   * Guards against memory exhaustion when data is fed faster than it is consumed.
   * Default: 10485760 (10 MB)
   */
  maxBufferSize?: number;

  /**
   * When true (the default), characters that were already processed are
   * discarded from the buffer automatically once the processed portion
   * exceeds `flushThreshold`. This keeps memory usage flat for large documents.
   * Default: true
   */
  autoFlush?: boolean;

  /**
   * Count of processed characters that triggers an automatic flush.
   * Lower values release memory sooner, at the cost of more string-slice
   * operations. Default: 1024 (1 KB)
   */
  flushThreshold?: number;
}
|
|
396
|
+
|
|
397
|
+
/** Options accepted by the `XMLParser` constructor. */
export interface X2jOptions {
  // ─── node-type controls ───
  /** Fine-grained switches for which node types appear in the output. */
  skip?: SkipOptions;

  // ─── property name mapping ───
  /** Property names used for special nodes in the output. */
  nameFor?: NameForOptions;

  // ─── attribute controls ───
  /** How attributes are parsed and represented. */
  attributes?: AttributeOptions;

  // ─── tag controls ───
  /** Tag parsing options including stop nodes and value parser chain. */
  tags?: TagOptions;

  // ─── DOCTYPE parsing ───
  /**
   * Controls whether DOCTYPE entities are collected, and the read-time
   * security limits applied while doing so.
   * Collected entities are passed to the output builder, which decides what
   * to do with them.
   */
  doctypeOptions?: DoctypeOptions;

  // ─── security ───
  /** Throw when a tag/attribute name collides with a nameFor.* or attributes.groupBy value. Default: false */
  strictReservedNames?: boolean;

  /** Custom handler for dangerous (non-critical) property names. Default: prefix with '__' */
  onDangerousProperty?: (name: string) => string;

  // ─── filtering (path-expression-matcher) ───
  // NOTE(review): `select` and `only` carry no documentation in this file;
  // presumably they hold path expressions — confirm against the README.
  select?: string[];
  only?: string[];

  // ─── limits (DoS prevention) ───
  /**
   * Structural limits guarding against resource-exhaustion attacks.
   * Every property defaults to `null` (no limit enforced).
   *
   * ```ts
   * new XMLParser({
   *   limits: {
   *     maxNestedTags: 100,       // reject XML deeper than 100 levels
   *     maxAttributesPerTag: 50,  // reject any tag with > 50 attributes
   *   }
   * });
   * ```
   */
  limits?: LimitsOptions | null;

  // ─── feedable (feed/end and parseStream buffer options) ───
  /**
   * Buffer behaviour for the FeedableSource (feed/end API) and StreamSource
   * (parseStream API). Every property has a sensible default; set them only
   * when processing very large documents or when running under tight memory
   * constraints.
   */
  feedable?: FeedableOptions;

  // ─── output builder ───
  /** Pluggable output builder instance. Default: CompactObjBuilder */
  OutputBuilder?: BaseOutputBuilderFactory;

  /**
   * Callback that `NodeTreeBuilder` and `CompactObjBuilder` fire whenever a
   * stop node has been fully collected, before its raw content is added to
   * the output tree.
   *
   * It receives the tag detail, the raw unparsed content, and a read-only
   * path matcher. Useful for side-channel analysis (e.g. extracting script
   * content from HTML) without having to post-process the output tree.
   *
   * The callback is informational — its return value is ignored. To suppress
   * the node from output, use a custom OutputBuilder subclass instead.
   *
   * @param tagDetail  - `{ name, line, col, index }` of the stop-node opening tag.
   * @param rawContent - Raw text content between the opening and closing tags.
   * @param matcher    - Read-only path matcher positioned at the stop node.
   *
   * @example
   * const scripts: string[] = [];
   * const parser = new XMLParser({
   *   tags: { stopNodes: ["..script"] },
   *   onStopNode(tagDetail, rawContent, matcher) {
   *     scripts.push(rawContent);
   *   }
   * });
   */
  onStopNode?: (
    tagDetail: { name: string; line: number; col: number; index: number },
    rawContent: string,
    matcher: any,
  ) => void;

  /**
   * Predicate evaluated after each non-self-closing, non-stop, non-skip
   * opening tag has been pushed onto the parser stack. When it returns `true`
   * the parser immediately stops reading further input and returns a
   * partial-but-consistent output object.
   *
   * At the moment of evaluation the read-only `matcher` is positioned at the
   * tag that triggered the exit. All tags that were open before it are
   * cleanly closed (innermost first) so the output builder can finalise its
   * tree. The output builder's `onExit()` method is then called with the
   * exit context.
   *
   * No error is thrown — the normal return value of `parse()` /
   * `feed()+end()` / `parseStream()` is returned as usual.
   *
   * Must be a function. Passing any other truthy value raises a `ParseError`
   * with code `INVALID_INPUT` at construction time.
   *
   * @param matcher - Read-only path matcher positioned at the triggering tag.
   * @returns `true` to stop parsing now; any other value to continue.
   *
   * @example
   * // Stop after the first <item> whose @id attribute equals 'stop-here'
   * const parser = new XMLParser({
   *   skip: { attributes: false },
   *   exitIf(matcher) {
   *     return matcher.getTagName() === 'item' &&
   *            matcher.getAttribute('@_id') === 'stop-here';
   *   },
   * });
   */
  exitIf?: ((matcher: any) => boolean) | null;
}
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
export default class XMLParser {
  /**
   * Build a new XMLParser.
   * @throws {ParseError} with code `INVALID_INPUT` or `SECURITY_RESERVED_OPTION`
   *   when an option value is invalid or contains a reserved property name.
   */
  constructor(options?: X2jOptions);

  /**
   * Convert an XML string or Buffer into a JavaScript object.
   * @throws {ParseError} on any well-formedness or limit violation.
   */
  parse(xmlData: string | Buffer): any;

  /**
   * Convert a Uint8Array / byte array into a JavaScript object.
   * @throws {ParseError} on any well-formedness or limit violation.
   */
  parseBytesArr(xmlData: Uint8Array | ArrayBufferView): any;

  /**
   * Read XML from a Node.js Readable stream and return a Promise that
   * resolves with the parsed JS object.
   *
   * Chunks are processed incrementally as they arrive — input that has
   * already been consumed is freed immediately, so memory stays proportional
   * to the largest single token rather than to the total document size.
   *
   * @throws {ParseError} with code `INVALID_STREAM` if the argument is not a
   *   Node.js Readable stream.
   */
  parseStream(readable: NodeJS.ReadableStream): Promise<any>;

  /**
   * Supply one chunk of XML data for incremental parsing.
   * Call `end()` once every chunk has been fed.
   * @throws {ParseError} with code `DATA_MUST_BE_STRING` if data is not a string or Buffer.
   */
  feed(data: string | Buffer): this;

  /**
   * Signal the end of input and return the parsed result.
   * @throws {ParseError} with code `NOT_STREAMING` if called before any `feed()`.
   * @throws {ParseError} on any well-formedness or limit violation in the accumulated input.
   */
  end(): any;

  /**
   * Return the structural errors collected during the last parse call.
   * Only populated when `autoClose.collectErrors` is `true`.
   * Each entry: `{ type, tag, expected, line, col, index }`
   *
   * NOTE(review): no `autoClose` option is declared on `X2jOptions` in this
   * file — confirm where it is typed.
   */
  getParseErrors(): Array<{
    type: 'unclosed-eof' | 'mismatched-close' | 'phantom-close' | 'partial-tag';
    tag: string;
    expected?: string;
    line?: number;
    col?: number;
    index?: number;
  }>;

  /**
   * `true` if the last parse call was terminated early by `exitIf`;
   * `false` when `exitIf` never fired or the feature is not configured.
   */
  wasExited: boolean;
}
|
|
591
|
+
|
|
592
|
+
export { XMLParser };
|
|
593
|
+
|
|
594
|
+
|
|
595
|
+
// ─── Stop-node utilities ───────────────────────────────────────────────────────
|
|
596
|
+
|
|
597
|
+
/**
|
|
598
|
+
* XML structural enclosures — comments, CDATA sections, processing instructions.
|
|
599
|
+
*
|
|
600
|
+
* Use in `skipEnclosures` to prevent false closing-tag matches inside these
|
|
601
|
+
* XML constructs:
|
|
602
|
+
*
|
|
603
|
+
* ```ts
|
|
604
|
+
* { expression: "body..pre", skipEnclosures: [...xmlEnclosures] }
|
|
605
|
+
* ```
|
|
606
|
+
*/
|
|
607
|
+
export declare const xmlEnclosures: readonly Enclosure[];
|
|
608
|
+
|
|
609
|
+
/**
|
|
610
|
+
* String-literal enclosures — single-quote, double-quote, and template literals.
|
|
611
|
+
*
|
|
612
|
+
* Use in `skipEnclosures` for stop nodes that contain JS or CSS source code
|
|
613
|
+
* where closing tags might appear inside string literals:
|
|
614
|
+
*
|
|
615
|
+
* ```ts
|
|
616
|
+
* { expression: "head..style", skipEnclosures: [...xmlEnclosures, ...quoteEnclosures] }
|
|
617
|
+
* ```
|
|
618
|
+
*/
|
|
619
|
+
export declare const quoteEnclosures: readonly Enclosure[];
|
package/src/fxp.js
ADDED
package/src/util.js
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Character-class body listing the characters allowed at the start of a name.
const nameStartChar = ':A-Za-z_\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u02FF\\u0370-\\u037D\\u037F-\\u1FFF\\u200C-\\u200D\\u2070-\\u218F\\u2C00-\\u2FEF\\u3001-\\uD7FF\\uF900-\\uFDCF\\uFDF0-\\uFFFD';

// Characters allowed after the first one: the start characters plus
// '-', '.', digits and a few extra ranges.
const nameChar = `${nameStartChar}\\-.\\d\\u00B7\\u0300-\\u036F\\u203F-\\u2040`;

/** Regex source (a string, not a RegExp) matching one name: a start character followed by any number of name characters. */
export const nameRegexp = `[${nameStartChar}][${nameChar}]*`;

// Anchored form of `nameRegexp`, used to test whether a whole string is a name.
const regexName = new RegExp(`^${nameRegexp}$`);
|
|
7
|
+
|
|
8
|
+
/**
 * Collect every match of `regex` in `string`.
 *
 * Each result is a plain array holding the full match followed by its capture
 * groups, with an extra `startIndex` property giving the offset of the match
 * within `string`.
 *
 * The regex's `lastIndex` is used and advanced as usual for `g`/`y` regexes.
 * A regex without either flag yields at most one match: `exec()` never
 * advances for such a regex, so looping on it would never terminate.
 *
 * @param string - Text to search.
 * @param regex  - Regular expression to apply.
 * @returns Array of match arrays, in order of appearance; empty when nothing matches.
 */
export function getAllMatches(string, regex) {
  const matches = [];
  // Only global or sticky regexes move `lastIndex` forward between exec() calls.
  const advances = regex.global || regex.sticky;

  let match = regex.exec(string);
  while (match) {
    // Copy the indexed entries only (full match + groups) and attach the offset.
    matches.push(Object.assign(Array.from(match), { startIndex: match.index }));

    if (!advances) {
      break;
    }
    // A zero-length match leaves `lastIndex` where it was; step past it so the
    // next exec() cannot return the same empty match again.
    if (match[0].length === 0) {
      regex.lastIndex += 1;
    }
    match = regex.exec(string);
  }
  return matches;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Tell whether the whole of `string` matches the name pattern held in `regexName`.
 *
 * @param string - Candidate name.
 * @returns `true` when the pattern matches, `false` otherwise.
 */
export const isName = function (string) {
  // `regexName` has no `g`/`y` flag, so test() is stateless and equivalent
  // to checking exec() for a non-null result.
  return regexName.test(string);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Report whether a value is anything other than `undefined`.
 * `null`, `0`, `''` and `false` all count as existing.
 *
 * @param v - Value to inspect.
 * @returns `false` only when `typeof v` is `'undefined'`.
 */
export function isExist(v) {
  const kind = typeof v;
  return !(kind === 'undefined');
}
|
|
32
|
+
|
|
33
|
+
/**
 * Report whether `obj` has no own enumerable string-keyed properties.
 *
 * @param obj - Object to inspect; `Object.keys` throws for `null`/`undefined`.
 * @returns `true` when `Object.keys(obj)` is empty.
 */
export function isEmptyObject(obj) {
  const ownKeys = Object.keys(obj);
  return ownKeys.length === 0;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Return `v` unchanged, or an empty string when `isExist(v)` reports that
 * it does not exist.
 *
 * @param v - Any value.
 * @returns `v` itself, or `''` as the fallback.
 */
export function getValue(v) {
  return isExist(v) ? v : '';
}
|
|
44
|
+
|
|
45
|
+
/**
 * Property names flagged as dangerous.
 * All of them are method names found on `Object.prototype`.
 */
export const DANGEROUS_PROPERTY_NAMES = [
  'hasOwnProperty', 'toString', 'valueOf',
  '__defineGetter__', '__defineSetter__',
  '__lookupGetter__', '__lookupSetter__',
  'toLocaleString', 'isPrototypeOf', 'propertyIsEnumerable',
];
|
|
57
|
+
|
|
58
|
+
/** Property names treated as critical: the prototype-chain keys. */
export const criticalProperties = [
  '__proto__',
  'constructor',
  'prototype',
];
|