@nodable/flexible-xml-parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,225 @@
1
+ import { ParseError, ErrorCode } from '../ParseError.js';
2
+
3
/**
 * StringSource — input source backed by an in-memory string.
 *
 * ### Memory reclamation
 *
 * The whole document is handed to the constructor, so there is never a
 * partially-received token to rewind over and rewindToMark() is a no-op.
 * flush() drops the already-consumed prefix by re-slicing the buffer and
 * shifting startIndex (and every active mark) back by the same amount.
 *
 * ### Mark / flush protocol
 *
 * The method set is intended to mirror FeedableSource (not shown here) so that
 * reader functions can be written without source-type conditionals:
 *
 *   markTokenStart()  — remember the current read position for a mark level
 *   rewindToMark()    — no-op for StringSource
 *   clearMark()       — forget both marks
 *   flush()           — drop the consumed prefix, preserving marked positions
 *
 * Auto-flush fires inside updateBufferBoundary() once startIndex reaches
 * flushThreshold, provided autoFlush is on and no mark is active.
 */
export default class StringSource {
  /**
   * @param {string} str — the full XML document string
   * @param {object} [options] — may be omitted or null
   * @param {boolean} [options.autoFlush=true] — enable automatic flushing
   * @param {number}  [options.flushThreshold=1024] — flush once this many chars are consumed
   */
  constructor(str, options = {}) {
    // `options ?? {}` also tolerates an explicit null, which the default
    // parameter alone does not cover.
    const { autoFlush, flushThreshold } = options ?? {};

    // Initial position counters; no method of this class updates them.
    this.line = 1;
    this.cols = 0;

    this.buffer = str;
    // Read cursor: everything before this index has been consumed.
    this.startIndex = 0;

    this.autoFlush = autoFlush !== false;
    this.flushThreshold = flushThreshold ?? 1024;

    // Two mark slots: [0] = outer mark, [1] = inner mark. -1 means "not set".
    this._marks = [-1, -1];
  }

  // ─── Token-start checkpoint ───────────────────────────────────────────────

  /**
   * Record the current read position in the given mark slot.
   *
   * For StringSource the marks only influence flush(): a flush never drops
   * text at or after the smallest active mark.
   *
   * @param {0|1} [level=0] — 0 = outer mark, 1 = inner mark
   */
  markTokenStart(level = 0) {
    this._marks[level] = this.startIndex;
  }

  /**
   * Intentionally does nothing.
   *
   * The complete document is already in the buffer, so there is no
   * "ran out of input mid-token" situation to recover from. The method exists
   * so callers can invoke rewindToMark() on any source type.
   */
  rewindToMark() {
    // No-op: the complete document is in memory; no rewind is ever needed.
  }

  /** Reset both mark slots to "not set". */
  clearMark() {
    this._marks[0] = -1;
    this._marks[1] = -1;
  }

  /**
   * Discard the already-consumed prefix of the buffer.
   *
   * The cut point is the smallest of startIndex and all active marks, so text
   * belonging to a marked, in-progress token stays in the buffer. startIndex
   * and the active marks are shifted so they keep pointing at the same
   * characters. Does nothing when the cut point is 0.
   */
  flush() {
    let origin = this.startIndex;
    for (const mark of this._marks) {
      if (mark >= 0 && mark < origin) origin = mark;
    }
    if (origin <= 0) return;

    this.buffer = this.buffer.substring(origin);
    this._marks = this._marks.map((mark) => (mark >= 0 ? mark - origin : mark));
    this.startIndex -= origin;
  }

  // ─── Core read interface ──────────────────────────────────────────────────

  /**
   * Return the character at the cursor and advance the cursor by one.
   * Yields undefined (and still advances) when the cursor is past the end.
   */
  readCh() {
    return this.buffer[this.startIndex++];
  }

  /**
   * Peek at the character `index` positions after the cursor without moving it.
   * @param {number} index — offset relative to the cursor
   */
  readChAt(index) {
    return this.buffer[this.startIndex + index];
  }

  /**
   * Return up to `n` characters without moving the cursor.
   * @param {number} n — number of characters
   * @param {number} [from] — absolute buffer index; defaults to the cursor
   */
  readStr(n, from) {
    const begin = typeof from === 'undefined' ? this.startIndex : from;
    return this.buffer.substring(begin, begin + n);
  }

  /**
   * Read up to (and consume) the first occurrence of `stopStr` at or after the
   * cursor, returning the text before it.
   *
   * @param {string} stopStr
   * @returns {string}
   * @throws {ParseError} UNEXPECTED_END when `stopStr` does not occur
   */
  readUpto(stopStr) {
    const hit = this.buffer.indexOf(stopStr, this.startIndex);
    // `hit >= length` can only happen for an empty stopStr with the cursor at
    // the end of the buffer; that case is reported as end-of-source too.
    if (hit === -1 || hit >= this.buffer.length) {
      throw new ParseError(`Unexpected end of source reading '${stopStr}'`, ErrorCode.UNEXPECTED_END);
    }
    const result = this.buffer.substring(this.startIndex, hit);
    this.startIndex = hit + stopStr.length;
    return result;
  }

  /**
   * Single-character variant of readUpto. Reads until `stopChar` is found,
   * consumes it, and returns the text before it.
   *
   * @param {string} stopChar  Exactly one character.
   * @returns {string}
   * @throws {ParseError} UNEXPECTED_END when `stopChar` does not occur
   */
  readUptoChar(stopChar) {
    const hit = this.buffer.indexOf(stopChar, this.startIndex);
    if (hit === -1) {
      throw new ParseError(`Unexpected end of source reading '${stopChar}'`, ErrorCode.UNEXPECTED_END);
    }
    const result = this.buffer.substring(this.startIndex, hit);
    this.startIndex = hit + 1;
    return result;
  }

  /**
   * Read raw text up to a complete closing tag and consume that tag.
   *
   * A closing tag is `stopStr`, followed by optional XML whitespace (space,
   * tab, CR, LF), followed by '>'. An occurrence of `stopStr` followed by
   * anything else (e.g. `</scriptX>` for stopStr `</script`) is a false match:
   * scanning resumes right after the start of that occurrence, so a real
   * closing tag that begins inside the rejected text is still found.
   *
   * @param {string} stopStr — e.g. "</tagname"
   * @returns {string} the text between the cursor and the closing tag
   * @throws {ParseError} UNEXPECTED_END when no complete closing tag exists
   */
  readUptoCloseTag(stopStr) {
    const { buffer } = this;
    const bufferLength = buffer.length;
    let searchFrom = this.startIndex;

    while (searchFrom < bufferLength) {
      const tagStart = buffer.indexOf(stopStr, searchFrom);
      if (tagStart === -1) break;

      // Skip the whitespace XML allows between the tag name and '>'.
      let cursor = tagStart + stopStr.length;
      while (cursor < bufferLength) {
        const c = buffer[cursor];
        if (c !== ' ' && c !== '\t' && c !== '\n' && c !== '\r') break;
        cursor++;
      }

      if (buffer[cursor] === '>') {
        const result = buffer.substring(this.startIndex, tagStart);
        this.startIndex = cursor + 1;
        return result;
      }

      // False match — retry from the next character.
      searchFrom = tagStart + 1;
    }

    throw new ParseError(`Unexpected end of source reading '${stopStr}'`, ErrorCode.UNEXPECTED_END);
  }

  /**
   * Return the next `n` characters at the cursor (a single character when
   * n === 1), optionally advancing the cursor via updateBufferBoundary().
   *
   * @param {number} n
   * @param {boolean} [updateIndex] — advance the cursor by n when truthy
   */
  readFromBuffer(n, updateIndex) {
    const chunk = n === 1
      ? this.buffer[this.startIndex]
      : this.buffer.substring(this.startIndex, this.startIndex + n);
    if (updateIndex) this.updateBufferBoundary(n);
    return chunk;
  }

  /**
   * Advance the read cursor by n characters.
   *
   * Afterwards, flush() is called automatically when autoFlush is enabled,
   * the cursor has reached flushThreshold, and neither mark slot is set.
   *
   * @param {number} [n=1]
   */
  updateBufferBoundary(n = 1) {
    this.startIndex += n;
    if (!this.autoFlush || this.startIndex < this.flushThreshold) return;
    const anyMarkActive = this._marks.some((mark) => mark >= 0);
    if (!anyMarkActive) this.flush();
  }

  /**
   * Report whether `buffer.length - n` is positive, where `n` defaults to the
   * cursor position.
   *
   * NOTE(review): a supplied `n` is compared against the total buffer length,
   * not the remaining length after the cursor — confirm this matches callers.
   *
   * @param {number} [n]
   * @returns {boolean}
   */
  canRead(n) {
    const position = n !== undefined ? n : this.startIndex;
    return this.buffer.length - position > 0;
  }
}
@@ -0,0 +1,400 @@
1
+ import { CompactBuilderFactory } from '@nodable/compact-builder';
2
+ import { Expression, ExpressionSet } from 'path-expression-matcher';
3
+ import { ParseError, ErrorCode } from './ParseError.js';
4
+ import { DANGEROUS_PROPERTY_NAMES, criticalProperties } from './util.js';
5
+
6
/**
 * Default handler for property names listed in DANGEROUS_PROPERTY_NAMES:
 * such a name is returned with a "__" prefix, any other name is returned
 * unchanged.
 *
 * @param {string} name
 * @returns {string}
 */
const defaultOnDangerousProperty = (name) =>
  (DANGEROUS_PROPERTY_NAMES.includes(name) ? "__" + name : name);
12
+
13
/**
 * Default parser options. buildOptions() deep-clones this object and merges
 * the caller's options on top of the clone.
 */
export const defaultOptions = {
  // ── skip ──────────────────────────────────────────────────────────────────
  // Node types to leave out of the output.
  skip: {
    // <?xml ... ?> declaration
    declaration: false,
    // processing instructions other than the declaration
    pi: false,
    // all attributes
    attributes: true,
    // CDATA sections (excluded from output entirely)
    cdata: false,
    // comments (excluded from output entirely)
    comment: false,
    // namespace prefixes (e.g. ns:tag → tag)
    nsPrefix: false,
    // tag paths to skip entirely; their content is silently dropped
    tags: [],
  },

  // ── nameFor ───────────────────────────────────────────────────────────────
  // Property names used when special nodes are included in the output.
  nameFor: {
    // property that holds mixed text content
    text: '#text',
    // '' = merge CDATA into the text value
    cdata: '',
    // '' = omit comments from the output
    comment: '',
  },

  // ── attributes ────────────────────────────────────────────────────────────
  attributes: {
    // allow valueless attributes (treated as boolean true)
    booleanType: false,
    // key under which all attributes are grouped; '' = inline with the tag
    groupBy: '',
    // prepended to attribute names in the output
    prefix: '@_',
    // appended to attribute names in the output
    suffix: '',
  },

  // ── tags ──────────────────────────────────────────────────────────────────
  tags: {
    // tags that never have a closing tag (e.g. br, img, hr)
    unpaired: [],
    // tag paths whose content is captured raw, without parsing
    stopNodes: [],
  },

  // ── security ──────────────────────────────────────────────────────────────
  strictReservedNames: false,
  onDangerousProperty: defaultOnDangerousProperty,

  // ── filtering (path-expression-matcher) ───────────────────────────────────
  // Reserved for future use.
  only: [],

  // ── DOCTYPE parsing ───────────────────────────────────────────────────────
  // enabled
  //   false (default) → the DOCTYPE is still read (to consume it) but its
  //                     entities are discarded and never reach the output
  //                     builder.
  //   true            → DOCTYPE entities are collected and forwarded to the
  //                     output builder. Replacement only happens if that
  //                     builder has an EntitiesValueParser registered under
  //                     'entity' and 'entity' is in its valueParsers chain.
  //
  // Read-time limits (enforced by DocTypeReader at declaration time):
  //   maxEntityCount — max entities declared in one DOCTYPE (default: 100)
  //   maxEntitySize  — max bytes per entity definition value (default: 10000)
  //
  // Replacement-time limits (maxTotalExpansions, maxExpandedLength) belong to
  // EntitiesValueParser and are not part of doctypeOptions.
  doctypeOptions: {
    enabled: false,
    maxEntityCount: 100,
    maxEntitySize: 10000,
  },

  // ── autoClose ─────────────────────────────────────────────────────────────
  // Behaviour for unclosed or mismatched tags. null disables the feature, in
  // which case any malformed input throws.
  //
  //   onEof         — EOF reached while tags are still open:
  //                     'throw' (default) → throw an error
  //                     'closeAll'        → silently close the remaining tags
  //   onMismatch    — closing tag does not match the current open tag:
  //                     'throw' (default) → throw an error
  //                     'recover'         → pop the stack toward the nearest
  //                                         matching opener; discard the tag
  //                                         if none is found
  //                     'discard'         → silently ignore the closing tag
  //   collectErrors — when true, errors are recorded in result.__parseErrors
  //                   as { type, tag, expected, line, col, index } entries
  //                   instead of being silently dropped.
  //
  // Shorthand: autoClose: 'html' means onEof:'closeAll', onMismatch:'discard',
  // collectErrors:true, plus the standard HTML void elements appended to
  // tags.unpaired.
  autoClose: null,

  // ── limits (DoS prevention) ───────────────────────────────────────────────
  // Structural limits guarding against resource exhaustion; null = no limit.
  //
  //   maxNestedTags       — maximum tag nesting depth; exceeding it throws.
  //   maxAttributesPerTag — maximum attributes on one tag; exceeding it throws.
  limits: {
    maxNestedTags: null,
    maxAttributesPerTag: null,
  },

  // ── feedable (feed/end and parseStream input) ─────────────────────────────
  // Buffer behaviour for FeedableSource and StreamSource.
  //
  //   maxBufferSize  — maximum characters held in the buffer at one time
  //                    (default: 10 MB = 10 * 1024 * 1024 characters).
  //   autoFlush      — when true (default), processed characters are dropped
  //                    from the front of the buffer once the processed
  //                    portion exceeds flushThreshold.
  //   flushThreshold — processed-character count that triggers an auto-flush;
  //                    smaller values free memory sooner at the cost of more
  //                    string slicing (default: 1024 characters).
  feedable: {
    maxBufferSize: 10 * 1024 * 1024,
    autoFlush: true,
    flushThreshold: 1024,
  },

  // ── exitIf ────────────────────────────────────────────────────────────────
  // Optional predicate `exitIf(matcher) → boolean`, given a read-only matcher
  // positioned at the just-opened tag. When it returns true the parser:
  //   1. closes every open tag, innermost first (addTextNode() + popTag()),
  //      so the output builder can finalise its tree;
  //   2. calls outputBuilder.onExit({ tagDetail, matcher, tagsStack });
  //   3. leaves the parse loop without reading further input.
  // The parse call then returns the partial-but-consistent output; no error
  // is thrown. null (default) disables the feature.
  exitIf: null,

  // ── output ────────────────────────────────────────────────────────────────
  OutputBuilder: null, //TODO: accept lower case
};
159
+
160
// Union of criticalProperties and DANGEROUS_PROPERTY_NAMES: names that are
// rejected as option-supplied property keys. Exported as RESERVED_JS_NAMES.
const ALL_RESERVED = new Set(criticalProperties);
for (const dangerousName of DANGEROUS_PROPERTY_NAMES) {
  ALL_RESERVED.add(dangerousName);
}
export { ALL_RESERVED as RESERVED_JS_NAMES };
163
+
164
/**
 * Reject an option value that would be used as a property key when it is one
 * of the reserved names in ALL_RESERVED. Non-string and empty values are
 * ignored.
 *
 * @param {*} value - The option value to check
 * @param {string} optionName - Option path used in the error message
 * @throws {ParseError} SECURITY_RESERVED_OPTION when `value` is reserved
 */
function validatePropertyName(value, optionName) {
  const isCandidate = typeof value === 'string' && value !== '';
  if (!isCandidate || !ALL_RESERVED.has(value)) return;

  throw new ParseError(
    `SECURITY: '${value}' is a reserved JavaScript keyword and cannot be used as ${optionName}`,
    ErrorCode.SECURITY_RESERVED_OPTION
  );
}
173
+
174
/**
 * Build the final parser options from user-supplied options.
 *
 * Steps, in order:
 *   1. Validate security-sensitive names and the `limits` group on the raw
 *      user options.
 *   2. Deep-clone `defaultOptions` and merge the user options on top.
 *   3. Restore any option group the user nulled out, and fill in the default
 *      OutputBuilder and onDangerousProperty when they are missing.
 *   4. Normalise `tags.stopNodes` and `skip.tags` into Expression objects and
 *      attach a sealed ExpressionSet for each (`tags.stopNodesSet`,
 *      `skip.tagsSet`).
 *   5. Validate `exitIf` and resolve `autoClose`.
 *
 * @param {object} [options] - User options; may be omitted
 * @returns {object} Fully-resolved options object
 * @throws {ParseError} SECURITY_RESERVED_OPTION for reserved property names,
 *   INVALID_INPUT for malformed `limits`, `exitIf`, `stopNodes` or `skip.tags`
 */
export const buildOptions = function (options) {
  // Validate security-sensitive option values BEFORE merging.
  if (options) {
    const guardedNames = [
      ['nameFor', 'text'],
      ['nameFor', 'cdata'],
      ['nameFor', 'comment'],
      ['attributes', 'prefix'],
      ['attributes', 'groupBy'],
    ];
    for (const [group, field] of guardedNames) {
      const candidate = options[group]?.[field];
      if (candidate) validatePropertyName(candidate, `${group}.${field}`);
    }

    // Validate the limits group; null/undefined means "not supplied".
    if (options.limits !== undefined && options.limits !== null) {
      if (typeof options.limits !== 'object') {
        throw new ParseError(`'limits' must be an object, got ${typeof options.limits}`, ErrorCode.INVALID_INPUT);
      }
      const { maxNestedTags, maxAttributesPerTag } = options.limits;
      if (maxNestedTags !== undefined && maxNestedTags !== null &&
        (!Number.isInteger(maxNestedTags) || maxNestedTags < 1)) {
        throw new ParseError(`'limits.maxNestedTags' must be a positive integer, got ${maxNestedTags}`, ErrorCode.INVALID_INPUT);
      }
      if (maxAttributesPerTag !== undefined && maxAttributesPerTag !== null &&
        (!Number.isInteger(maxAttributesPerTag) || maxAttributesPerTag < 0)) {
        throw new ParseError(`'limits.maxAttributesPerTag' must be a non-negative integer, got ${maxAttributesPerTag}`, ErrorCode.INVALID_INPUT);
      }
    }
  }

  const finalOptions = deepClone(defaultOptions);

  if (options) {
    copyProperties(finalOptions, options);
  }

  // A group passed as null/undefined (e.g. `limits: null`, which the
  // validation above explicitly tolerates, or `tags: undefined`) would
  // otherwise replace the whole default group and leave later reads such as
  // `opts.tags.unpaired` dereferencing a nullish value. Put the defaults back.
  for (const [key, fallback] of Object.entries(defaultOptions)) {
    const isGroup = fallback !== null && typeof fallback === 'object' && !Array.isArray(fallback);
    if (isGroup && (finalOptions[key] === null || finalOptions[key] === undefined)) {
      finalOptions[key] = deepClone(fallback);
    }
  }

  if (!finalOptions.OutputBuilder) {
    finalOptions.OutputBuilder = new CompactBuilderFactory();
  }

  // Normalize stopNodes and skip.tags entries into Expression objects whose
  // `.data` carries { nested, skipEnclosures }, and collect them in a sealed
  // ExpressionSet for the parser to match against.
  //
  // Accepted entry forms (identical for both stopNodes and skip.tags):
  //   "..script"
  //   Expression instance
  //   { expression: "..script",  nested?: boolean, skipEnclosures?: [] }
  //   { expression: Expression,  nested?: boolean, skipEnclosures?: [] }
  //
  // `nested` defaults to false; `skipEnclosures` defaults to [].
  if (Array.isArray(finalOptions.tags?.stopNodes)) {
    const stopSet = new ExpressionSet();
    finalOptions.tags.stopNodes = finalOptions.tags.stopNodes.map(
      (entry) => normalizeTagEntry(entry, 'stopNodes', stopSet)
    );
    stopSet.seal();
    finalOptions.tags.stopNodesSet = stopSet;
  }

  if (Array.isArray(finalOptions.skip?.tags)) {
    const skipSet = new ExpressionSet();
    finalOptions.skip.tags = finalOptions.skip.tags.map(
      (entry) => normalizeTagEntry(entry, 'skip.tags', skipSet)
    );
    skipSet.seal();
    finalOptions.skip.tagsSet = skipSet;
  }

  // Both null and undefined mean "use the default handler"; an explicit
  // `onDangerousProperty: undefined` must not leave the hook unset.
  finalOptions.onDangerousProperty = finalOptions.onDangerousProperty ?? defaultOnDangerousProperty;

  // Validate exitIf
  if (finalOptions.exitIf !== null && finalOptions.exitIf !== undefined) {
    if (typeof finalOptions.exitIf !== 'function') {
      throw new ParseError(
        `'exitIf' must be a function, got ${typeof finalOptions.exitIf}`,
        ErrorCode.INVALID_INPUT,
      );
    }
  }

  // Resolve autoClose: expand the 'html' preset and normalise to an object
  finalOptions.autoClose = resolveAutoClose(finalOptions.autoClose, finalOptions);

  return finalOptions;
};
264
+
265
/** Standard HTML void elements — never have a closing tag. */
const HTML_VOID_ELEMENTS = [
  'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
  'link', 'meta', 'param', 'source', 'track', 'wbr',
];

/**
 * Normalise the raw `autoClose` option value into either null (disabled)
 * or a fully-resolved `{ onEof, onMismatch, collectErrors }` object.
 *
 *   falsy value      → null
 *   'html'           → closeAll / discard / collectErrors:true, and the HTML
 *                      void elements are appended (de-duplicated) to
 *                      `opts.tags.unpaired`
 *   any other string → used verbatim as `onEof`; onMismatch 'throw',
 *                      collectErrors false
 *   object           → its fields, with 'throw' / 'throw' / false as fallbacks
 *   anything else    → null
 *
 * @param {null|string|object} raw  - Value supplied by the user
 * @param {object}             opts - The already-merged final options; its
 *   `tags` property is replaced for the 'html' preset. A missing or null
 *   `opts.tags` is treated as having no unpaired tags.
 * @returns {null|object}
 */
function resolveAutoClose(raw, opts) {
  if (!raw) return null;

  if (raw === 'html') {
    // `opts.tags` can be nullish when the caller passed `tags: null` or
    // `tags: undefined`; optional chaining keeps the preset from throwing.
    const existingUnpaired = opts.tags?.unpaired || [];
    const merged = [...new Set([...existingUnpaired, ...HTML_VOID_ELEMENTS])];
    opts.tags = { ...opts.tags, unpaired: merged };

    return {
      onEof: 'closeAll',
      onMismatch: 'discard',
      collectErrors: true,
    };
  }

  if (typeof raw === 'string') {
    // e.g. autoClose: 'closeAll' — treat as shorthand for onEof
    return {
      onEof: raw,
      onMismatch: 'throw',
      collectErrors: false,
    };
  }

  if (typeof raw === 'object') {
    return {
      onEof: raw.onEof || 'throw',
      onMismatch: raw.onMismatch || 'throw',
      collectErrors: raw.collectErrors || false,
    };
  }

  return null;
}
314
+
315
/**
 * Turn one `tags.stopNodes` / `skip.tags` entry into a fresh Expression whose
 * data argument is `{ nested, skipEnclosures }`, add it to `set`, and return it.
 *
 * Accepted forms:
 *   string               → used as the pattern; nested = false,
 *                          skipEnclosures = []
 *   Expression instance  → pattern taken from toString(); `nested` and
 *                          `skipEnclosures` are taken from its `.data` when
 *                          present, otherwise false / []
 *   { expression: string|Expression, nested?, skipEnclosures? }
 *                        → `nested` is true only when exactly `true`;
 *                          `skipEnclosures` is used only when it is an array
 *
 * @param {string|Expression|object} entry
 * @param {string} optionName - Used in error messages ("stopNodes" or "skip.tags")
 * @param {ExpressionSet} set - The set to register the resulting Expression into
 * @returns {Expression}
 * @throws {ParseError} INVALID_INPUT for an empty pattern or unsupported entry
 */
function normalizeTagEntry(entry, optionName, set) {
  // Build the Expression, register it and hand it back.
  const register = (pattern, nested, skipEnclosures) => {
    const expr = new Expression(pattern, {}, { nested, skipEnclosures });
    set.add(expr);
    return expr;
  };

  const rejectEmpty = (text) => {
    if (text.length === 0) {
      throw new ParseError(`${optionName} expression cannot be empty`, ErrorCode.INVALID_INPUT);
    }
  };

  if (typeof entry === 'string') {
    rejectEmpty(entry);
    return register(entry, false, []);
  }

  if (entry instanceof Expression) {
    // Bare Expression — keep its pattern; fall back to defaults for any data
    // field it does not carry.
    return register(
      entry.toString(),
      entry.data?.nested ?? false,
      entry.data?.skipEnclosures ?? [],
    );
  }

  const isConfigObject = entry && typeof entry === 'object' && entry.expression !== undefined;
  if (!isConfigObject) {
    throw new ParseError(
      `Invalid ${optionName} entry: expected a string, Expression, or { expression, nested?, skipEnclosures? } object.`,
      ErrorCode.INVALID_INPUT,
    );
  }

  const { expression } = entry;
  let pattern;
  if (typeof expression === 'string') {
    rejectEmpty(expression);
    pattern = expression;
  } else if (expression instanceof Expression) {
    pattern = expression.toString();
  } else {
    throw new ParseError(`${optionName} expression must be a string or Expression instance`, ErrorCode.INVALID_INPUT);
  }

  return register(
    pattern,
    entry.nested === true,
    Array.isArray(entry.skipEnclosures) ? entry.skipEnclosures : [],
  );
}
365
+
366
/**
 * Recursively copy plain objects and arrays.
 *
 * Primitives, null and functions are returned as-is. RegExp and Expression
 * instances are also returned by reference rather than copied. Every other
 * object is rebuilt as a plain object from its own enumerable string keys.
 *
 * @param {*} obj
 * @returns {*}
 */
function deepClone(obj) {
  const isContainer = obj !== null && typeof obj === 'object';
  if (!isContainer) return obj;

  if (Array.isArray(obj)) return obj.map((item) => deepClone(item));

  // Shared by reference: these are not walked property-by-property.
  if (obj instanceof RegExp || obj instanceof Expression) return obj;

  const copy = {};
  for (const [key, value] of Object.entries(obj)) {
    // Functions fall through deepClone's non-object guard unchanged.
    copy[key] = deepClone(value);
  }
  return copy;
}
377
+
378
/**
 * Recursively merge `source` into `target` (mutating `target`).
 *
 *   - Keys `__proto__`, `constructor` and `prototype` are ignored to guard
 *     against prototype pollution.
 *   - A value of `undefined` is ignored, so an explicitly-undefined option
 *     keeps whatever `target` already holds instead of erasing it.
 *   - `OutputBuilder`, functions, RegExp instances, arrays, null and
 *     primitives are assigned by reference/value.
 *   - Any other object is merged key-by-key into `target[key]`, which is
 *     replaced by `{}` first when it is not already a non-null object.
 *
 * @param {object} target - Object to merge into
 * @param {object} source - Object whose own enumerable keys are merged
 */
function copyProperties(target, source) {
  for (const [key, value] of Object.entries(source)) {
    // Guard against prototype pollution via option keys
    if (key === '__proto__' || key === 'constructor' || key === 'prototype') continue;

    // "Not provided": leave the existing (default) value in place.
    if (value === undefined) continue;

    const mergeRecursively =
      key !== 'OutputBuilder' &&
      value !== null &&
      typeof value === 'object' &&
      !(value instanceof RegExp) &&
      !Array.isArray(value);

    if (!mergeRecursively) {
      target[key] = value;
      continue;
    }

    if (typeof target[key] !== 'object' || target[key] === null) {
      target[key] = {};
    }
    copyProperties(target[key], value);
  }
}