path-expression-matcher 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,531 @@
1
+ # path-expression-matcher
2
+
3
+ Efficient path tracking and pattern matching for XML, JSON, YAML or any other parsers.
4
+
5
+ ## 🎯 Purpose
6
+
7
+ `path-expression-matcher` provides two core classes for tracking and matching paths:
8
+
9
+ - **`Expression`**: Parses and stores pattern expressions (e.g., `"root.users.user[id]"`)
10
+ - **`Matcher`**: Tracks current path during parsing and matches against expressions
11
+
12
+ Compatible with [fast-xml-parser](https://github.com/NaturalIntelligence/fast-xml-parser) and similar tools.
13
+
14
+ ## 📦 Installation
15
+
16
+ ```bash
17
+ npm install path-expression-matcher
18
+ ```
19
+
20
+ ## 🚀 Quick Start
21
+
22
+ ```javascript
23
+ import { Expression, Matcher } from 'path-expression-matcher';
24
+
25
+ // Create expression (parse once, reuse many times)
26
+ const expr = new Expression("root.users.user");
27
+
28
+ // Create matcher (tracks current path)
29
+ const matcher = new Matcher();
30
+
31
+ matcher.push("root");
32
+ matcher.push("users");
33
+ matcher.push("user", { id: "123" });
34
+
35
+ // Match current path against expression
36
+ if (matcher.matches(expr)) {
37
+ console.log("Match found!");
38
+ console.log("Current path:", matcher.toString()); // "root.users.user"
39
+ }
40
+ ```
41
+
42
+ ## 📖 Pattern Syntax
43
+
44
+ ### Basic Paths
45
+
46
+ ```javascript
47
+ "root.users.user" // Exact path match
48
+ "*.users.user" // Wildcard: any parent
49
+ "root.*.user" // Wildcard: any middle
50
+ "root.users.*" // Wildcard: any child
51
+ ```
52
+
53
+ ### Deep Wildcard
54
+
55
+ ```javascript
56
+ "..user" // user anywhere in tree
57
+ "root..user" // user anywhere under root
58
+ "..users..user" // users somewhere, then user below it
59
+ ```
60
+
61
+ ### Attribute Matching
62
+
63
+ ```javascript
64
+ "user[id]" // user with "id" attribute
65
+ "user[type=admin]" // user with type="admin" (current node only)
66
+ "root[lang]..user" // ⚠️ Never matches: attribute conditions are only checked on the current node, and "root" is an ancestor here
67
+ ```
68
+
69
+ ### Position Selectors
70
+
71
+ ```javascript
72
+ "user:first" // First user (counter=0)
73
+ "user:nth(2)" // Third user (counter=2, zero-based)
74
+ "user:odd" // Odd-numbered users (counter=1,3,5...)
75
+ "user:even" // Even-numbered users (counter=0,2,4...)
76
+ "root.users.user:first" // First user under users
77
+ ```
78
+
79
+ **Note:** Position selectors use the **counter** (occurrence count of the tag name), not the position (child index). For example, in `<root><a/><b/><a/></root>`, the second `<a/>` has position=2 but counter=1.
80
+
81
+ ### Combined Patterns
82
+
83
+ ```javascript
84
+ "..user[id]:first" // user anywhere that has an "id" attribute and is the first user (counter=0) at its level
85
+ "root..user[type=admin]" // Admin user under root
86
+ ```
87
+
88
+ ## 🔧 API Reference
89
+
90
+ ### Expression
91
+
92
+ #### Constructor
93
+
94
+ ```javascript
95
+ new Expression(pattern, options)
96
+ ```
97
+
98
+ **Parameters:**
99
+ - `pattern` (string): Pattern to parse
100
+ - `options.separator` (string): Path separator (default: `'.'`)
101
+
102
+ **Example:**
103
+ ```javascript
104
+ const expr1 = new Expression("root.users.user");
105
+ const expr2 = new Expression("root/users/user", { separator: '/' });
106
+ ```
107
+
108
+ #### Methods
109
+
110
+ - `hasDeepWildcard()` → boolean
111
+ - `hasAttributeCondition()` → boolean
112
+ - `hasPositionSelector()` → boolean
113
+ - `toString()` → string
114
+
115
+ ### Matcher
116
+
117
+ #### Constructor
118
+
119
+ ```javascript
120
+ new Matcher(options)
121
+ ```
122
+
123
+ **Parameters:**
124
+ - `options.separator` (string): Default path separator (default: `'.'`)
125
+
126
+ #### Path Tracking Methods
127
+
128
+ ##### `push(tagName, attrValues)`
129
+
130
+ Add a tag to the current path. Position and counter are automatically calculated.
131
+
132
+ **Parameters:**
133
+ - `tagName` (string): Tag name
134
+ - `attrValues` (object, optional): Attribute key-value pairs (current node only)
135
+
136
+ **Example:**
137
+ ```javascript
138
+ matcher.push("user", { id: "123", type: "admin" });
139
+ matcher.push("item"); // No attributes
140
+ ```
141
+
142
+ **Position vs Counter:**
143
+ - **Position**: The child index in the parent (0, 1, 2, 3...)
144
+ - **Counter**: How many times this tag name appeared at this level (0, 1, 2...)
145
+
146
+ Example:
147
+ ```xml
148
+ <root>
149
+ <a/> <!-- position=0, counter=0 -->
150
+ <b/> <!-- position=1, counter=0 -->
151
+ <a/> <!-- position=2, counter=1 -->
152
+ </root>
153
+ ```
154
+
155
+ ##### `pop()`
156
+
157
+ Remove the last tag from the path.
158
+
159
+ ```javascript
160
+ matcher.pop();
161
+ ```
162
+
163
+ ##### `updateCurrent(attrValues)`
164
+
165
+ Update current node's attributes (useful when attributes are parsed after push).
166
+
167
+ ```javascript
168
+ matcher.push("user"); // Don't know values yet
169
+ // ... parse attributes ...
170
+ matcher.updateCurrent({ id: "123" });
171
+ ```
172
+
173
+ ##### `reset()`
174
+
175
+ Clear the entire path.
176
+
177
+ ```javascript
178
+ matcher.reset();
179
+ ```
180
+
181
+ #### Query Methods
182
+
183
+ ##### `matches(expression)`
184
+
185
+ Check if current path matches an Expression.
186
+
187
+ ```javascript
188
+ const expr = new Expression("root.users.user");
189
+ if (matcher.matches(expr)) {
190
+ // Current path matches
191
+ }
192
+ ```
193
+
194
+ ##### `getCurrentTag()`
195
+
196
+ Get current tag name.
197
+
198
+ ```javascript
199
+ const tag = matcher.getCurrentTag(); // "user"
200
+ ```
201
+
202
+ ##### `getAttrValue(attrName)`
203
+
204
+ Get attribute value of current node.
205
+
206
+ ```javascript
207
+ const id = matcher.getAttrValue("id"); // "123"
208
+ ```
209
+
210
+ ##### `hasAttr(attrName)`
211
+
212
+ Check if current node has an attribute.
213
+
214
+ ```javascript
215
+ if (matcher.hasAttr("id")) {
216
+ // Current node has "id" attribute
217
+ }
218
+ ```
219
+
220
+ ##### `getPosition()`
221
+
222
+ Get sibling position of current node (child index in parent).
223
+
224
+ ```javascript
225
+ const position = matcher.getPosition(); // 0, 1, 2, ...
226
+ ```
227
+
228
+ ##### `getCounter()`
229
+
230
+ Get repeat counter of current node (occurrence count of this tag name).
231
+
232
+ ```javascript
233
+ const counter = matcher.getCounter(); // 0, 1, 2, ...
234
+ ```
235
+
236
+ ##### `getIndex()` (deprecated)
237
+
238
+ Alias for `getPosition()`. Use `getPosition()` or `getCounter()` instead for clarity.
239
+
240
+ ```javascript
241
+ const index = matcher.getIndex(); // Same as getPosition()
242
+ ```
243
+
244
+ ##### `getDepth()`
245
+
246
+ Get current path depth.
247
+
248
+ ```javascript
249
+ const depth = matcher.getDepth(); // 3 for "root.users.user"
250
+ ```
251
+
252
+ ##### `toString(separator?)`
253
+
254
+ Get path as string.
255
+
256
+ ```javascript
257
+ const path = matcher.toString(); // "root.users.user"
258
+ const path2 = matcher.toString('/'); // "root/users/user"
259
+ ```
260
+
261
+ ##### `toArray()`
262
+
263
+ Get path as array.
264
+
265
+ ```javascript
266
+ const arr = matcher.toArray(); // ["root", "users", "user"]
267
+ ```
268
+
269
+ #### State Management
270
+
271
+ ##### `snapshot()`
272
+
273
+ Create a snapshot of current state.
274
+
275
+ ```javascript
276
+ const snapshot = matcher.snapshot();
277
+ ```
278
+
279
+ ##### `restore(snapshot)`
280
+
281
+ Restore from a snapshot.
282
+
283
+ ```javascript
284
+ matcher.restore(snapshot);
285
+ ```
286
+
287
+ ## 💡 Usage Examples
288
+
289
+ ### Example 1: XML Parser with stopNodes
290
+
291
+ ```javascript
292
+ import { XMLParser } from 'fast-xml-parser';
293
+ import { Expression, Matcher } from 'path-expression-matcher';
294
+
295
+ class MyParser {
296
+ constructor() {
297
+ this.matcher = new Matcher();
298
+
299
+ // Pre-compile stop node patterns
300
+ this.stopNodeExpressions = [
301
+ new Expression("html.body.script"),
302
+ new Expression("html.body.style"),
303
+ new Expression("..svg"),
304
+ ];
305
+ }
306
+
307
+ parseTag(tagName, attrs) {
308
+ this.matcher.push(tagName, attrs);
309
+
310
+ // Check if this is a stop node
311
+ for (const expr of this.stopNodeExpressions) {
312
+ if (this.matcher.matches(expr)) {
313
+ // Don't parse children, read as raw text
314
+ return this.readRawContent();
315
+ }
316
+ }
317
+
318
+ // Continue normal parsing
319
+ this.parseChildren();
320
+
321
+ this.matcher.pop();
322
+ }
323
+ }
324
+ ```
325
+
326
+ ### Example 2: Conditional Processing
327
+
328
+ ```javascript
329
+ const matcher = new Matcher();
330
+ const userExpr = new Expression("..user[type=admin]");
331
+ const firstItemExpr = new Expression("..item:first");
332
+
333
+ function processTag(tagName, value, attrs) {
334
+ matcher.push(tagName, attrs);
335
+
336
+ if (matcher.matches(userExpr)) {
337
+ value = enhanceAdminUser(value);
338
+ }
339
+
340
+ if (matcher.matches(firstItemExpr)) {
341
+ value = markAsFirst(value);
342
+ }
343
+
344
+ matcher.pop();
345
+ return value;
346
+ }
347
+ ```
348
+
349
+ ### Example 3: Path-based Filtering
350
+
351
+ ```javascript
352
+ const patterns = [
353
+ new Expression("data.users.user"),
354
+ new Expression("data.posts.post"),
355
+ new Expression("..comment[approved=true]"),
356
+ ];
357
+
358
+ function shouldInclude(matcher) {
359
+ return patterns.some(expr => matcher.matches(expr));
360
+ }
361
+ ```
362
+
363
+ ### Example 4: Custom Separator
364
+
365
+ ```javascript
366
+ const matcher = new Matcher({ separator: '/' });
367
+ const expr = new Expression("root/config/database", { separator: '/' });
368
+
369
+ matcher.push("root");
370
+ matcher.push("config");
371
+ matcher.push("database");
372
+
373
+ console.log(matcher.toString()); // "root/config/database"
374
+ console.log(matcher.matches(expr)); // true
375
+ ```
376
+
377
+ ### Example 5: Attribute Checking
378
+
379
+ ```javascript
380
+ const matcher = new Matcher();
381
+ matcher.push("root");
382
+ matcher.push("user", { id: "123", type: "admin", status: "active" });
383
+
384
+ // Check attribute existence (current node only)
385
+ console.log(matcher.hasAttr("id")); // true
386
+ console.log(matcher.hasAttr("email")); // false
387
+
388
+ // Get attribute value (current node only)
389
+ console.log(matcher.getAttrValue("type")); // "admin"
390
+
391
+ // Match by attribute
392
+ const expr1 = new Expression("user[id]");
393
+ console.log(matcher.matches(expr1)); // true
394
+
395
+ const expr2 = new Expression("user[type=admin]");
396
+ console.log(matcher.matches(expr2)); // true
397
+ ```
398
+
399
+ ### Example 6: Position vs Counter
400
+
401
+ ```javascript
402
+ const matcher = new Matcher();
403
+ matcher.push("root");
404
+
405
+ // Mixed tags at same level
406
+ matcher.push("item"); // position=0, counter=0 (first item)
407
+ matcher.pop();
408
+
409
+ matcher.push("div"); // position=1, counter=0 (first div)
410
+ matcher.pop();
411
+
412
+ matcher.push("item"); // position=2, counter=1 (second item)
413
+
414
+ console.log(matcher.getPosition()); // 2 (third child overall)
415
+ console.log(matcher.getCounter()); // 1 (second "item" specifically)
416
+
417
+ // :first uses counter, not position
418
+ const expr = new Expression("root.item:first");
419
+ console.log(matcher.matches(expr)); // false (counter=1, not 0)
420
+ ```
421
+
422
+ ## 🏗️ Architecture
423
+
424
+ ### Data Storage Strategy
425
+
426
+ - **Ancestor nodes:** Store only tag name, position, and counter (minimal memory)
427
+ - **Current node:** Store tag name, position, counter, and attribute values
428
+
429
+ This design minimizes memory usage:
430
+ - No attribute names stored (derived from values object when needed)
431
+ - Attribute values only for current node, not ancestors
432
+ - Attribute checking for ancestors is not supported (acceptable trade-off)
433
+ - For 1M nodes with 3 attributes each, saves ~50MB vs storing attribute names
434
+
435
+ ### Matching Strategy
436
+
437
+ Matching is performed **bottom-to-top** (from current node toward root):
438
+ 1. Start at current node
439
+ 2. Match segments from pattern end to start
440
+ 3. Attribute checking only works for current node (ancestors have no attribute data)
441
+ 4. Position selectors use **counter** (occurrence count), not position (child index)
442
+
443
+ ### Performance
444
+
445
+ - **Expression parsing:** One-time cost when Expression is created
446
+ - **Expression analysis:** Cached (hasDeepWildcard, hasAttributeCondition, hasPositionSelector)
447
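+ - **Path tracking:** O(1) for `pop()`; `push()` sums the per-tag counts at the current level, so it is O(k) in the number of distinct sibling tag names seen at that level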
448
+ - **Pattern matching:** O(n*m) where n = path depth, m = pattern segments
449
+ - **Memory per ancestor node:** ~40-60 bytes (tag, position, counter only)
450
+ - **Memory per current node:** ~80-120 bytes (adds attribute values)
451
+
452
+ ## 🎓 Design Patterns
453
+
454
+ ### Pre-compile Patterns (Recommended)
455
+
456
+ ```javascript
457
+ // ✅ GOOD: Parse once, reuse many times
458
+ const expr = new Expression("..user[id]");
459
+
460
+ for (let i = 0; i < 1000; i++) {
461
+ if (matcher.matches(expr)) {
462
+ // ...
463
+ }
464
+ }
465
+ ```
466
+
467
+ ```javascript
468
+ // ❌ BAD: Parse on every iteration
469
+ for (let i = 0; i < 1000; i++) {
470
+ if (matcher.matches(new Expression("..user[id]"))) {
471
+ // ...
472
+ }
473
+ }
474
+ ```
475
+
476
+ ### Batch Pattern Checking
477
+
478
+ ```javascript
479
+ // For multiple patterns, check all at once
480
+ const patterns = [
481
+ new Expression("..user"),
482
+ new Expression("..post"),
483
+ new Expression("..comment"),
484
+ ];
485
+
486
+ function matchesAny(matcher, patterns) {
487
+ return patterns.some(expr => matcher.matches(expr));
488
+ }
489
+ ```
490
+
491
+ ## 🔗 Integration with fast-xml-parser
492
+
493
+ **Basic integration:**
494
+
495
+ ```javascript
496
+ import { XMLParser } from 'fast-xml-parser';
497
+ import { Expression, Matcher } from 'path-expression-matcher';
498
+
499
+ const parser = new XMLParser({
500
+ // Custom options using path-expression-matcher
501
+ stopNodes: ["script", "style"].map(tag => new Expression(`..${tag}`)),
502
+
503
+ tagValueProcessor: (tagName, value, jPath, hasAttrs, isLeaf, matcher) => {
504
+ // matcher is available in callbacks
505
+ if (matcher.matches(new Expression("..user[type=admin]"))) {
506
+ return enhanceValue(value);
507
+ }
508
+ return value;
509
+ }
510
+ });
511
+ ```
512
+
513
+ ## 🧪 Testing
514
+
515
+ ```bash
516
+ npm test
517
+ ```
518
+
519
+ The test suite contains 77 tests covering:
520
+ - Pattern parsing (exact, wildcards, attributes, position)
521
+ - Path tracking (push, pop, update)
522
+ - Pattern matching (all combinations)
523
+ - Edge cases and error conditions
524
+
525
+ ## 📄 License
526
+
527
+ MIT
528
+
529
+ ## 🤝 Contributing
530
+
531
+ Issues and PRs welcome! This package is designed to be used by XML/JSON parsers like fast-xml-parser.
package/package.json ADDED
@@ -0,0 +1,52 @@
1
+ {
2
+ "name": "path-expression-matcher",
3
+ "version": "1.0.0",
4
+ "description": "Efficient path tracking and pattern matching for XML/JSON parsers",
5
+ "main": "src/index.js",
6
+ "type": "module",
7
+ "exports": {
8
+ ".": "./src/index.js",
9
+ "./Expression": "./src/Expression.js",
10
+ "./Matcher": "./src/Matcher.js"
11
+ },
12
+ "scripts": {
13
+ "test": "node test/test.js"
14
+ },
15
+ "keywords": [
16
+ "xml",
17
+ "json",
18
+ "yaml",
19
+ "path",
20
+ "matcher",
21
+ "pattern",
22
+ "xpath",
23
+ "selector",
24
+ "parser",
25
+ "fast-xml-parser",
26
+ "fast-xml-builder"
27
+ ],
28
+ "author": "Amit Gupta (https://solothought.com)",
29
+ "license": "MIT",
30
+ "repository": {
31
+ "type": "git",
32
+ "url": "https://github.com/NaturalIntelligence/path-expression-matcher"
33
+ },
34
+ "bugs": {
35
+ "url": "https://github.com/NaturalIntelligence/path-expression-matcher/issues"
36
+ },
37
+ "homepage": "https://github.com/NaturalIntelligence/path-expression-matcher#readme",
38
+ "engines": {
39
+ "node": ">=14.0.0"
40
+ },
41
+ "files": [
42
+ "src/",
43
+ "README.md",
44
+ "LICENSE"
45
+ ],
46
+ "funding": [
47
+ {
48
+ "type": "github",
49
+ "url": "https://github.com/sponsors/NaturalIntelligence"
50
+ }
51
+ ]
52
+ }
@@ -0,0 +1,169 @@
1
/**
 * Expression - Parses and stores a tag pattern expression.
 *
 * A pattern is parsed once, in the constructor, into an array of segment
 * objects so that it can be matched many times without re-parsing.
 *
 * Segment shapes produced by the parser:
 *   - `{ type: 'deep-wildcard' }` for a doubled separator (e.g. "..")
 *   - `{ type: 'tag', tag, attrName?, attrValue?, position?, positionValue? }`
 *     for everything else. `tag` may be "*".
 *
 * @example
 * const expr = new Expression("root.users.user");
 * const expr2 = new Expression("..user[id]:first");
 * const expr3 = new Expression("root/users/user", { separator: '/' });
 */
export default class Expression {
  /**
   * Create a new Expression.
   * @param {string} pattern - Pattern string (e.g., "root.users.user", "..user[id]")
   * @param {Object} [options] - Configuration options
   * @param {string} [options.separator] - Path separator (default: '.'); may be
   *   longer than one character
   * @throws {TypeError} If `pattern` is not a string
   * @throws {Error} If a segment or position selector is malformed
   */
  constructor(pattern, options = {}) {
    // A non-string with no `length` (e.g. a number) used to produce an empty,
    // never-matching expression without any error; fail loudly instead.
    if (typeof pattern !== 'string') {
      throw new TypeError(`Expression pattern must be a string, received ${typeof pattern}`);
    }

    this.pattern = pattern;
    // `options?.` tolerates an explicit null; an empty separator falls back to '.'.
    this.separator = options?.separator || '.';
    this.segments = this._parse(pattern);

    // Computed once here so the has*() accessors are O(1) during matching.
    this._hasDeepWildcard = this.segments.some((seg) => seg.type === 'deep-wildcard');
    this._hasAttributeCondition = this.segments.some((seg) => seg.attrName !== undefined);
    this._hasPositionSelector = this.segments.some((seg) => seg.position !== undefined);
  }

  /**
   * Split a pattern into segments.
   *
   * A single separator ends the current segment; two consecutive separators
   * produce a deep-wildcard segment. Separators that appear inside an
   * attribute condition (`[...]`) belong to the condition and do not split
   * the segment, so "item[version=1.0]" stays one segment.
   *
   * @private
   * @param {string} pattern - Pattern to parse
   * @returns {Array<Object>} Array of segment objects
   * @throws {Error} If any segment is malformed (see _parseSegment)
   */
  _parse(pattern) {
    const segments = [];
    const sep = String(this.separator);
    const sepLen = sep.length;

    let bracketDepth = 0;
    let currentPart = '';
    let i = 0;

    // Emit the accumulated text as a segment (whitespace-only text is dropped).
    const flush = () => {
      const trimmed = currentPart.trim();
      if (trimmed) {
        segments.push(this._parseSegment(trimmed));
      }
      currentPart = '';
    };

    while (i < pattern.length) {
      // The separator is tested before bracket tracking so that a pattern
      // without brackets is split exactly as before.
      if (bracketDepth === 0 && pattern.startsWith(sep, i)) {
        flush();
        if (pattern.startsWith(sep, i + sepLen)) {
          // Doubled separator: deep wildcard.
          segments.push({ type: 'deep-wildcard' });
          i += 2 * sepLen;
        } else {
          i += sepLen;
        }
        continue;
      }

      const ch = pattern[i];
      if (ch === '[') {
        bracketDepth++;
      } else if (ch === ']' && bracketDepth > 0) {
        bracketDepth--;
      }
      currentPart += ch;
      i++;
    }

    flush();
    return segments;
  }

  /**
   * Parse a single segment.
   *
   * Accepted forms: `tag`, `tag[attr]`, `tag[attr=value]`, `tag:selector`,
   * `tag[attr]:selector`, where selector is `first`, `odd`, `even` or `nth(N)`.
   *
   * @private
   * @param {string} part - Segment string (e.g., "user", "user[id]", "user:first")
   * @returns {Object} Segment object
   * @throws {Error} If the segment does not fit the grammar or the position
   *   selector is unknown
   */
  _parseSegment(part) {
    // Groups: 1 = tag name, 2 = attribute condition body, 3 = position selector.
    const match = part.match(/^([^[\]:]+)(?:\[([^\]]+)\])?(?::(\w+(?:\(\d+\))?))?$/);

    if (!match) {
      throw new Error(`Invalid segment pattern: ${part}`);
    }

    const [, rawTag, attrExpr, posExpr] = match;
    const segment = { type: 'tag', tag: rawTag.trim() };

    // Attribute condition: [attr] or [attr=value]. Only the first '=' splits,
    // so the value itself may contain '='.
    if (attrExpr) {
      const eqIndex = attrExpr.indexOf('=');
      if (eqIndex === -1) {
        segment.attrName = attrExpr.trim();
      } else {
        segment.attrName = attrExpr.substring(0, eqIndex).trim();
        segment.attrValue = attrExpr.substring(eqIndex + 1).trim();
      }
    }

    // Position selector: :first, :nth(n), :odd, :even
    if (posExpr) {
      const nthMatch = posExpr.match(/^nth\((\d+)\)$/);
      if (nthMatch) {
        segment.position = 'nth';
        segment.positionValue = Number.parseInt(nthMatch[1], 10);
      } else if (['first', 'odd', 'even'].includes(posExpr)) {
        segment.position = posExpr;
      } else {
        throw new Error(`Invalid position selector: :${posExpr}`);
      }
    }

    return segment;
  }

  /**
   * Number of parsed segments (deep wildcards count as one segment each).
   * @returns {number}
   */
  get length() {
    return this.segments.length;
  }

  /**
   * Whether the expression contains at least one deep wildcard.
   * @returns {boolean}
   */
  hasDeepWildcard() {
    return this._hasDeepWildcard;
  }

  /**
   * Whether any segment carries an attribute condition.
   * @returns {boolean}
   */
  hasAttributeCondition() {
    return this._hasAttributeCondition;
  }

  /**
   * Whether any segment carries a position selector.
   * @returns {boolean}
   */
  hasPositionSelector() {
    return this._hasPositionSelector;
  }

  /**
   * Get the original pattern string, exactly as passed to the constructor.
   * @returns {string}
   */
  toString() {
    return this.pattern;
  }
}
package/src/Matcher.js ADDED
@@ -0,0 +1,380 @@
1
/**
 * Matcher - Tracks current path in XML/JSON tree and matches against Expressions.
 *
 * The matcher keeps a stack of nodes (`path`) from the root to the current tag.
 * Attribute values are kept only on the current (top) node; they are dropped
 * from a node as soon as a child is pushed below it. A parallel array
 * (`siblingStacks`) holds, per depth, a Map of tag name -> number of times that
 * tag has been pushed at that depth under the currently open parent; it is
 * used to derive each node's `position` and `counter`.
 *
 * @example
 * const matcher = new Matcher();
 * matcher.push("root", {});
 * matcher.push("users", {});
 * matcher.push("user", { id: "123", type: "admin" });
 *
 * const expr = new Expression("root.users.user");
 * matcher.matches(expr); // true
 */
export default class Matcher {
  /**
   * Create a new Matcher.
   * @param {Object} [options] - Configuration options
   * @param {string} [options.separator] - Default separator used by toString() (default: '.')
   */
  constructor(options = {}) {
    this.separator = options?.separator || '.';
    // Each path node: { tag: string, values?: object, position: number, counter: number }
    // `values` is only present on the current (last) node.
    this.path = [];
    // siblingStacks[depth]: Map<tagName, count> of tags pushed at that depth
    // under the parent that is currently open.
    this.siblingStacks = [];
  }

  /**
   * Push a new tag onto the path. Position and counter are derived from the
   * tags previously pushed at the same depth under the same parent.
   * @param {string} tagName - Name of the tag
   * @param {Object} [attrValues] - Attribute key-value pairs for the new node
   */
  push(tagName, attrValues = null) {
    // The previous current node becomes an ancestor: drop its attribute values.
    if (this.path.length > 0) {
      this.path[this.path.length - 1].values = undefined;
    }

    // Get or create sibling tracking for the depth the new node will occupy.
    const currentLevel = this.path.length;
    if (!this.siblingStacks[currentLevel]) {
      this.siblingStacks[currentLevel] = new Map();
    }
    const siblings = this.siblingStacks[currentLevel];

    // counter: how many times this tag name already appeared at this level.
    const counter = siblings.get(tagName) || 0;

    // position: total number of children already pushed at this level.
    let position = 0;
    for (const count of siblings.values()) {
      position += count;
    }

    siblings.set(tagName, counter + 1);

    const node = { tag: tagName, position, counter };
    if (attrValues !== null && attrValues !== undefined) {
      node.values = attrValues;
    }

    this.path.push(node);
  }

  /**
   * Pop the last tag from the path.
   *
   * Sibling counts for the popped node's own level are kept, so the next
   * sibling pushed under the same parent receives the correct position and
   * counter. Counts for deeper levels (the popped node's children) are
   * discarded because that subtree is now closed.
   *
   * @returns {Object|undefined} The popped node, or undefined if the path was empty
   */
  pop() {
    if (this.path.length === 0) {
      return undefined;
    }

    const node = this.path.pop();

    // After the pop, path.length is the popped node's level. Keep levels
    // 0..path.length and drop everything deeper.
    const levelsToKeep = this.path.length + 1;
    if (this.siblingStacks.length > levelsToKeep) {
      this.siblingStacks.length = levelsToKeep;
    }

    return node;
  }

  /**
   * Replace the current node's attribute values.
   * Useful when attributes are parsed after push. A null/undefined argument
   * or an empty path is a no-op.
   * @param {Object} attrValues - Attribute values
   */
  updateCurrent(attrValues) {
    if (this.path.length === 0) return;
    if (attrValues === null || attrValues === undefined) return;
    this.path[this.path.length - 1].values = attrValues;
  }

  /**
   * Get current tag name.
   * @returns {string|undefined} undefined when the path is empty
   */
  getCurrentTag() {
    return this.path.length > 0 ? this.path[this.path.length - 1].tag : undefined;
  }

  /**
   * Get an attribute value of the current node.
   * @param {string} attrName - Attribute name
   * @returns {*} Attribute value, or undefined if the path is empty or the
   *   node has no values
   */
  getAttrValue(attrName) {
    if (this.path.length === 0) return undefined;
    const current = this.path[this.path.length - 1];
    return current.values?.[attrName];
  }

  /**
   * Check if the current node has an attribute. Only own properties of the
   * values object count, so names inherited from Object.prototype (such as
   * "toString") are not reported as attributes.
   * @param {string} attrName - Attribute name
   * @returns {boolean}
   */
  hasAttr(attrName) {
    if (this.path.length === 0) return false;
    return Matcher._hasOwnAttr(this.path[this.path.length - 1].values, attrName);
  }

  /**
   * Get current node's sibling position (child index in parent).
   * @returns {number} -1 when the path is empty
   */
  getPosition() {
    if (this.path.length === 0) return -1;
    return this.path[this.path.length - 1].position ?? 0;
  }

  /**
   * Get current node's repeat counter (occurrence count of this tag name).
   * @returns {number} -1 when the path is empty
   */
  getCounter() {
    if (this.path.length === 0) return -1;
    return this.path[this.path.length - 1].counter ?? 0;
  }

  /**
   * Get current node's sibling index (alias for getPosition for backward compatibility).
   * @returns {number}
   * @deprecated Use getPosition() or getCounter() instead
   */
  getIndex() {
    return this.getPosition();
  }

  /**
   * Get current path depth.
   * @returns {number}
   */
  getDepth() {
    return this.path.length;
  }

  /**
   * Get path as string.
   * @param {string} [separator] - Optional separator (uses the matcher default if omitted or empty)
   * @returns {string}
   */
  toString(separator) {
    const sep = separator || this.separator;
    return this.path.map((n) => n.tag).join(sep);
  }

  /**
   * Get path as array of tag names.
   * @returns {string[]}
   */
  toArray() {
    return this.path.map((n) => n.tag);
  }

  /**
   * Reset the path and all sibling tracking to empty.
   */
  reset() {
    this.path = [];
    this.siblingStacks = [];
  }

  /**
   * Match current path against an Expression.
   *
   * Without a deep wildcard the pattern must describe the whole path, segment
   * for segment. With a deep wildcard the pattern is matched from the current
   * node upwards and is not anchored at the root of the path.
   *
   * @param {Expression} expression - The expression to match against; only its
   *   `segments` array and `hasDeepWildcard()` are used
   * @returns {boolean} True if current path matches the expression; always
   *   false for an empty path or an expression with no segments
   */
  matches(expression) {
    const segments = expression.segments;

    if (segments.length === 0 || this.path.length === 0) {
      return false;
    }

    if (expression.hasDeepWildcard()) {
      return this._matchWithDeepWildcard(segments);
    }

    return this._matchSimple(segments);
  }

  /**
   * Match simple path (no deep wildcards): one segment per path node.
   * @private
   * @param {Array<Object>} segments - Parsed expression segments
   * @returns {boolean}
   */
  _matchSimple(segments) {
    if (this.path.length !== segments.length) {
      return false;
    }

    const lastIdx = this.path.length - 1;
    return segments.every((segment, i) => this._matchSegment(segment, this.path[i], i === lastIdx));
  }

  /**
   * Match path with deep wildcards, walking from the current node towards the root.
   * @private
   * @param {Array<Object>} segments - Parsed expression segments
   * @returns {boolean}
   */
  _matchWithDeepWildcard(segments) {
    return this._matchDeepFrom(segments, segments.length - 1, this.path.length - 1);
  }

  /**
   * Match segments[0..segIdx] against path[0..pathIdx], bottom-up.
   *
   * A deep wildcard may absorb zero or more path levels. Every candidate
   * anchor for the segment above the wildcard is tried (nearest first), so a
   * tag that repeats on the path cannot cause a false negative.
   *
   * @private
   * @param {Array<Object>} segments - Parsed expression segments
   * @param {number} segIdx - Index of the lowest segment still to match
   * @param {number} pathIdx - Index of the lowest path node still available
   * @returns {boolean}
   */
  _matchDeepFrom(segments, segIdx, pathIdx) {
    const lastIdx = this.path.length - 1;

    while (segIdx >= 0 && pathIdx >= 0) {
      const segment = segments[segIdx];

      if (segment.type !== 'deep-wildcard') {
        if (!this._matchSegment(segment, this.path[pathIdx], pathIdx === lastIdx)) {
          return false;
        }
        pathIdx--;
        segIdx--;
        continue;
      }

      // ".." matches zero or more levels.
      segIdx--;
      if (segIdx < 0) {
        // Pattern starts with "..": whatever remains above is accepted.
        return true;
      }

      // Try each place where the segment above the wildcard could sit.
      const nextSeg = segments[segIdx];
      for (let i = pathIdx; i >= 0; i--) {
        if (
          this._matchSegment(nextSeg, this.path[i], i === lastIdx) &&
          this._matchDeepFrom(segments, segIdx - 1, i - 1)
        ) {
          return true;
        }
      }
      return false;
    }

    // Path exhausted or all segments consumed. Leftover segments are only
    // acceptable if they are deep wildcards, each absorbing zero levels.
    for (; segIdx >= 0; segIdx--) {
      if (segments[segIdx].type !== 'deep-wildcard') {
        return false;
      }
    }
    return true;
  }

  /**
   * Match a single segment against a node.
   *
   * Attribute conditions and position selectors are only evaluated on the
   * current node; a segment carrying either never matches an ancestor.
   *
   * @private
   * @param {Object} segment - Segment from Expression
   * @param {Object} node - Node from path
   * @param {boolean} isCurrentNode - Whether this is the current (last) node
   * @returns {boolean}
   */
  _matchSegment(segment, node, isCurrentNode) {
    // Tag name ("*" is a wildcard).
    if (segment.tag !== '*' && segment.tag !== node.tag) {
      return false;
    }

    // Attribute condition.
    if (segment.attrName !== undefined) {
      if (!isCurrentNode) {
        // Ancestors no longer carry attribute values.
        return false;
      }

      if (!Matcher._hasOwnAttr(node.values, segment.attrName)) {
        return false;
      }

      // Values are compared as strings so "1" matches the number 1.
      if (
        segment.attrValue !== undefined &&
        String(node.values[segment.attrName]) !== String(segment.attrValue)
      ) {
        return false;
      }
    }

    // Position selector, evaluated against the counter (not the position).
    if (segment.position !== undefined) {
      if (!isCurrentNode) {
        return false;
      }

      const counter = node.counter ?? 0;

      if (segment.position === 'first' && counter !== 0) {
        return false;
      } else if (segment.position === 'odd' && counter % 2 !== 1) {
        return false;
      } else if (segment.position === 'even' && counter % 2 !== 0) {
        return false;
      } else if (segment.position === 'nth' && counter !== segment.positionValue) {
        return false;
      }
    }

    return true;
  }

  /**
   * Own-property test for an attribute values object.
   * @private
   * @param {Object|undefined|null} values - Attribute values of a node
   * @param {string} attrName - Attribute name
   * @returns {boolean}
   */
  static _hasOwnAttr(values, attrName) {
    if (values === undefined || values === null) return false;
    return Object.prototype.hasOwnProperty.call(values, attrName);
  }

  /**
   * Create a snapshot of current state. Nodes and sibling maps are copied;
   * attribute value objects are shared with the live matcher (shallow copy).
   * @returns {Object} State snapshot: { path, siblingStacks }
   */
  snapshot() {
    return {
      path: this.path.map((node) => ({ ...node })),
      siblingStacks: this.siblingStacks.map((map) => new Map(map)),
    };
  }

  /**
   * Restore state from a snapshot produced by snapshot(). The snapshot is
   * copied, so it can be restored more than once.
   * @param {Object} snapshot - State snapshot
   */
  restore(snapshot) {
    this.path = snapshot.path.map((node) => ({ ...node }));
    this.siblingStacks = snapshot.siblingStacks.map((map) => new Map(map));
  }
}
package/src/index.js ADDED
@@ -0,0 +1,28 @@
1
+ /**
2
+ * path-expression-matcher - XML/JSON path matching library
3
+ *
4
+ * Provides efficient path tracking and pattern matching for XML/JSON parsers.
5
+ *
6
+ * @example
7
+ * import { Expression, Matcher } from 'path-expression-matcher';
8
+ *
9
+ * // Create expression (parse once)
10
+ * const expr = new Expression("root.users.user[id]");
11
+ *
12
+ * // Create matcher (track path)
13
+ * const matcher = new Matcher();
14
+ * matcher.push("root");
15
+ * matcher.push("users");
16
+ * matcher.push("user", { id: "123", type: "admin" });
17
+ *
18
+ * // Match
19
+ * if (matcher.matches(expr)) {
20
+ * console.log("Match found!");
21
+ * }
22
+ */
23
+
24
+ import Expression from './Expression.js';
25
+ import Matcher from './Matcher.js';
26
+
27
+ export { Expression, Matcher };
28
+ export default { Expression, Matcher };