path-expression-matcher 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +531 -0
- package/package.json +52 -0
- package/src/Expression.js +169 -0
- package/src/Matcher.js +380 -0
- package/src/index.js +28 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,531 @@
|
|
|
1
|
+
# path-expression-matcher
|
|
2
|
+
|
|
3
|
+
Efficient path tracking and pattern matching for XML, JSON, YAML or any other parsers.
|
|
4
|
+
|
|
5
|
+
## 🎯 Purpose
|
|
6
|
+
|
|
7
|
+
`path-expression-matcher` provides two core classes for tracking and matching paths:
|
|
8
|
+
|
|
9
|
+
- **`Expression`**: Parses and stores pattern expressions (e.g., `"root.users.user[id]"`)
|
|
10
|
+
- **`Matcher`**: Tracks current path during parsing and matches against expressions
|
|
11
|
+
|
|
12
|
+
Compatible with [fast-xml-parser](https://github.com/NaturalIntelligence/fast-xml-parser) and similar tools.
|
|
13
|
+
|
|
14
|
+
## 📦 Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
npm install path-expression-matcher
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## 🚀 Quick Start
|
|
21
|
+
|
|
22
|
+
```javascript
|
|
23
|
+
import { Expression, Matcher } from 'path-expression-matcher';
|
|
24
|
+
|
|
25
|
+
// Create expression (parse once, reuse many times)
|
|
26
|
+
const expr = new Expression("root.users.user");
|
|
27
|
+
|
|
28
|
+
// Create matcher (tracks current path)
|
|
29
|
+
const matcher = new Matcher();
|
|
30
|
+
|
|
31
|
+
matcher.push("root");
|
|
32
|
+
matcher.push("users");
|
|
33
|
+
matcher.push("user", { id: "123" });
|
|
34
|
+
|
|
35
|
+
// Match current path against expression
|
|
36
|
+
if (matcher.matches(expr)) {
|
|
37
|
+
console.log("Match found!");
|
|
38
|
+
console.log("Current path:", matcher.toString()); // "root.users.user"
|
|
39
|
+
}
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## 📖 Pattern Syntax
|
|
43
|
+
|
|
44
|
+
### Basic Paths
|
|
45
|
+
|
|
46
|
+
```javascript
|
|
47
|
+
"root.users.user" // Exact path match
|
|
48
|
+
"*.users.user" // Wildcard: any parent
|
|
49
|
+
"root.*.user" // Wildcard: any middle
|
|
50
|
+
"root.users.*" // Wildcard: any child
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Deep Wildcard
|
|
54
|
+
|
|
55
|
+
```javascript
|
|
56
|
+
"..user" // user anywhere in tree
|
|
57
|
+
"root..user" // user anywhere under root
|
|
58
|
+
"..users..user" // users somewhere, then user below it
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
### Attribute Matching
|
|
62
|
+
|
|
63
|
+
```javascript
|
|
64
|
+
"user[id]" // user with "id" attribute
|
|
65
|
+
"user[type=admin]" // user with type="admin" (current node only)
|
|
66
|
+
"root[lang]..user" // user under root that has "lang" attribute
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### Position Selectors
|
|
70
|
+
|
|
71
|
+
```javascript
|
|
72
|
+
"user:first" // First user (counter=0)
|
|
73
|
+
"user:nth(2)" // Third user (counter=2, zero-based)
|
|
74
|
+
"user:odd" // Odd-numbered users (counter=1,3,5...)
|
|
75
|
+
"user:even" // Even-numbered users (counter=0,2,4...)
|
|
76
|
+
"root.users.user:first" // First user under users
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
**Note:** Position selectors use the **counter** (occurrence count of the tag name), not the position (child index). For example, in `<root><a/><b/><a/></root>`, the second `<a/>` has position=2 but counter=1.
|
|
80
|
+
|
|
81
|
+
### Combined Patterns
|
|
82
|
+
|
|
83
|
+
```javascript
|
|
84
|
+
"..user[id]:first" // First user with id, anywhere
|
|
85
|
+
"root..user[type=admin]" // Admin user under root
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## 🔧 API Reference
|
|
89
|
+
|
|
90
|
+
### Expression
|
|
91
|
+
|
|
92
|
+
#### Constructor
|
|
93
|
+
|
|
94
|
+
```javascript
|
|
95
|
+
new Expression(pattern, options)
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
**Parameters:**
|
|
99
|
+
- `pattern` (string): Pattern to parse
|
|
100
|
+
- `options.separator` (string): Path separator (default: `'.'`)
|
|
101
|
+
|
|
102
|
+
**Example:**
|
|
103
|
+
```javascript
|
|
104
|
+
const expr1 = new Expression("root.users.user");
|
|
105
|
+
const expr2 = new Expression("root/users/user", { separator: '/' });
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
#### Methods
|
|
109
|
+
|
|
110
|
+
- `hasDeepWildcard()` → boolean
|
|
111
|
+
- `hasAttributeCondition()` → boolean
|
|
112
|
+
- `hasPositionSelector()` → boolean
|
|
113
|
+
- `toString()` → string
|
|
114
|
+
|
|
115
|
+
### Matcher
|
|
116
|
+
|
|
117
|
+
#### Constructor
|
|
118
|
+
|
|
119
|
+
```javascript
|
|
120
|
+
new Matcher(options)
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
**Parameters:**
|
|
124
|
+
- `options.separator` (string): Default path separator (default: `'.'`)
|
|
125
|
+
|
|
126
|
+
#### Path Tracking Methods
|
|
127
|
+
|
|
128
|
+
##### `push(tagName, attrValues)`
|
|
129
|
+
|
|
130
|
+
Add a tag to the current path. Position and counter are automatically calculated.
|
|
131
|
+
|
|
132
|
+
**Parameters:**
|
|
133
|
+
- `tagName` (string): Tag name
|
|
134
|
+
- `attrValues` (object, optional): Attribute key-value pairs (current node only)
|
|
135
|
+
|
|
136
|
+
**Example:**
|
|
137
|
+
```javascript
|
|
138
|
+
matcher.push("user", { id: "123", type: "admin" });
|
|
139
|
+
matcher.push("item"); // No attributes
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
**Position vs Counter:**
|
|
143
|
+
- **Position**: The child index in the parent (0, 1, 2, 3...)
|
|
144
|
+
- **Counter**: How many times this tag name appeared at this level (0, 1, 2...)
|
|
145
|
+
|
|
146
|
+
Example:
|
|
147
|
+
```xml
|
|
148
|
+
<root>
|
|
149
|
+
<a/> <!-- position=0, counter=0 -->
|
|
150
|
+
<b/> <!-- position=1, counter=0 -->
|
|
151
|
+
<a/> <!-- position=2, counter=1 -->
|
|
152
|
+
</root>
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
##### `pop()`
|
|
156
|
+
|
|
157
|
+
Remove the last tag from the path.
|
|
158
|
+
|
|
159
|
+
```javascript
|
|
160
|
+
matcher.pop();
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
##### `updateCurrent(attrValues)`
|
|
164
|
+
|
|
165
|
+
Update current node's attributes (useful when attributes are parsed after push).
|
|
166
|
+
|
|
167
|
+
```javascript
|
|
168
|
+
matcher.push("user"); // Don't know values yet
|
|
169
|
+
// ... parse attributes ...
|
|
170
|
+
matcher.updateCurrent({ id: "123" });
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
##### `reset()`
|
|
174
|
+
|
|
175
|
+
Clear the entire path.
|
|
176
|
+
|
|
177
|
+
```javascript
|
|
178
|
+
matcher.reset();
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
#### Query Methods
|
|
182
|
+
|
|
183
|
+
##### `matches(expression)`
|
|
184
|
+
|
|
185
|
+
Check if current path matches an Expression.
|
|
186
|
+
|
|
187
|
+
```javascript
|
|
188
|
+
const expr = new Expression("root.users.user");
|
|
189
|
+
if (matcher.matches(expr)) {
|
|
190
|
+
// Current path matches
|
|
191
|
+
}
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
##### `getCurrentTag()`
|
|
195
|
+
|
|
196
|
+
Get current tag name.
|
|
197
|
+
|
|
198
|
+
```javascript
|
|
199
|
+
const tag = matcher.getCurrentTag(); // "user"
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
##### `getAttrValue(attrName)`
|
|
203
|
+
|
|
204
|
+
Get attribute value of current node.
|
|
205
|
+
|
|
206
|
+
```javascript
|
|
207
|
+
const id = matcher.getAttrValue("id"); // "123"
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
##### `hasAttr(attrName)`
|
|
211
|
+
|
|
212
|
+
Check if current node has an attribute.
|
|
213
|
+
|
|
214
|
+
```javascript
|
|
215
|
+
if (matcher.hasAttr("id")) {
|
|
216
|
+
// Current node has "id" attribute
|
|
217
|
+
}
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
##### `getPosition()`
|
|
221
|
+
|
|
222
|
+
Get sibling position of current node (child index in parent).
|
|
223
|
+
|
|
224
|
+
```javascript
|
|
225
|
+
const position = matcher.getPosition(); // 0, 1, 2, ...
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
##### `getCounter()`
|
|
229
|
+
|
|
230
|
+
Get repeat counter of current node (occurrence count of this tag name).
|
|
231
|
+
|
|
232
|
+
```javascript
|
|
233
|
+
const counter = matcher.getCounter(); // 0, 1, 2, ...
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
##### `getIndex()` (deprecated)
|
|
237
|
+
|
|
238
|
+
Alias for `getPosition()`. Use `getPosition()` or `getCounter()` instead for clarity.
|
|
239
|
+
|
|
240
|
+
```javascript
|
|
241
|
+
const index = matcher.getIndex(); // Same as getPosition()
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
##### `getDepth()`
|
|
245
|
+
|
|
246
|
+
Get current path depth.
|
|
247
|
+
|
|
248
|
+
```javascript
|
|
249
|
+
const depth = matcher.getDepth(); // 3 for "root.users.user"
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
##### `toString(separator?)`
|
|
253
|
+
|
|
254
|
+
Get path as string.
|
|
255
|
+
|
|
256
|
+
```javascript
|
|
257
|
+
const path = matcher.toString(); // "root.users.user"
|
|
258
|
+
const path2 = matcher.toString('/'); // "root/users/user"
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
##### `toArray()`
|
|
262
|
+
|
|
263
|
+
Get path as array.
|
|
264
|
+
|
|
265
|
+
```javascript
|
|
266
|
+
const arr = matcher.toArray(); // ["root", "users", "user"]
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
#### State Management
|
|
270
|
+
|
|
271
|
+
##### `snapshot()`
|
|
272
|
+
|
|
273
|
+
Create a snapshot of current state.
|
|
274
|
+
|
|
275
|
+
```javascript
|
|
276
|
+
const snapshot = matcher.snapshot();
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
##### `restore(snapshot)`
|
|
280
|
+
|
|
281
|
+
Restore from a snapshot.
|
|
282
|
+
|
|
283
|
+
```javascript
|
|
284
|
+
matcher.restore(snapshot);
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
## 💡 Usage Examples
|
|
288
|
+
|
|
289
|
+
### Example 1: XML Parser with stopNodes
|
|
290
|
+
|
|
291
|
+
```javascript
|
|
292
|
+
import { XMLParser } from 'fast-xml-parser';
|
|
293
|
+
import { Expression, Matcher } from 'path-expression-matcher';
|
|
294
|
+
|
|
295
|
+
class MyParser {
|
|
296
|
+
constructor() {
|
|
297
|
+
this.matcher = new Matcher();
|
|
298
|
+
|
|
299
|
+
// Pre-compile stop node patterns
|
|
300
|
+
this.stopNodeExpressions = [
|
|
301
|
+
new Expression("html.body.script"),
|
|
302
|
+
new Expression("html.body.style"),
|
|
303
|
+
new Expression("..svg"),
|
|
304
|
+
];
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
parseTag(tagName, attrs) {
|
|
308
|
+
this.matcher.push(tagName, attrs);
|
|
309
|
+
|
|
310
|
+
// Check if this is a stop node
|
|
311
|
+
for (const expr of this.stopNodeExpressions) {
|
|
312
|
+
if (this.matcher.matches(expr)) {
|
|
313
|
+
// Don't parse children, read as raw text
|
|
314
|
+
return this.readRawContent();
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
// Continue normal parsing
|
|
319
|
+
this.parseChildren();
|
|
320
|
+
|
|
321
|
+
this.matcher.pop();
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
### Example 2: Conditional Processing
|
|
327
|
+
|
|
328
|
+
```javascript
|
|
329
|
+
const matcher = new Matcher();
|
|
330
|
+
const userExpr = new Expression("..user[type=admin]");
|
|
331
|
+
const firstItemExpr = new Expression("..item:first");
|
|
332
|
+
|
|
333
|
+
function processTag(tagName, value, attrs) {
|
|
334
|
+
matcher.push(tagName, attrs);
|
|
335
|
+
|
|
336
|
+
if (matcher.matches(userExpr)) {
|
|
337
|
+
value = enhanceAdminUser(value);
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
if (matcher.matches(firstItemExpr)) {
|
|
341
|
+
value = markAsFirst(value);
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
matcher.pop();
|
|
345
|
+
return value;
|
|
346
|
+
}
|
|
347
|
+
```
|
|
348
|
+
|
|
349
|
+
### Example 3: Path-based Filtering
|
|
350
|
+
|
|
351
|
+
```javascript
|
|
352
|
+
const patterns = [
|
|
353
|
+
new Expression("data.users.user"),
|
|
354
|
+
new Expression("data.posts.post"),
|
|
355
|
+
new Expression("..comment[approved=true]"),
|
|
356
|
+
];
|
|
357
|
+
|
|
358
|
+
function shouldInclude(matcher) {
|
|
359
|
+
return patterns.some(expr => matcher.matches(expr));
|
|
360
|
+
}
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
### Example 4: Custom Separator
|
|
364
|
+
|
|
365
|
+
```javascript
|
|
366
|
+
const matcher = new Matcher({ separator: '/' });
|
|
367
|
+
const expr = new Expression("root/config/database", { separator: '/' });
|
|
368
|
+
|
|
369
|
+
matcher.push("root");
|
|
370
|
+
matcher.push("config");
|
|
371
|
+
matcher.push("database");
|
|
372
|
+
|
|
373
|
+
console.log(matcher.toString()); // "root/config/database"
|
|
374
|
+
console.log(matcher.matches(expr)); // true
|
|
375
|
+
```
|
|
376
|
+
|
|
377
|
+
### Example 5: Attribute Checking
|
|
378
|
+
|
|
379
|
+
```javascript
|
|
380
|
+
const matcher = new Matcher();
|
|
381
|
+
matcher.push("root");
|
|
382
|
+
matcher.push("user", { id: "123", type: "admin", status: "active" });
|
|
383
|
+
|
|
384
|
+
// Check attribute existence (current node only)
|
|
385
|
+
console.log(matcher.hasAttr("id")); // true
|
|
386
|
+
console.log(matcher.hasAttr("email")); // false
|
|
387
|
+
|
|
388
|
+
// Get attribute value (current node only)
|
|
389
|
+
console.log(matcher.getAttrValue("type")); // "admin"
|
|
390
|
+
|
|
391
|
+
// Match by attribute
|
|
392
|
+
const expr1 = new Expression("user[id]");
|
|
393
|
+
console.log(matcher.matches(expr1)); // true
|
|
394
|
+
|
|
395
|
+
const expr2 = new Expression("user[type=admin]");
|
|
396
|
+
console.log(matcher.matches(expr2)); // true
|
|
397
|
+
```
|
|
398
|
+
|
|
399
|
+
### Example 6: Position vs Counter
|
|
400
|
+
|
|
401
|
+
```javascript
|
|
402
|
+
const matcher = new Matcher();
|
|
403
|
+
matcher.push("root");
|
|
404
|
+
|
|
405
|
+
// Mixed tags at same level
|
|
406
|
+
matcher.push("item"); // position=0, counter=0 (first item)
|
|
407
|
+
matcher.pop();
|
|
408
|
+
|
|
409
|
+
matcher.push("div"); // position=1, counter=0 (first div)
|
|
410
|
+
matcher.pop();
|
|
411
|
+
|
|
412
|
+
matcher.push("item"); // position=2, counter=1 (second item)
|
|
413
|
+
|
|
414
|
+
console.log(matcher.getPosition()); // 2 (third child overall)
|
|
415
|
+
console.log(matcher.getCounter()); // 1 (second "item" specifically)
|
|
416
|
+
|
|
417
|
+
// :first uses counter, not position
|
|
418
|
+
const expr = new Expression("root.item:first");
|
|
419
|
+
console.log(matcher.matches(expr)); // false (counter=1, not 0)
|
|
420
|
+
```
|
|
421
|
+
|
|
422
|
+
## 🏗️ Architecture
|
|
423
|
+
|
|
424
|
+
### Data Storage Strategy
|
|
425
|
+
|
|
426
|
+
**Ancestor nodes:** Store only tag name, position, and counter (minimal memory)
|
|
427
|
+
**Current node:** Store tag name, position, counter, and attribute values
|
|
428
|
+
|
|
429
|
+
This design minimizes memory usage:
|
|
430
|
+
- No attribute names stored (derived from values object when needed)
|
|
431
|
+
- Attribute values only for current node, not ancestors
|
|
432
|
+
- Attribute checking for ancestors is not supported (acceptable trade-off)
|
|
433
|
+
- For 1M nodes with 3 attributes each, saves ~50MB vs storing attribute names
|
|
434
|
+
|
|
435
|
+
### Matching Strategy
|
|
436
|
+
|
|
437
|
+
Matching is performed **bottom-to-top** (from current node toward root):
|
|
438
|
+
1. Start at current node
|
|
439
|
+
2. Match segments from pattern end to start
|
|
440
|
+
3. Attribute checking only works for current node (ancestors have no attribute data)
|
|
441
|
+
4. Position selectors use **counter** (occurrence count), not position (child index)
|
|
442
|
+
|
|
443
|
+
### Performance
|
|
444
|
+
|
|
445
|
+
- **Expression parsing:** One-time cost when Expression is created
|
|
446
|
+
- **Expression analysis:** Cached (hasDeepWildcard, hasAttributeCondition, hasPositionSelector)
|
|
447
|
+
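- **Path tracking:** O(1) for `pop()`; `push()` is O(k), where k is the number of distinct tag names already seen at the current level (their counts are summed to compute the position)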
|
|
448
|
+
- **Pattern matching:** O(n*m) where n = path depth, m = pattern segments
|
|
449
|
+
- **Memory per ancestor node:** ~40-60 bytes (tag, position, counter only)
|
|
450
|
+
- **Memory per current node:** ~80-120 bytes (adds attribute values)
|
|
451
|
+
|
|
452
|
+
## 🎓 Design Patterns
|
|
453
|
+
|
|
454
|
+
### Pre-compile Patterns (Recommended)
|
|
455
|
+
|
|
456
|
+
```javascript
|
|
457
|
+
// ✅ GOOD: Parse once, reuse many times
|
|
458
|
+
const expr = new Expression("..user[id]");
|
|
459
|
+
|
|
460
|
+
for (let i = 0; i < 1000; i++) {
|
|
461
|
+
if (matcher.matches(expr)) {
|
|
462
|
+
// ...
|
|
463
|
+
}
|
|
464
|
+
}
|
|
465
|
+
```
|
|
466
|
+
|
|
467
|
+
```javascript
|
|
468
|
+
// ❌ BAD: Parse on every iteration
|
|
469
|
+
for (let i = 0; i < 1000; i++) {
|
|
470
|
+
if (matcher.matches(new Expression("..user[id]"))) {
|
|
471
|
+
// ...
|
|
472
|
+
}
|
|
473
|
+
}
|
|
474
|
+
```
|
|
475
|
+
|
|
476
|
+
### Batch Pattern Checking
|
|
477
|
+
|
|
478
|
+
```javascript
|
|
479
|
+
// For multiple patterns, check all at once
|
|
480
|
+
const patterns = [
|
|
481
|
+
new Expression("..user"),
|
|
482
|
+
new Expression("..post"),
|
|
483
|
+
new Expression("..comment"),
|
|
484
|
+
];
|
|
485
|
+
|
|
486
|
+
function matchesAny(matcher, patterns) {
|
|
487
|
+
return patterns.some(expr => matcher.matches(expr));
|
|
488
|
+
}
|
|
489
|
+
```
|
|
490
|
+
|
|
491
|
+
## 🔗 Integration with fast-xml-parser
|
|
492
|
+
|
|
493
|
+
**Basic integration:**
|
|
494
|
+
|
|
495
|
+
```javascript
|
|
496
|
+
import { XMLParser } from 'fast-xml-parser';
|
|
497
|
+
import { Expression, Matcher } from 'path-expression-matcher';
|
|
498
|
+
|
|
499
|
+
const parser = new XMLParser({
|
|
500
|
+
// Custom options using path-expression-matcher
|
|
501
|
+
stopNodes: ["script", "style"].map(tag => new Expression(`..${tag}`)),
|
|
502
|
+
|
|
503
|
+
tagValueProcessor: (tagName, value, jPath, hasAttrs, isLeaf, matcher) => {
|
|
504
|
+
// matcher is available in callbacks
|
|
505
|
+
if (matcher.matches(new Expression("..user[type=admin]"))) {
|
|
506
|
+
return enhanceValue(value);
|
|
507
|
+
}
|
|
508
|
+
return value;
|
|
509
|
+
}
|
|
510
|
+
});
|
|
511
|
+
```
|
|
512
|
+
|
|
513
|
+
## 🧪 Testing
|
|
514
|
+
|
|
515
|
+
```bash
|
|
516
|
+
npm test
|
|
517
|
+
```
|
|
518
|
+
|
|
519
|
+
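The test suite contains 77 tests (run it from a repository checkout — the published package ships only `src/`, `README.md` and `LICENSE`, not the `test/` directory). They cover: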
|
|
520
|
+
- Pattern parsing (exact, wildcards, attributes, position)
|
|
521
|
+
- Path tracking (push, pop, update)
|
|
522
|
+
- Pattern matching (all combinations)
|
|
523
|
+
- Edge cases and error conditions
|
|
524
|
+
|
|
525
|
+
## 📄 License
|
|
526
|
+
|
|
527
|
+
MIT
|
|
528
|
+
|
|
529
|
+
## 🤝 Contributing
|
|
530
|
+
|
|
531
|
+
Issues and PRs welcome! This package is designed to be used by XML/JSON parsers like fast-xml-parser.
|
package/package.json
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "path-expression-matcher",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Efficient path tracking and pattern matching for XML/JSON parsers",
|
|
5
|
+
"main": "src/index.js",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": "./src/index.js",
|
|
9
|
+
"./Expression": "./src/Expression.js",
|
|
10
|
+
"./Matcher": "./src/Matcher.js"
|
|
11
|
+
},
|
|
12
|
+
"scripts": {
|
|
13
|
+
"test": "node test/test.js"
|
|
14
|
+
},
|
|
15
|
+
"keywords": [
|
|
16
|
+
"xml",
|
|
17
|
+
"json",
|
|
18
|
+
"yaml",
|
|
19
|
+
"path",
|
|
20
|
+
"matcher",
|
|
21
|
+
"pattern",
|
|
22
|
+
"xpath",
|
|
23
|
+
"selector",
|
|
24
|
+
"parser",
|
|
25
|
+
"fast-xml-parser",
|
|
26
|
+
"fast-xml-builder"
|
|
27
|
+
],
|
|
28
|
+
"author": "Amit Gupta (https://solothought.com)",
|
|
29
|
+
"license": "MIT",
|
|
30
|
+
"repository": {
|
|
31
|
+
"type": "git",
|
|
32
|
+
"url": "https://github.com/NaturalIntelligence/path-expression-matcher"
|
|
33
|
+
},
|
|
34
|
+
"bugs": {
|
|
35
|
+
"url": "https://github.com/NaturalIntelligence/path-expression-matcher/issues"
|
|
36
|
+
},
|
|
37
|
+
"homepage": "https://github.com/NaturalIntelligence/path-expression-matcher#readme",
|
|
38
|
+
"engines": {
|
|
39
|
+
"node": ">=14.0.0"
|
|
40
|
+
},
|
|
41
|
+
"files": [
|
|
42
|
+
"src/",
|
|
43
|
+
"README.md",
|
|
44
|
+
"LICENSE"
|
|
45
|
+
],
|
|
46
|
+
"funding": [
|
|
47
|
+
{
|
|
48
|
+
"type": "github",
|
|
49
|
+
"url": "https://github.com/sponsors/NaturalIntelligence"
|
|
50
|
+
}
|
|
51
|
+
]
|
|
52
|
+
}
|
package/src/Expression.js
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
/**
 * Expression - a path pattern parsed once into an array of segments.
 *
 * Parsing happens in the constructor; the resulting `segments` array and the
 * cached `has*` flags can then be reused for any number of matches.
 *
 * Segment shapes produced by the parser:
 *   { type: 'deep-wildcard' }                       - two consecutive separators
 *   { type: 'tag', tag, attrName?, attrValue?,      - everything else
 *     position?, positionValue? }
 *
 * A `*` part is parsed like any other tag name (tag === '*').
 * Tag names cannot contain `[`, `]` or `:` because those characters introduce
 * the attribute condition and the position selector.
 *
 * @example
 * const expr = new Expression("root.users.user");
 * const expr2 = new Expression("..user[id]:first");
 * const expr3 = new Expression("root/users/user", { separator: '/' });
 * const expr4 = new Expression("pkg[version=1.0]"); // separator allowed inside [...]
 */
export default class Expression {
  /**
   * Create a new Expression.
   * @param {string} pattern - Pattern string (e.g., "root.users.user", "..user[id]")
   * @param {Object} options - Configuration options
   * @param {string} options.separator - Path separator (default: '.'); may be
   *   longer than one character. Any falsy value falls back to '.'.
   * @throws {Error} If a segment or a position selector is malformed.
   */
  constructor(pattern, options = {}) {
    this.pattern = pattern;
    this.separator = options.separator || '.';
    this.segments = this._parse(pattern);

    // Computed once here so the has*() queries are plain property reads.
    this._hasDeepWildcard = this.segments.some(seg => seg.type === 'deep-wildcard');
    this._hasAttributeCondition = this.segments.some(seg => seg.attrName !== undefined);
    this._hasPositionSelector = this.segments.some(seg => seg.position !== undefined);
  }

  /**
   * Split a pattern into segment objects.
   *
   * A single separator ends the current part; two consecutive separators
   * additionally emit a deep-wildcard segment. Separators that appear between
   * `[` and `]` belong to the attribute condition and do not split the path,
   * so values such as `1.0` or `/home` can be matched.
   * Empty / whitespace-only parts are skipped.
   *
   * @private
   * @param {string} pattern - Pattern to parse
   * @returns {Array} Array of segment objects
   */
  _parse(pattern) {
    const segments = [];
    const sep = String(this.separator);
    const sepLength = sep.length;

    let currentPart = '';
    let insideBrackets = false;
    let i = 0;

    // Emit the pending part (if it has any non-blank content) and start a new one.
    const flush = () => {
      const trimmed = currentPart.trim();
      if (trimmed) {
        segments.push(this._parseSegment(trimmed));
      }
      currentPart = '';
    };

    while (i < pattern.length) {
      const ch = pattern[i];

      if (insideBrackets) {
        // Copy verbatim until the attribute condition is closed.
        if (ch === ']') {
          insideBrackets = false;
        }
        currentPart += ch;
        i++;
        continue;
      }

      if (pattern.startsWith(sep, i)) {
        flush();
        if (pattern.startsWith(sep, i + sepLength)) {
          // Two separators in a row: deep wildcard.
          segments.push({ type: 'deep-wildcard' });
          i += 2 * sepLength;
        } else {
          i += sepLength;
        }
        continue;
      }

      if (ch === '[') {
        insideBrackets = true;
      }
      currentPart += ch;
      i++;
    }

    flush();
    return segments;
  }

  /**
   * Parse a single, already trimmed, path part.
   *
   * Accepted forms: `tag`, `tag[attr]`, `tag[attr=value]`, each optionally
   * followed by `:first`, `:odd`, `:even` or `:nth(n)`.
   *
   * @private
   * @param {string} part - Segment string (e.g., "user", "user[id]", "user:first")
   * @returns {Object} Segment object
   * @throws {Error} If the part does not have one of the accepted forms.
   */
  _parseSegment(part) {
    // Groups: 1 = tag name, 2 = text between [ and ], 3 = text after ':'.
    const match = part.match(/^([^[\]:]+)(?:\[([^\]]+)\])?(?::(\w+(?:\(\d+\))?))?$/);

    if (!match) {
      throw new Error(`Invalid segment pattern: ${part}`);
    }

    const [, tagName, attrExpr, posExpr] = match;
    const segment = { type: 'tag', tag: tagName.trim() };

    if (attrExpr) {
      // Split on the first '=' only, so the value itself may contain '='.
      const eqIndex = attrExpr.indexOf('=');
      if (eqIndex === -1) {
        segment.attrName = attrExpr.trim();
      } else {
        segment.attrName = attrExpr.substring(0, eqIndex).trim();
        segment.attrValue = attrExpr.substring(eqIndex + 1).trim();
      }
    }

    if (posExpr) {
      const nthMatch = posExpr.match(/^nth\((\d+)\)$/);
      if (nthMatch) {
        segment.position = 'nth';
        segment.positionValue = parseInt(nthMatch[1], 10);
      } else if (['first', 'odd', 'even'].includes(posExpr)) {
        segment.position = posExpr;
      } else {
        throw new Error(`Invalid position selector: :${posExpr}`);
      }
    }

    return segment;
  }

  /**
   * Number of parsed segments (deep wildcards count as one segment each).
   * @returns {number}
   */
  get length() {
    return this.segments.length;
  }

  /**
   * Whether the pattern contains at least one deep wildcard.
   * @returns {boolean}
   */
  hasDeepWildcard() {
    return this._hasDeepWildcard;
  }

  /**
   * Whether any segment carries an attribute condition.
   * @returns {boolean}
   */
  hasAttributeCondition() {
    return this._hasAttributeCondition;
  }

  /**
   * Whether any segment carries a position selector.
   * @returns {boolean}
   */
  hasPositionSelector() {
    return this._hasPositionSelector;
  }

  /**
   * The original pattern string, exactly as passed to the constructor.
   * @returns {string}
   */
  toString() {
    return this.pattern;
  }
}
|
package/src/Matcher.js
ADDED
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Matcher - Tracks current path in XML/JSON tree and matches against Expressions
|
|
3
|
+
*
|
|
4
|
+
* The matcher maintains a stack of nodes representing the current path from root to
|
|
5
|
+
* current tag. It only stores attribute values for the current (top) node to minimize
|
|
6
|
+
* memory usage. Sibling tracking is used to auto-calculate position and counter.
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* const matcher = new Matcher();
|
|
10
|
+
* matcher.push("root", {});
|
|
11
|
+
* matcher.push("users", {});
|
|
12
|
+
* matcher.push("user", { id: "123", type: "admin" });
|
|
13
|
+
*
|
|
14
|
+
* const expr = new Expression("root.users.user");
|
|
15
|
+
* matcher.matches(expr); // true
|
|
16
|
+
*/
|
|
17
|
+
export default class Matcher {
|
|
18
|
+
/**
|
|
19
|
+
* Create a new Matcher
|
|
20
|
+
* @param {Object} options - Configuration options
|
|
21
|
+
* @param {string} options.separator - Default path separator (default: '.')
|
|
22
|
+
*/
|
|
23
|
+
constructor(options = {}) {
|
|
24
|
+
this.separator = options.separator || '.';
|
|
25
|
+
this.path = [];
|
|
26
|
+
this.siblingStacks = [];
|
|
27
|
+
// Each path node: { tag: string, values: object, position: number, counter: number }
|
|
28
|
+
// values only present for current (last) node
|
|
29
|
+
// Each siblingStacks entry: Map<tagName, count> tracking occurrences at each level
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Push a new tag onto the path
|
|
34
|
+
* @param {string} tagName - Name of the tag
|
|
35
|
+
* @param {Object} attrValues - Attribute key-value pairs for current node (optional)
|
|
36
|
+
*/
|
|
37
|
+
push(tagName, attrValues = null) {
|
|
38
|
+
// Remove values from previous current node (now becoming ancestor)
|
|
39
|
+
if (this.path.length > 0) {
|
|
40
|
+
const prev = this.path[this.path.length - 1];
|
|
41
|
+
prev.values = undefined;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
// Get or create sibling tracking for current level
|
|
45
|
+
const currentLevel = this.path.length;
|
|
46
|
+
if (!this.siblingStacks[currentLevel]) {
|
|
47
|
+
this.siblingStacks[currentLevel] = new Map();
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
const siblings = this.siblingStacks[currentLevel];
|
|
51
|
+
|
|
52
|
+
// Calculate counter (how many times this tag appeared at this level)
|
|
53
|
+
const counter = siblings.get(tagName) || 0;
|
|
54
|
+
|
|
55
|
+
// Calculate position (total children at this level so far)
|
|
56
|
+
let position = 0;
|
|
57
|
+
for (const count of siblings.values()) {
|
|
58
|
+
position += count;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
// Update sibling count for this tag
|
|
62
|
+
siblings.set(tagName, counter + 1);
|
|
63
|
+
|
|
64
|
+
// Create new node
|
|
65
|
+
const node = {
|
|
66
|
+
tag: tagName,
|
|
67
|
+
position: position,
|
|
68
|
+
counter: counter
|
|
69
|
+
};
|
|
70
|
+
|
|
71
|
+
// Store values only for current node
|
|
72
|
+
if (attrValues !== null && attrValues !== undefined) {
|
|
73
|
+
node.values = attrValues;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
this.path.push(node);
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
/**
|
|
80
|
+
* Pop the last tag from the path
|
|
81
|
+
* @returns {Object|undefined} The popped node
|
|
82
|
+
*/
|
|
83
|
+
pop() {
|
|
84
|
+
if (this.path.length === 0) {
|
|
85
|
+
return undefined;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
const node = this.path.pop();
|
|
89
|
+
|
|
90
|
+
// Clean up sibling tracking for this level
|
|
91
|
+
if (this.siblingStacks[this.path.length]) {
|
|
92
|
+
delete this.siblingStacks[this.path.length];
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
return node;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* Update current node's attribute values
|
|
100
|
+
* Useful when attributes are parsed after push
|
|
101
|
+
* @param {Object} attrValues - Attribute values
|
|
102
|
+
*/
|
|
103
|
+
updateCurrent(attrValues) {
|
|
104
|
+
if (this.path.length > 0) {
|
|
105
|
+
const current = this.path[this.path.length - 1];
|
|
106
|
+
if (attrValues !== null && attrValues !== undefined) {
|
|
107
|
+
current.values = attrValues;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
/**
|
|
113
|
+
* Get current tag name
|
|
114
|
+
* @returns {string|undefined}
|
|
115
|
+
*/
|
|
116
|
+
getCurrentTag() {
|
|
117
|
+
return this.path.length > 0 ? this.path[this.path.length - 1].tag : undefined;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
/**
|
|
121
|
+
* Get current node's attribute value
|
|
122
|
+
* @param {string} attrName - Attribute name
|
|
123
|
+
* @returns {*} Attribute value or undefined
|
|
124
|
+
*/
|
|
125
|
+
getAttrValue(attrName) {
|
|
126
|
+
if (this.path.length === 0) return undefined;
|
|
127
|
+
const current = this.path[this.path.length - 1];
|
|
128
|
+
return current.values?.[attrName];
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
/**
|
|
132
|
+
* Check if current node has an attribute
|
|
133
|
+
* @param {string} attrName - Attribute name
|
|
134
|
+
* @returns {boolean}
|
|
135
|
+
*/
|
|
136
|
+
hasAttr(attrName) {
|
|
137
|
+
if (this.path.length === 0) return false;
|
|
138
|
+
const current = this.path[this.path.length - 1];
|
|
139
|
+
return current.values !== undefined && attrName in current.values;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* Get current node's sibling position (child index in parent)
|
|
144
|
+
* @returns {number}
|
|
145
|
+
*/
|
|
146
|
+
getPosition() {
|
|
147
|
+
if (this.path.length === 0) return -1;
|
|
148
|
+
return this.path[this.path.length - 1].position ?? 0;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
/**
|
|
152
|
+
* Get current node's repeat counter (occurrence count of this tag name)
|
|
153
|
+
* @returns {number}
|
|
154
|
+
*/
|
|
155
|
+
getCounter() {
|
|
156
|
+
if (this.path.length === 0) return -1;
|
|
157
|
+
return this.path[this.path.length - 1].counter ?? 0;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
/**
|
|
161
|
+
* Get current node's sibling index (alias for getPosition for backward compatibility)
|
|
162
|
+
* @returns {number}
|
|
163
|
+
* @deprecated Use getPosition() or getCounter() instead
|
|
164
|
+
*/
|
|
165
|
+
getIndex() {
|
|
166
|
+
return this.getPosition();
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
/**
|
|
170
|
+
* Get current path depth
|
|
171
|
+
* @returns {number}
|
|
172
|
+
*/
|
|
173
|
+
getDepth() {
|
|
174
|
+
return this.path.length;
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
/**
|
|
178
|
+
* Get path as string
|
|
179
|
+
* @param {string} separator - Optional separator (uses default if not provided)
|
|
180
|
+
* @returns {string}
|
|
181
|
+
*/
|
|
182
|
+
toString(separator) {
|
|
183
|
+
const sep = separator || this.separator;
|
|
184
|
+
return this.path.map(n => n.tag).join(sep);
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
/**
|
|
188
|
+
* Get path as array of tag names
|
|
189
|
+
* @returns {string[]}
|
|
190
|
+
*/
|
|
191
|
+
toArray() {
|
|
192
|
+
return this.path.map(n => n.tag);
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
/**
|
|
196
|
+
* Reset the path to empty
|
|
197
|
+
*/
|
|
198
|
+
reset() {
|
|
199
|
+
this.path = [];
|
|
200
|
+
this.siblingStacks = [];
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
/**
|
|
204
|
+
* Match current path against an Expression
|
|
205
|
+
* @param {Expression} expression - The expression to match against
|
|
206
|
+
* @returns {boolean} True if current path matches the expression
|
|
207
|
+
*/
|
|
208
|
+
matches(expression) {
|
|
209
|
+
const segments = expression.segments;
|
|
210
|
+
|
|
211
|
+
if (segments.length === 0) {
|
|
212
|
+
return false;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
// Handle deep wildcard patterns
|
|
216
|
+
if (expression.hasDeepWildcard()) {
|
|
217
|
+
return this._matchWithDeepWildcard(segments);
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
// Simple path matching (no deep wildcards)
|
|
221
|
+
return this._matchSimple(segments);
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
/**
|
|
225
|
+
* Match simple path (no deep wildcards)
|
|
226
|
+
* @private
|
|
227
|
+
*/
|
|
228
|
+
_matchSimple(segments) {
|
|
229
|
+
// Path must be same length as segments
|
|
230
|
+
if (this.path.length !== segments.length) {
|
|
231
|
+
return false;
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
// Match each segment bottom-to-top
|
|
235
|
+
for (let i = 0; i < segments.length; i++) {
|
|
236
|
+
const segment = segments[i];
|
|
237
|
+
const node = this.path[i];
|
|
238
|
+
const isCurrentNode = (i === this.path.length - 1);
|
|
239
|
+
|
|
240
|
+
if (!this._matchSegment(segment, node, isCurrentNode)) {
|
|
241
|
+
return false;
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
return true;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
/**
|
|
249
|
+
* Match path with deep wildcards
|
|
250
|
+
* @private
|
|
251
|
+
*/
|
|
252
|
+
_matchWithDeepWildcard(segments) {
|
|
253
|
+
let pathIdx = this.path.length - 1; // Start from current node (bottom)
|
|
254
|
+
let segIdx = segments.length - 1; // Start from last segment
|
|
255
|
+
|
|
256
|
+
while (segIdx >= 0 && pathIdx >= 0) {
|
|
257
|
+
const segment = segments[segIdx];
|
|
258
|
+
|
|
259
|
+
if (segment.type === 'deep-wildcard') {
|
|
260
|
+
// ".." matches zero or more levels
|
|
261
|
+
segIdx--;
|
|
262
|
+
|
|
263
|
+
if (segIdx < 0) {
|
|
264
|
+
// Pattern ends with "..", always matches
|
|
265
|
+
return true;
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
// Find where next segment matches in the path
|
|
269
|
+
const nextSeg = segments[segIdx];
|
|
270
|
+
let found = false;
|
|
271
|
+
|
|
272
|
+
for (let i = pathIdx; i >= 0; i--) {
|
|
273
|
+
const isCurrentNode = (i === this.path.length - 1);
|
|
274
|
+
if (this._matchSegment(nextSeg, this.path[i], isCurrentNode)) {
|
|
275
|
+
pathIdx = i - 1;
|
|
276
|
+
segIdx--;
|
|
277
|
+
found = true;
|
|
278
|
+
break;
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
if (!found) {
|
|
283
|
+
return false;
|
|
284
|
+
}
|
|
285
|
+
} else {
|
|
286
|
+
// Regular segment
|
|
287
|
+
const isCurrentNode = (pathIdx === this.path.length - 1);
|
|
288
|
+
if (!this._matchSegment(segment, this.path[pathIdx], isCurrentNode)) {
|
|
289
|
+
return false;
|
|
290
|
+
}
|
|
291
|
+
pathIdx--;
|
|
292
|
+
segIdx--;
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
// All segments must be consumed
|
|
297
|
+
return segIdx < 0;
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
/**
|
|
301
|
+
* Match a single segment against a node
|
|
302
|
+
* @private
|
|
303
|
+
* @param {Object} segment - Segment from Expression
|
|
304
|
+
* @param {Object} node - Node from path
|
|
305
|
+
* @param {boolean} isCurrentNode - Whether this is the current (last) node
|
|
306
|
+
* @returns {boolean}
|
|
307
|
+
*/
|
|
308
|
+
_matchSegment(segment, node, isCurrentNode) {
|
|
309
|
+
// Match tag name (* is wildcard)
|
|
310
|
+
if (segment.tag !== '*' && segment.tag !== node.tag) {
|
|
311
|
+
return false;
|
|
312
|
+
}
|
|
313
|
+
|
|
314
|
+
// Match attribute name (check if node has this attribute)
|
|
315
|
+
// Can only check for current node since ancestors don't have values
|
|
316
|
+
if (segment.attrName !== undefined) {
|
|
317
|
+
if (!isCurrentNode) {
|
|
318
|
+
// Can't check attributes for ancestor nodes (values not stored)
|
|
319
|
+
return false;
|
|
320
|
+
}
|
|
321
|
+
|
|
322
|
+
if (!node.values || !(segment.attrName in node.values)) {
|
|
323
|
+
return false;
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
// Match attribute value (only possible for current node)
|
|
327
|
+
if (segment.attrValue !== undefined) {
|
|
328
|
+
const actualValue = node.values[segment.attrName];
|
|
329
|
+
// Both should be strings
|
|
330
|
+
if (String(actualValue) !== String(segment.attrValue)) {
|
|
331
|
+
return false;
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
// Match position (only for current node)
|
|
337
|
+
if (segment.position !== undefined) {
|
|
338
|
+
if (!isCurrentNode) {
|
|
339
|
+
// Can't check position for ancestor nodes
|
|
340
|
+
return false;
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
const counter = node.counter ?? 0;
|
|
344
|
+
|
|
345
|
+
if (segment.position === 'first' && counter !== 0) {
|
|
346
|
+
return false;
|
|
347
|
+
} else if (segment.position === 'odd' && counter % 2 !== 1) {
|
|
348
|
+
return false;
|
|
349
|
+
} else if (segment.position === 'even' && counter % 2 !== 0) {
|
|
350
|
+
return false;
|
|
351
|
+
} else if (segment.position === 'nth') {
|
|
352
|
+
if (counter !== segment.positionValue) {
|
|
353
|
+
return false;
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
return true;
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
/**
|
|
362
|
+
* Create a snapshot of current state
|
|
363
|
+
* @returns {Object} State snapshot
|
|
364
|
+
*/
|
|
365
|
+
snapshot() {
|
|
366
|
+
return {
|
|
367
|
+
path: this.path.map(node => ({ ...node })),
|
|
368
|
+
siblingStacks: this.siblingStacks.map(map => new Map(map))
|
|
369
|
+
};
|
|
370
|
+
}
|
|
371
|
+
|
|
372
|
+
/**
|
|
373
|
+
* Restore state from snapshot
|
|
374
|
+
* @param {Object} snapshot - State snapshot
|
|
375
|
+
*/
|
|
376
|
+
restore(snapshot) {
|
|
377
|
+
this.path = snapshot.path.map(node => ({ ...node }));
|
|
378
|
+
this.siblingStacks = snapshot.siblingStacks.map(map => new Map(map));
|
|
379
|
+
}
|
|
380
|
+
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* path-expression-matcher - XML/JSON path matching library
|
|
3
|
+
*
|
|
4
|
+
* Provides efficient path tracking and pattern matching for XML/JSON parsers.
|
|
5
|
+
*
|
|
6
|
+
* @example
|
|
7
|
+
* import { Expression, Matcher } from 'path-expression-matcher';
|
|
8
|
+
*
|
|
9
|
+
* // Create expression (parse once)
|
|
10
|
+
* const expr = new Expression("root.users.user[id]");
|
|
11
|
+
*
|
|
12
|
+
* // Create matcher (track path)
|
|
13
|
+
* const matcher = new Matcher();
|
|
14
|
+
* matcher.push("root", [], {}, 0);
|
|
15
|
+
* matcher.push("users", [], {}, 0);
|
|
16
|
+
* matcher.push("user", ["id", "type"], { id: "123", type: "admin" }, 0);
|
|
17
|
+
*
|
|
18
|
+
* // Match
|
|
19
|
+
* if (matcher.matches(expr)) {
|
|
20
|
+
* console.log("Match found!");
|
|
21
|
+
* }
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import Expression from './Expression.js';
|
|
25
|
+
import Matcher from './Matcher.js';
|
|
26
|
+
|
|
27
|
+
export { Expression, Matcher };
|
|
28
|
+
export default { Expression, Matcher };
|