path-expression-matcher 1.0.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +110 -6
- package/package.json +2 -2
- package/src/Expression.js +94 -25
- package/src/Matcher.js +42 -8
package/README.md
CHANGED
|
@@ -37,6 +37,13 @@ if (matcher.matches(expr)) {
|
|
|
37
37
|
console.log("Match found!");
|
|
38
38
|
console.log("Current path:", matcher.toString()); // "root.users.user"
|
|
39
39
|
}
|
|
40
|
+
|
|
41
|
+
// Namespace support
|
|
42
|
+
const nsExpr = new Expression("soap::Envelope.soap::Body..ns::UserId");
|
|
43
|
+
matcher.push("Envelope", null, "soap");
|
|
44
|
+
matcher.push("Body", null, "soap");
|
|
45
|
+
matcher.push("UserId", null, "ns");
|
|
46
|
+
console.log(matcher.toString()); // "soap:Envelope.soap:Body.ns:UserId"
|
|
40
47
|
```
|
|
41
48
|
|
|
42
49
|
## 📖 Pattern Syntax
|
|
@@ -78,11 +85,50 @@ if (matcher.matches(expr)) {
|
|
|
78
85
|
|
|
79
86
|
**Note:** Position selectors use the **counter** (occurrence count of the tag name), not the position (child index). For example, in `<root><a/><b/><a/></root>`, the second `<a/>` has position=2 but counter=1.
|
|
80
87
|
|
|
88
|
+
### Namespaces
|
|
89
|
+
|
|
90
|
+
```javascript
|
|
91
|
+
"ns::user" // user with namespace "ns"
|
|
92
|
+
"soap::Envelope" // Envelope with namespace "soap"
|
|
93
|
+
"ns::user[id]" // user with namespace "ns" and "id" attribute
|
|
94
|
+
"ns::user:first" // First user with namespace "ns"
|
|
95
|
+
"*::user" // user with any namespace
|
|
96
|
+
"..ns::item" // item with namespace "ns" anywhere in tree
|
|
97
|
+
"soap::Envelope.soap::Body" // Nested namespaced elements
|
|
98
|
+
"ns::first" // Tag named "first" with namespace "ns" (NO ambiguity!)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
**Namespace syntax:**
|
|
102
|
+
- Use **double colon (::)** for namespace: `ns::tag`
|
|
103
|
+
- Use **single colon (:)** for position: `tag:first`
|
|
104
|
+
- Combined: `ns::tag:first` (namespace + tag + position)
|
|
105
|
+
|
|
106
|
+
**Namespace matching rules:**
|
|
107
|
+
- Pattern `ns::user` matches only nodes with namespace "ns" and tag "user"
|
|
108
|
+
- Pattern `user` (no namespace) matches nodes with tag "user" regardless of namespace
|
|
109
|
+
- Pattern `*::user` matches tag "user" with any namespace, including nodes that have no namespace (wildcard namespace)
|
|
110
|
+
- Counters (which position selectors such as `:first` and `:nth(n)` use) are tracked separately per namespace and tag (e.g., `ns1::item` and `ns2::item` have independent counters)
|
|
111
|
+
|
|
112
|
+
### Wildcard Differences
|
|
113
|
+
|
|
114
|
+
**Single wildcard (`*`)** - Matches exactly ONE level:
|
|
115
|
+
- `"*.fix1"` matches `root.fix1` (2 levels) ✅
|
|
116
|
+
- `"*.fix1"` does NOT match `root.another.fix1` (3 levels) ❌
|
|
117
|
+
- Path depth MUST equal pattern depth
|
|
118
|
+
|
|
119
|
+
**Deep wildcard (`..`)** - Matches ZERO or MORE levels:
|
|
120
|
+
- `"..fix1"` matches `root.fix1` ✅
|
|
121
|
+
- `"..fix1"` matches `root.another.fix1` ✅
|
|
122
|
+
- `"..fix1"` matches `a.b.c.d.fix1` ✅
|
|
123
|
+
- Works at any depth
|
|
124
|
+
|
|
81
125
|
### Combined Patterns
|
|
82
126
|
|
|
83
127
|
```javascript
|
|
84
|
-
"..user[id]:first"
|
|
85
|
-
"root..user[type=admin]"
|
|
128
|
+
"..user[id]:first" // First user with id, anywhere
|
|
129
|
+
"root..user[type=admin]" // Admin user under root
|
|
130
|
+
"ns::user[id]:first" // First namespaced user with id
|
|
131
|
+
"soap::Envelope..ns::UserId" // UserId with namespace ns under SOAP envelope
|
|
86
132
|
```
|
|
87
133
|
|
|
88
134
|
## 🔧 API Reference
|
|
@@ -125,18 +171,21 @@ new Matcher(options)
|
|
|
125
171
|
|
|
126
172
|
#### Path Tracking Methods
|
|
127
173
|
|
|
128
|
-
##### `push(tagName, attrValues)`
|
|
174
|
+
##### `push(tagName, attrValues, namespace)`
|
|
129
175
|
|
|
130
176
|
Add a tag to the current path. Position and counter are automatically calculated.
|
|
131
177
|
|
|
132
178
|
**Parameters:**
|
|
133
179
|
- `tagName` (string): Tag name
|
|
134
180
|
- `attrValues` (object, optional): Attribute key-value pairs (current node only)
|
|
181
|
+
- `namespace` (string, optional): Namespace for the tag
|
|
135
182
|
|
|
136
183
|
**Example:**
|
|
137
184
|
```javascript
|
|
138
185
|
matcher.push("user", { id: "123", type: "admin" });
|
|
139
186
|
matcher.push("item"); // No attributes
|
|
187
|
+
matcher.push("Envelope", null, "soap"); // With namespace
|
|
188
|
+
matcher.push("Body", { version: "1.1" }, "soap"); // With both
|
|
140
189
|
```
|
|
141
190
|
|
|
142
191
|
**Position vs Counter:**
|
|
@@ -199,6 +248,14 @@ Get current tag name.
|
|
|
199
248
|
const tag = matcher.getCurrentTag(); // "user"
|
|
200
249
|
```
|
|
201
250
|
|
|
251
|
+
##### `getCurrentNamespace()`
|
|
252
|
+
|
|
253
|
+
Get current namespace.
|
|
254
|
+
|
|
255
|
+
```javascript
|
|
256
|
+
const ns = matcher.getCurrentNamespace(); // "soap" or undefined
|
|
257
|
+
```
|
|
258
|
+
|
|
202
259
|
##### `getAttrValue(attrName)`
|
|
203
260
|
|
|
204
261
|
Get attribute value of current node.
|
|
@@ -249,13 +306,18 @@ Get current path depth.
|
|
|
249
306
|
const depth = matcher.getDepth(); // 3 for "root.users.user"
|
|
250
307
|
```
|
|
251
308
|
|
|
252
|
-
##### `toString(separator?)`
|
|
309
|
+
##### `toString(separator?, includeNamespace?)`
|
|
253
310
|
|
|
254
311
|
Get path as string.
|
|
255
312
|
|
|
313
|
+
**Parameters:**
|
|
314
|
+
- `separator` (string, optional): Path separator (uses default if not provided)
|
|
315
|
+
- `includeNamespace` (boolean, optional): Whether to include namespaces (default: true)
|
|
316
|
+
|
|
256
317
|
```javascript
|
|
257
|
-
const path = matcher.toString();
|
|
258
|
-
const path2 = matcher.toString('/');
|
|
318
|
+
const path = matcher.toString(); // "root.ns:user.item"
|
|
319
|
+
const path2 = matcher.toString('/'); // "root/ns:user/item"
|
|
320
|
+
const path3 = matcher.toString('.', false); // "root.user.item" (no namespaces)
|
|
259
321
|
```
|
|
260
322
|
|
|
261
323
|
##### `toArray()`
|
|
@@ -419,6 +481,48 @@ const expr = new Expression("root.item:first");
|
|
|
419
481
|
console.log(matcher.matches(expr)); // false (counter=1, not 0)
|
|
420
482
|
```
|
|
421
483
|
|
|
484
|
+
### Example 7: Namespace Support (XML/SOAP)
|
|
485
|
+
|
|
486
|
+
```javascript
|
|
487
|
+
const matcher = new Matcher();
|
|
488
|
+
const soapExpr = new Expression("soap::Envelope.soap::Body..ns::UserId");
|
|
489
|
+
|
|
490
|
+
// Parse SOAP document
|
|
491
|
+
matcher.push("Envelope", { xmlns: "..." }, "soap");
|
|
492
|
+
matcher.push("Body", null, "soap");
|
|
493
|
+
matcher.push("GetUserRequest", null, "ns");
|
|
494
|
+
matcher.push("UserId", null, "ns");
|
|
495
|
+
|
|
496
|
+
// Match namespaced pattern
|
|
497
|
+
if (matcher.matches(soapExpr)) {
|
|
498
|
+
console.log("Found UserId in SOAP body");
|
|
499
|
+
console.log(matcher.toString()); // "soap:Envelope.soap:Body.ns:GetUserRequest.ns:UserId"
|
|
500
|
+
}
|
|
501
|
+
|
|
502
|
+
// Namespace-specific counters
|
|
503
|
+
matcher.reset();
|
|
504
|
+
matcher.push("root");
|
|
505
|
+
matcher.push("item", null, "ns1"); // ns1::item counter=0
|
|
506
|
+
matcher.pop();
|
|
507
|
+
matcher.push("item", null, "ns2"); // ns2::item counter=0 (different namespace)
|
|
508
|
+
matcher.pop();
|
|
509
|
+
matcher.push("item", null, "ns1"); // ns1::item counter=1
|
|
510
|
+
|
|
511
|
+
const firstNs1Item = new Expression("root.ns1::item:first");
|
|
512
|
+
console.log(matcher.matches(firstNs1Item)); // false (counter=1)
|
|
513
|
+
|
|
514
|
+
const secondNs1Item = new Expression("root.ns1::item:nth(1)");
|
|
515
|
+
console.log(matcher.matches(secondNs1Item)); // true
|
|
516
|
+
|
|
517
|
+
// NO AMBIGUITY: Tags named after position keywords
|
|
518
|
+
matcher.reset();
|
|
519
|
+
matcher.push("root");
|
|
520
|
+
matcher.push("first", null, "ns"); // Tag named "first" with namespace
|
|
521
|
+
|
|
522
|
+
const expr = new Expression("root.ns::first");
|
|
523
|
+
console.log(matcher.matches(expr)); // true - matches namespace "ns", tag "first"
|
|
524
|
+
```
|
|
525
|
+
|
|
422
526
|
## 🏗️ Architecture
|
|
423
527
|
|
|
424
528
|
### Data Storage Strategy
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "path-expression-matcher",
|
|
3
|
-
"version": "1.0.0",
|
|
3
|
+
"version": "1.1.1",
|
|
4
4
|
"description": "Efficient path tracking and pattern matching for XML/JSON parsers",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
"./Matcher": "./src/Matcher.js"
|
|
11
11
|
},
|
|
12
12
|
"scripts": {
|
|
13
|
-
"test": "node test/test.js"
|
|
13
|
+
"test": "node test/namespace_test.js && node test/test.js"
|
|
14
14
|
},
|
|
15
15
|
"keywords": [
|
|
16
16
|
"xml",
|
package/src/Expression.js
CHANGED
|
@@ -76,51 +76,120 @@ export default class Expression {
|
|
|
76
76
|
/**
|
|
77
77
|
* Parse a single segment
|
|
78
78
|
* @private
|
|
79
|
-
* @param {string} part - Segment string (e.g., "user", "user[id]", "user:first")
|
|
79
|
+
* @param {string} part - Segment string (e.g., "user", "ns::user", "user[id]", "ns::user:first")
|
|
80
80
|
* @returns {Object} Segment object
|
|
81
81
|
*/
|
|
82
82
|
_parseSegment(part) {
|
|
83
83
|
const segment = { type: 'tag' };
|
|
84
84
|
|
|
85
|
-
//
|
|
86
|
-
|
|
87
|
-
|
|
85
|
+
// CRITICAL: Handle wildcard FIRST (before any other parsing)
|
|
86
|
+
if (part === '*') {
|
|
87
|
+
segment.tag = '*';
|
|
88
|
+
return segment;
|
|
89
|
+
}
|
|
88
90
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
+
// NEW NAMESPACE SYNTAX (v2.0):
|
|
92
|
+
// ============================
|
|
93
|
+
// Namespace uses DOUBLE colon (::)
|
|
94
|
+
// Position uses SINGLE colon (:)
|
|
95
|
+
//
|
|
96
|
+
// Examples:
|
|
97
|
+
// "user" → tag
|
|
98
|
+
// "user:first" → tag + position
|
|
99
|
+
// "user[id]" → tag + attribute
|
|
100
|
+
// "user[id]:first" → tag + attribute + position
|
|
101
|
+
// "ns::user" → namespace + tag
|
|
102
|
+
// "ns::user:first" → namespace + tag + position
|
|
103
|
+
// "ns::user[id]" → namespace + tag + attribute
|
|
104
|
+
// "ns::user[id]:first" → namespace + tag + attribute + position
|
|
105
|
+
// "ns::first" → namespace + tag named "first" (NO ambiguity!)
|
|
106
|
+
//
|
|
107
|
+
// This eliminates all ambiguity:
|
|
108
|
+
// :: = namespace separator
|
|
109
|
+
// : = position selector
|
|
110
|
+
// [] = attributes
|
|
111
|
+
|
|
112
|
+
// Step 1: Extract brackets [attr] or [attr=value]
|
|
113
|
+
let bracketContent = null;
|
|
114
|
+
let withoutBrackets = part;
|
|
115
|
+
|
|
116
|
+
const bracketMatch = part.match(/^([^\[]+)(\[[^\]]*\])(.*)$/);
|
|
117
|
+
if (bracketMatch) {
|
|
118
|
+
withoutBrackets = bracketMatch[1] + bracketMatch[3];
|
|
119
|
+
if (bracketMatch[2]) {
|
|
120
|
+
const content = bracketMatch[2].slice(1, -1);
|
|
121
|
+
if (content) {
|
|
122
|
+
bracketContent = content;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
// Step 2: Check for namespace (double colon ::)
|
|
128
|
+
let namespace = undefined;
|
|
129
|
+
let tagAndPosition = withoutBrackets;
|
|
130
|
+
|
|
131
|
+
if (withoutBrackets.includes('::')) {
|
|
132
|
+
const nsIndex = withoutBrackets.indexOf('::');
|
|
133
|
+
namespace = withoutBrackets.substring(0, nsIndex).trim();
|
|
134
|
+
tagAndPosition = withoutBrackets.substring(nsIndex + 2).trim(); // Skip ::
|
|
135
|
+
|
|
136
|
+
if (!namespace) {
|
|
137
|
+
throw new Error(`Invalid namespace in pattern: ${part}`);
|
|
138
|
+
}
|
|
91
139
|
}
|
|
92
140
|
|
|
93
|
-
|
|
141
|
+
// Step 3: Parse tag and position (single colon :)
|
|
142
|
+
let tag = undefined;
|
|
143
|
+
let positionMatch = null;
|
|
94
144
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
const
|
|
145
|
+
if (tagAndPosition.includes(':')) {
|
|
146
|
+
const colonIndex = tagAndPosition.lastIndexOf(':'); // Use last colon for position
|
|
147
|
+
const tagPart = tagAndPosition.substring(0, colonIndex).trim();
|
|
148
|
+
const posPart = tagAndPosition.substring(colonIndex + 1).trim();
|
|
98
149
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
const attrValue = attrExpr.substring(eqIndex + 1).trim();
|
|
150
|
+
// Verify position is a valid keyword
|
|
151
|
+
const isPositionKeyword = ['first', 'last', 'odd', 'even'].includes(posPart) ||
|
|
152
|
+
/^nth\(\d+\)$/.test(posPart);
|
|
103
153
|
|
|
104
|
-
|
|
105
|
-
|
|
154
|
+
if (isPositionKeyword) {
|
|
155
|
+
tag = tagPart;
|
|
156
|
+
positionMatch = posPart;
|
|
106
157
|
} else {
|
|
107
|
-
|
|
158
|
+
// Not a valid position keyword, treat whole thing as tag
|
|
159
|
+
tag = tagAndPosition;
|
|
108
160
|
}
|
|
161
|
+
} else {
|
|
162
|
+
tag = tagAndPosition;
|
|
109
163
|
}
|
|
110
164
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
165
|
+
if (!tag) {
|
|
166
|
+
throw new Error(`Invalid segment pattern: ${part}`);
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
segment.tag = tag;
|
|
170
|
+
if (namespace) {
|
|
171
|
+
segment.namespace = namespace;
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
// Step 4: Parse attributes
|
|
175
|
+
if (bracketContent) {
|
|
176
|
+
if (bracketContent.includes('=')) {
|
|
177
|
+
const eqIndex = bracketContent.indexOf('=');
|
|
178
|
+
segment.attrName = bracketContent.substring(0, eqIndex).trim();
|
|
179
|
+
segment.attrValue = bracketContent.substring(eqIndex + 1).trim();
|
|
180
|
+
} else {
|
|
181
|
+
segment.attrName = bracketContent.trim();
|
|
182
|
+
}
|
|
183
|
+
}
|
|
114
184
|
|
|
115
|
-
|
|
116
|
-
|
|
185
|
+
// Step 5: Parse position selector
|
|
186
|
+
if (positionMatch) {
|
|
187
|
+
const nthMatch = positionMatch.match(/^nth\((\d+)\)$/);
|
|
117
188
|
if (nthMatch) {
|
|
118
189
|
segment.position = 'nth';
|
|
119
190
|
segment.positionValue = parseInt(nthMatch[1], 10);
|
|
120
|
-
} else if (['first', 'odd', 'even'].includes(posExpr)) {
|
|
121
|
-
segment.position = posExpr;
|
|
122
191
|
} else {
|
|
123
|
-
|
|
192
|
+
segment.position = positionMatch;
|
|
124
193
|
}
|
|
125
194
|
}
|
|
126
195
|
|
package/src/Matcher.js
CHANGED
|
@@ -33,8 +33,9 @@ export default class Matcher {
|
|
|
33
33
|
* Push a new tag onto the path
|
|
34
34
|
* @param {string} tagName - Name of the tag
|
|
35
35
|
* @param {Object} attrValues - Attribute key-value pairs for current node (optional)
|
|
36
|
+
* @param {string} namespace - Namespace for the tag (optional)
|
|
36
37
|
*/
|
|
37
|
-
push(tagName, attrValues = null) {
|
|
38
|
+
push(tagName, attrValues = null, namespace = null) {
|
|
38
39
|
// Remove values from previous current node (now becoming ancestor)
|
|
39
40
|
if (this.path.length > 0) {
|
|
40
41
|
const prev = this.path[this.path.length - 1];
|
|
@@ -49,8 +50,11 @@ export default class Matcher {
|
|
|
49
50
|
|
|
50
51
|
const siblings = this.siblingStacks[currentLevel];
|
|
51
52
|
|
|
53
|
+
// Create a unique key for sibling tracking that includes namespace
|
|
54
|
+
const siblingKey = namespace ? `${namespace}:${tagName}` : tagName;
|
|
55
|
+
|
|
52
56
|
// Calculate counter (how many times this tag appeared at this level)
|
|
53
|
-
const counter = siblings.get(tagName) || 0;
|
|
57
|
+
const counter = siblings.get(siblingKey) || 0;
|
|
54
58
|
|
|
55
59
|
// Calculate position (total children at this level so far)
|
|
56
60
|
let position = 0;
|
|
@@ -59,7 +63,7 @@ export default class Matcher {
|
|
|
59
63
|
}
|
|
60
64
|
|
|
61
65
|
// Update sibling count for this tag
|
|
62
|
-
siblings.set(tagName, counter + 1);
|
|
66
|
+
siblings.set(siblingKey, counter + 1);
|
|
63
67
|
|
|
64
68
|
// Create new node
|
|
65
69
|
const node = {
|
|
@@ -68,6 +72,11 @@ export default class Matcher {
|
|
|
68
72
|
counter: counter
|
|
69
73
|
};
|
|
70
74
|
|
|
75
|
+
// Store namespace if provided
|
|
76
|
+
if (namespace !== null && namespace !== undefined) {
|
|
77
|
+
node.namespace = namespace;
|
|
78
|
+
}
|
|
79
|
+
|
|
71
80
|
// Store values only for current node
|
|
72
81
|
if (attrValues !== null && attrValues !== undefined) {
|
|
73
82
|
node.values = attrValues;
|
|
@@ -87,9 +96,11 @@ export default class Matcher {
|
|
|
87
96
|
|
|
88
97
|
const node = this.path.pop();
|
|
89
98
|
|
|
90
|
-
// Clean up sibling tracking for
|
|
91
|
-
|
|
92
|
-
|
|
99
|
+
// Clean up sibling tracking for levels deeper than current
|
|
100
|
+
// After pop, path.length is the new depth
|
|
101
|
+
// We need to clean up siblingStacks[path.length + 1] and beyond
|
|
102
|
+
if (this.siblingStacks.length > this.path.length + 1) {
|
|
103
|
+
this.siblingStacks.length = this.path.length + 1;
|
|
93
104
|
}
|
|
94
105
|
|
|
95
106
|
return node;
|
|
@@ -117,6 +128,14 @@ export default class Matcher {
|
|
|
117
128
|
return this.path.length > 0 ? this.path[this.path.length - 1].tag : undefined;
|
|
118
129
|
}
|
|
119
130
|
|
|
131
|
+
/**
|
|
132
|
+
* Get current namespace
|
|
133
|
+
* @returns {string|undefined}
|
|
134
|
+
*/
|
|
135
|
+
getCurrentNamespace() {
|
|
136
|
+
return this.path.length > 0 ? this.path[this.path.length - 1].namespace : undefined;
|
|
137
|
+
}
|
|
138
|
+
|
|
120
139
|
/**
|
|
121
140
|
* Get current node's attribute value
|
|
122
141
|
* @param {string} attrName - Attribute name
|
|
@@ -177,11 +196,17 @@ export default class Matcher {
|
|
|
177
196
|
/**
|
|
178
197
|
* Get path as string
|
|
179
198
|
* @param {string} separator - Optional separator (uses default if not provided)
|
|
199
|
+
* @param {boolean} includeNamespace - Whether to include namespace in output (default: true)
|
|
180
200
|
* @returns {string}
|
|
181
201
|
*/
|
|
182
|
-
toString(separator) {
|
|
202
|
+
toString(separator, includeNamespace = true) {
|
|
183
203
|
const sep = separator || this.separator;
|
|
184
|
-
return this.path.map(n => n.tag).join(sep);
|
|
204
|
+
return this.path.map(n => {
|
|
205
|
+
if (includeNamespace && n.namespace) {
|
|
206
|
+
return `${n.namespace}:${n.tag}`;
|
|
207
|
+
}
|
|
208
|
+
return n.tag;
|
|
209
|
+
}).join(sep);
|
|
185
210
|
}
|
|
186
211
|
|
|
187
212
|
/**
|
|
@@ -311,6 +336,15 @@ export default class Matcher {
|
|
|
311
336
|
return false;
|
|
312
337
|
}
|
|
313
338
|
|
|
339
|
+
// Match namespace if specified in segment
|
|
340
|
+
if (segment.namespace !== undefined) {
|
|
341
|
+
// Segment has namespace - node must match it
|
|
342
|
+
if (segment.namespace !== '*' && segment.namespace !== node.namespace) {
|
|
343
|
+
return false;
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
// If segment has no namespace, it matches nodes with or without namespace
|
|
347
|
+
|
|
314
348
|
// Match attribute name (check if node has this attribute)
|
|
315
349
|
// Can only check for current node since ancestors don't have values
|
|
316
350
|
if (segment.attrName !== undefined) {
|