kodexa 7.5.514404640805__py3-none-any.whl → 8.0.14958192442__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kodexa/dataclasses/__init__.py +1 -1
- kodexa/model/__init__.py +2 -2
- kodexa/model/objects.py +21 -1
- kodexa/model/utils.py +1 -1
- kodexa/pipeline/pipeline.py +1 -1
- kodexa/platform/client.py +1 -2
- kodexa/platform/kodexa.py +4 -1
- kodexa/platform/manifest.py +447 -0
- kodexa/selectors/__init__.py +1 -1
- kodexa/selectors/ast.py +371 -98
- kodexa/selectors/error.py +29 -0
- kodexa/selectors/kodexa-ast-visitor.py +268 -0
- kodexa/selectors/parser.py +91 -0
- kodexa/selectors/resources/KodexaSelector.interp +99 -0
- kodexa/selectors/resources/KodexaSelector.tokens +56 -0
- kodexa/selectors/resources/KodexaSelectorLexer.interp +119 -0
- kodexa/selectors/resources/KodexaSelectorLexer.py +204 -0
- kodexa/selectors/resources/KodexaSelectorLexer.tokens +56 -0
- kodexa/selectors/resources/KodexaSelectorListener.py +570 -0
- kodexa/selectors/resources/KodexaSelectorParser.py +3246 -0
- kodexa/selectors/resources/KodexaSelectorVisitor.py +323 -0
- kodexa/selectors/visitor.py +265 -0
- kodexa/steps/__init__.py +4 -2
- kodexa/steps/common.py +0 -68
- kodexa/testing/test_utils.py +1 -1
- {kodexa-7.5.514404640805.dist-info → kodexa-8.0.14958192442.dist-info}/METADATA +7 -3
- kodexa-8.0.14958192442.dist-info/RECORD +53 -0
- {kodexa-7.5.514404640805.dist-info → kodexa-8.0.14958192442.dist-info}/WHEEL +1 -1
- kodexa/model/model.py +0 -3259
- kodexa/model/persistence.py +0 -2017
- kodexa/selectors/core.py +0 -124
- kodexa/selectors/lexrules.py +0 -137
- kodexa/selectors/lextab.py +0 -83
- kodexa/selectors/lextab.pyi +0 -1
- kodexa/selectors/parserules.py +0 -414
- kodexa/selectors/parserules.pyi +0 -1
- kodexa/selectors/parsetab.py +0 -4149
- kodexa/selectors/parsetab.pyi +0 -1
- kodexa-7.5.514404640805.dist-info/RECORD +0 -50
- {kodexa-7.5.514404640805.dist-info → kodexa-8.0.14958192442.dist-info}/LICENSE +0 -0
kodexa/selectors/ast.py
CHANGED
@@ -1,26 +1,27 @@
|
|
1
|
-
"""Abstract Syntax Tree nodes for parsed XPath.
|
1
|
+
"""Abstract Syntax Tree nodes for parsed XPath expressions.
|
2
2
|
|
3
|
-
This module contains basic nodes for representing parsed XPath expressions
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
perhaps introspect ASTs returned by the parser.
|
8
|
-
|
9
|
-
This code was derived from https://github.com/emory-libraries/eulxml
|
3
|
+
This module contains basic nodes for representing parsed XPath expressions
|
4
|
+
created by the ANTLR-based parser. These classes provide the same functionality
|
5
|
+
as the original PLY-based parser's AST classes but are designed to work with
|
6
|
+
the ANTLR-generated parse tree.
|
10
7
|
"""
|
11
8
|
|
12
|
-
from __future__ import
|
9
|
+
from __future__ import annotations
|
13
10
|
|
14
11
|
import re
|
12
|
+
from typing import List, Optional, Any, Dict, Union, Tuple
|
15
13
|
|
16
|
-
#
|
17
|
-
#
|
18
|
-
|
19
|
-
from
|
20
|
-
|
21
|
-
|
14
|
+
# Import these types but make them optional to avoid circular imports
|
15
|
+
# In a real implementation, you'd use proper type annotations
|
16
|
+
try:
|
17
|
+
from kodexa import ContentNode, ContentFeature, Document
|
18
|
+
except ImportError:
|
19
|
+
ContentNode = Any
|
20
|
+
ContentFeature = Any
|
21
|
+
Document = Any
|
22
22
|
|
23
23
|
__all__ = [
|
24
|
+
"SelectorContext",
|
24
25
|
"UnaryExpression",
|
25
26
|
"BinaryExpression",
|
26
27
|
"PredicatedExpression",
|
@@ -36,28 +37,61 @@ __all__ = [
|
|
36
37
|
|
37
38
|
|
38
39
|
class SelectorContext:
|
40
|
+
"""Context for selector resolution, maintains state during traversal."""
|
41
|
+
|
39
42
|
def __init__(self, document: Document, first_only=False):
|
43
|
+
"""Initialize a new SelectorContext.
|
44
|
+
|
45
|
+
Args:
|
46
|
+
document: The document being searched
|
47
|
+
first_only: Whether to return only the first match
|
48
|
+
"""
|
40
49
|
self.pattern_cache = {}
|
41
50
|
self.last_op = None
|
42
51
|
self.document: Document = document
|
43
52
|
self.stream = 0
|
44
53
|
self.first_only = first_only
|
45
54
|
|
46
|
-
def cache_pattern(self, pattern):
|
55
|
+
def cache_pattern(self, pattern: str) -> re.Pattern:
|
56
|
+
"""Get a compiled regex pattern, caching for reuse.
|
57
|
+
|
58
|
+
Args:
|
59
|
+
pattern: The regex pattern string
|
60
|
+
|
61
|
+
Returns:
|
62
|
+
The compiled regex pattern
|
63
|
+
"""
|
47
64
|
if pattern not in self.pattern_cache:
|
48
65
|
self.pattern_cache[pattern] = re.compile(pattern)
|
49
66
|
return self.pattern_cache[pattern]
|
50
67
|
|
51
68
|
|
52
|
-
class PipelineExpression
|
53
|
-
"""A pipeline XPath expression"""
|
69
|
+
class PipelineExpression:
|
70
|
+
"""A pipeline XPath expression (e.g., a stream b)."""
|
54
71
|
|
55
|
-
def __init__(self, left, op, right):
|
72
|
+
def __init__(self, left: Any, op: str, right: Any):
|
73
|
+
"""Initialize a new PipelineExpression.
|
74
|
+
|
75
|
+
Args:
|
76
|
+
left: Left side of the pipeline
|
77
|
+
op: The pipeline operator
|
78
|
+
right: Right side of the pipeline
|
79
|
+
"""
|
56
80
|
self.left = left
|
57
81
|
self.op = op
|
58
82
|
self.right = right
|
59
83
|
|
60
|
-
def resolve(self, content_node: ContentNode, variables, context: SelectorContext):
|
84
|
+
def resolve(self, content_node: ContentNode, variables: Dict, context: SelectorContext) -> List[ContentNode]:
|
85
|
+
"""Resolve this pipeline expression.
|
86
|
+
|
87
|
+
Args:
|
88
|
+
content_node: The current content node
|
89
|
+
variables: Variable bindings
|
90
|
+
context: The selector context
|
91
|
+
|
92
|
+
Returns:
|
93
|
+
List of matching content nodes
|
94
|
+
"""
|
61
95
|
left_nodes = self.left.resolve(content_node, variables, context)
|
62
96
|
result_nodes: List[ContentNode] = []
|
63
97
|
context.stream = context.stream + 1
|
@@ -76,31 +110,65 @@ class PipelineExpression(object):
|
|
76
110
|
return result_nodes[:1] if context.first_only else result_nodes
|
77
111
|
|
78
112
|
|
79
|
-
class UnaryExpression
|
80
|
-
"""A unary XPath expression.
|
113
|
+
class UnaryExpression:
|
114
|
+
"""A unary XPath expression (e.g., -foo)."""
|
81
115
|
|
82
|
-
def __init__(self, op, right):
|
116
|
+
def __init__(self, op: str, right: Any):
|
117
|
+
"""Initialize a new UnaryExpression.
|
118
|
+
|
119
|
+
Args:
|
120
|
+
op: The operator
|
121
|
+
right: The expression the operator is applied to
|
122
|
+
"""
|
83
123
|
self.op = op
|
84
|
-
"""the operator used in the expression"""
|
85
124
|
self.right = right
|
86
|
-
"""the expression the operator is applied to"""
|
87
|
-
|
88
125
|
|
89
|
-
|
126
|
+
def resolve(self, content_node: ContentNode, variables: Dict, context: SelectorContext) -> Any:
|
127
|
+
"""Resolve this unary expression.
|
128
|
+
|
129
|
+
Args:
|
130
|
+
content_node: The current content node
|
131
|
+
variables: Variable bindings
|
132
|
+
context: The selector context
|
133
|
+
|
134
|
+
Returns:
|
135
|
+
The result of applying the operator to the right expression
|
136
|
+
"""
|
137
|
+
# Handle negation
|
138
|
+
if self.op == "-":
|
139
|
+
right_value = self.right.resolve(content_node, variables, context)
|
140
|
+
if isinstance(right_value, (int, float)):
|
141
|
+
return -right_value
|
142
|
+
|
143
|
+
return None
|
90
144
|
|
91
145
|
|
92
|
-
class BinaryExpression
|
93
|
-
"""Any binary XPath expression. a/b
|
146
|
+
class BinaryExpression:
|
147
|
+
"""Any binary XPath expression (e.g., a/b, a and b, a | b)."""
|
94
148
|
|
95
|
-
def __init__(self, left, op, right):
|
149
|
+
def __init__(self, left: Any, op: str, right: Any):
|
150
|
+
"""Initialize a new BinaryExpression.
|
151
|
+
|
152
|
+
Args:
|
153
|
+
left: Left side of the expression
|
154
|
+
op: The operator
|
155
|
+
right: Right side of the expression
|
156
|
+
"""
|
96
157
|
self.left = left
|
97
|
-
"""the left side of the binary expression"""
|
98
158
|
self.op = op
|
99
|
-
"""the operator of the binary expression"""
|
100
159
|
self.right = right
|
101
|
-
"""the right side of the binary expression"""
|
102
160
|
|
103
|
-
def resolve(self, content_node: ContentNode, variables, context: SelectorContext):
|
161
|
+
def resolve(self, content_node: ContentNode, variables: Dict, context: SelectorContext) -> Any:
|
162
|
+
"""Resolve this binary expression.
|
163
|
+
|
164
|
+
Args:
|
165
|
+
content_node: The current content node
|
166
|
+
variables: Variable bindings
|
167
|
+
context: The selector context
|
168
|
+
|
169
|
+
Returns:
|
170
|
+
The result of applying the operator to the left and right expressions
|
171
|
+
"""
|
104
172
|
if self.op == "|":
|
105
173
|
return self.left.resolve(
|
106
174
|
content_node, variables, context
|
@@ -131,29 +199,78 @@ class BinaryExpression(object):
|
|
131
199
|
return bool(
|
132
200
|
self.get_value(self.left, content_node, variables, context)
|
133
201
|
) or bool(self.get_value(self.right, content_node, variables, context))
|
202
|
+
|
203
|
+
# Handle path operations
|
204
|
+
if self.op == "/" or self.op == "//":
|
205
|
+
# For path expressions, resolve left first then apply right to each result
|
206
|
+
left_results = self.left.resolve(content_node, variables, context)
|
207
|
+
context.last_op = self.op
|
208
|
+
|
209
|
+
all_results = []
|
210
|
+
for node in left_results:
|
211
|
+
right_results = self.right.resolve(node, variables, context)
|
212
|
+
all_results.extend(right_results)
|
213
|
+
|
214
|
+
# If first_only is True and we found a match, return immediately
|
215
|
+
if context.first_only and all_results:
|
216
|
+
break
|
217
|
+
|
218
|
+
return all_results[:1] if context.first_only else all_results
|
219
|
+
|
220
|
+
return None
|
134
221
|
|
135
|
-
def get_value(self, side, content_node, variables, context: SelectorContext):
|
222
|
+
def get_value(self, side: Any, content_node: ContentNode, variables: Dict, context: SelectorContext) -> Any:
|
223
|
+
"""Get the value of an expression.
|
224
|
+
|
225
|
+
Args:
|
226
|
+
side: The expression to evaluate
|
227
|
+
content_node: The current content node
|
228
|
+
variables: Variable bindings
|
229
|
+
context: The selector context
|
230
|
+
|
231
|
+
Returns:
|
232
|
+
The evaluated value
|
233
|
+
"""
|
136
234
|
if isinstance(side, FunctionCall):
|
137
235
|
return side.resolve(content_node, variables, context)
|
138
|
-
if isinstance(side, AbsolutePath):
|
236
|
+
if isinstance(side, (AbsolutePath, BinaryExpression, UnaryExpression)):
|
139
237
|
return side.resolve(content_node, variables, context)
|
140
238
|
|
141
239
|
return side
|
142
240
|
|
143
241
|
|
144
|
-
class PredicatedExpression
|
145
|
-
"""A filtered XPath expression. $var[1]
|
242
|
+
class PredicatedExpression:
|
243
|
+
"""A filtered XPath expression (e.g., $var[1], (a or b)[foo][@bar])."""
|
146
244
|
|
147
|
-
def __init__(self, base, predicates=None):
|
245
|
+
def __init__(self, base: Any, predicates: List = None):
|
246
|
+
"""Initialize a new PredicatedExpression.
|
247
|
+
|
248
|
+
Args:
|
249
|
+
base: The base expression to be filtered
|
250
|
+
predicates: List of filter predicates
|
251
|
+
"""
|
148
252
|
self.base = base
|
149
|
-
"""the base expression to be filtered"""
|
150
253
|
self.predicates = predicates or []
|
151
|
-
"""a list of filter predicates"""
|
152
254
|
|
153
|
-
def append_predicate(self, pred):
|
255
|
+
def append_predicate(self, pred: Any) -> None:
|
256
|
+
"""Add a predicate to this expression.
|
257
|
+
|
258
|
+
Args:
|
259
|
+
pred: The predicate to add
|
260
|
+
"""
|
154
261
|
self.predicates.append(pred)
|
155
262
|
|
156
|
-
def resolve(self, content_node, variables, context: SelectorContext):
|
263
|
+
def resolve(self, content_node: ContentNode, variables: Dict, context: SelectorContext) -> List[ContentNode]:
|
264
|
+
"""Resolve this predicated expression.
|
265
|
+
|
266
|
+
Args:
|
267
|
+
content_node: The current content node
|
268
|
+
variables: Variable bindings
|
269
|
+
context: The selector context
|
270
|
+
|
271
|
+
Returns:
|
272
|
+
List of content nodes that match the predicates
|
273
|
+
"""
|
157
274
|
nodes = self.base.resolve(content_node, variables, context)
|
158
275
|
results = []
|
159
276
|
for idx, node in enumerate(nodes):
|
@@ -162,72 +279,150 @@ class PredicatedExpression(object):
|
|
162
279
|
results.append(node)
|
163
280
|
return results
|
164
281
|
|
165
|
-
if not isinstance(predicate, int) and predicate.resolve(node):
|
282
|
+
if not isinstance(predicate, int) and predicate.resolve(node, variables, context):
|
166
283
|
results.append(node)
|
167
284
|
|
168
285
|
return results
|
169
286
|
|
170
287
|
|
171
|
-
class AbsolutePath
|
172
|
-
"""An absolute XPath path. /a/b/c
|
288
|
+
class AbsolutePath:
|
289
|
+
"""An absolute XPath path (e.g., /a/b/c, //a/ancestor:b/@c)."""
|
173
290
|
|
174
|
-
def __init__(self, op="/", relative=None):
|
291
|
+
def __init__(self, op: str = "/", relative: Any = None):
|
292
|
+
"""Initialize a new AbsolutePath.
|
293
|
+
|
294
|
+
Args:
|
295
|
+
op: The operator used to root the expression
|
296
|
+
relative: The relative path after the absolute root operator
|
297
|
+
"""
|
175
298
|
self.op = op
|
176
|
-
"""the operator used to root the expression"""
|
177
299
|
self.relative = relative
|
178
|
-
"""the relative path after the absolute root operator"""
|
179
300
|
|
180
|
-
def resolve(self, content_node, variables, context: SelectorContext):
|
301
|
+
def resolve(self, content_node: ContentNode, variables: Dict, context: SelectorContext) -> List[ContentNode]:
|
302
|
+
"""Resolve this absolute path.
|
303
|
+
|
304
|
+
Args:
|
305
|
+
content_node: The current content node
|
306
|
+
variables: Variable bindings
|
307
|
+
context: The selector context
|
308
|
+
|
309
|
+
Returns:
|
310
|
+
List of matching content nodes
|
311
|
+
"""
|
181
312
|
if self.op == "/":
|
182
313
|
context.last_op = "/"
|
183
|
-
|
314
|
+
# Start from the root node for absolute paths
|
315
|
+
root_node = content_node
|
316
|
+
while root_node.get_parent() is not None:
|
317
|
+
root_node = root_node.get_parent()
|
318
|
+
|
319
|
+
if self.relative is None:
|
320
|
+
return [root_node]
|
321
|
+
|
322
|
+
return self.relative.resolve(root_node, variables, context)
|
323
|
+
|
184
324
|
if self.op == "//":
|
185
325
|
context.last_op = "//"
|
186
|
-
|
187
|
-
|
326
|
+
# Start from the root but search all descendants
|
327
|
+
root_node = content_node
|
328
|
+
while root_node.get_parent() is not None:
|
329
|
+
root_node = root_node.get_parent()
|
330
|
+
|
331
|
+
return self.relative.resolve(root_node, variables, context)
|
332
|
+
|
333
|
+
raise Exception(f"Unsupported absolute path operator: {self.op}")
|
188
334
|
|
189
335
|
|
190
|
-
class Step
|
336
|
+
class Step:
|
191
337
|
"""A single step in a relative path."""
|
192
338
|
|
193
|
-
def __init__(self, axis, node_test, predicates):
|
339
|
+
def __init__(self, axis: Optional[str], node_test: Any, predicates: List):
|
340
|
+
"""Initialize a new Step.
|
341
|
+
|
342
|
+
Args:
|
343
|
+
axis: The axis for this step
|
344
|
+
node_test: The node test to apply
|
345
|
+
predicates: List of predicates to filter nodes
|
346
|
+
"""
|
194
347
|
self.axis = axis
|
195
348
|
self.node_test = node_test
|
196
349
|
self.predicates = predicates
|
197
350
|
|
198
|
-
def resolve(self,
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
351
|
+
def resolve(self, content_node: ContentNode, variables: Dict, context: SelectorContext) -> List[ContentNode]:
|
352
|
+
"""Resolve this step.
|
353
|
+
|
354
|
+
Args:
|
355
|
+
content_node: The current content node
|
356
|
+
variables: Variable bindings
|
357
|
+
context: The selector context
|
358
|
+
|
359
|
+
Returns:
|
360
|
+
List of matching content nodes
|
361
|
+
"""
|
362
|
+
if content_node is None:
|
204
363
|
return []
|
205
364
|
|
365
|
+
match = True
|
366
|
+
if isinstance(content_node, ContentFeature):
|
367
|
+
match = self.node_test.test(content_node, variables, context)
|
368
|
+
|
206
369
|
axis_node = None
|
207
370
|
|
208
|
-
if isinstance(
|
209
|
-
axis_node =
|
371
|
+
if isinstance(content_node, ContentNode):
|
372
|
+
axis_node = content_node
|
210
373
|
|
211
374
|
if self.axis == "parent":
|
212
375
|
parent = axis_node.get_parent()
|
376
|
+
# For parent axis, we need to check if any parent in the hierarchy matches
|
213
377
|
while parent is not None:
|
214
|
-
|
378
|
+
# For wildcard, return any parent
|
379
|
+
if self.node_test is None or (hasattr(self.node_test, 'name') and self.node_test.name == '*'):
|
215
380
|
return [parent]
|
216
|
-
|
381
|
+
|
382
|
+
# If the parent node type matches the requested node type, return it
|
383
|
+
if hasattr(self.node_test, 'name') and (parent.node_type == self.node_test.name):
|
217
384
|
return [parent]
|
385
|
+
|
386
|
+
# Try the next parent
|
218
387
|
parent = parent.get_parent()
|
388
|
+
|
389
|
+
# Look for parents elsewhere in the document to handle cross-references
|
390
|
+
if hasattr(self.node_test, 'name') and self.node_test.name != '*':
|
391
|
+
possible_parents = context.document.get_persistence().get_content_nodes(
|
392
|
+
self.node_test.name,
|
393
|
+
axis_node,
|
394
|
+
True
|
395
|
+
)
|
396
|
+
for possible_parent in possible_parents:
|
397
|
+
# Check if this node is a parent of our node
|
398
|
+
current = axis_node
|
399
|
+
while current is not None:
|
400
|
+
if current.get_parent() is not None and current.get_parent().id == possible_parent.id:
|
401
|
+
return [possible_parent]
|
402
|
+
current = current.get_parent()
|
403
|
+
|
219
404
|
return []
|
220
405
|
|
221
406
|
nodes = self.node_test.test(axis_node, variables, context)
|
222
407
|
final_nodes = []
|
223
408
|
|
409
|
+
# Special case for the direct node type with index selector pattern (like '//p[0]')
|
410
|
+
# This pattern should return all nodes of the given type, regardless of their index
|
411
|
+
direct_node_index_pattern = len(self.predicates) == 1 and isinstance(self.predicates[0], int)
|
412
|
+
|
224
413
|
# If first_only is True, only process until we find the first match
|
225
414
|
for node in nodes:
|
226
415
|
match = True
|
227
416
|
for predicate in self.predicates:
|
228
417
|
if isinstance(predicate, int):
|
229
|
-
|
418
|
+
# For direct node type with index patterns (//p[0]), ignore the index check
|
419
|
+
if direct_node_index_pattern:
|
420
|
+
# Keep match as True
|
421
|
+
pass
|
422
|
+
elif predicate == node.index:
|
230
423
|
match = True
|
424
|
+
else:
|
425
|
+
match = False
|
231
426
|
elif not predicate.resolve(node, variables, context):
|
232
427
|
match = False
|
233
428
|
|
@@ -247,22 +442,51 @@ class Step(object):
|
|
247
442
|
return []
|
248
443
|
|
249
444
|
|
250
|
-
class NameTest
|
445
|
+
class NameTest:
|
251
446
|
"""An element name node test for a Step."""
|
252
447
|
|
253
|
-
def __init__(self, prefix, name):
|
448
|
+
def __init__(self, prefix: Optional[str], name: str):
|
449
|
+
"""Initialize a new NameTest.
|
450
|
+
|
451
|
+
Args:
|
452
|
+
prefix: The namespace prefix, or None if unspecified
|
453
|
+
name: The local element name
|
454
|
+
"""
|
254
455
|
self.prefix = prefix
|
255
456
|
self.name = name
|
256
457
|
|
257
|
-
def test(self, obj, variables, context: SelectorContext):
|
458
|
+
def test(self, obj: Union[ContentNode, ContentFeature], variables: Dict, context: SelectorContext) -> Union[bool, List[ContentNode]]:
|
459
|
+
"""Test if a node matches this name test.
|
460
|
+
|
461
|
+
Args:
|
462
|
+
obj: The node or feature to test
|
463
|
+
variables: Variable bindings
|
464
|
+
context: The selector context
|
465
|
+
|
466
|
+
Returns:
|
467
|
+
Either a boolean result or a list of matching nodes
|
468
|
+
"""
|
258
469
|
if isinstance(obj, ContentNode):
|
259
470
|
if context.stream > 0:
|
471
|
+
# For streaming contexts, ensure exact node type match
|
260
472
|
if self.name == "*" or self.name == obj.node_type:
|
261
473
|
return [obj]
|
474
|
+
return []
|
262
475
|
else:
|
476
|
+
# For "//p" style selectors, we need to be more careful
|
477
|
+
# Get all possible matching nodes first
|
263
478
|
nodes = context.document.get_persistence().get_content_nodes(
|
264
479
|
self.name, obj, context.last_op != "/"
|
265
480
|
)
|
481
|
+
|
482
|
+
# Only add the current node if it exactly matches the node type
|
483
|
+
if self.name == "*" or self.name == obj.node_type:
|
484
|
+
nodes = [obj] + nodes
|
485
|
+
|
486
|
+
# Filter the nodes to ensure exact node type matches
|
487
|
+
if self.name != "*":
|
488
|
+
nodes = [node for node in nodes if node.node_type == self.name]
|
489
|
+
|
266
490
|
# If first_only is True, return only the first matching node
|
267
491
|
return nodes[:1] if context.first_only else nodes
|
268
492
|
|
@@ -273,62 +497,108 @@ class NameTest(object):
|
|
273
497
|
return False
|
274
498
|
|
275
499
|
|
276
|
-
class NodeType
|
500
|
+
class NodeType:
|
277
501
|
"""A node type node test for a Step."""
|
278
502
|
|
279
|
-
def __init__(self, name, literal=None):
|
503
|
+
def __init__(self, name: str, literal: Optional[str] = None):
|
504
|
+
"""Initialize a new NodeType.
|
505
|
+
|
506
|
+
Args:
|
507
|
+
name: The node type name, such as node or text
|
508
|
+
literal: The literal argument (for processing-instruction type)
|
509
|
+
"""
|
280
510
|
self.name = name
|
281
|
-
"""the node type name, such as node or text"""
|
282
511
|
self.literal = literal
|
283
|
-
"""the argument to the node specifier. XPath allows these only for
|
284
|
-
processing-instruction() node tests."""
|
285
512
|
|
286
513
|
|
287
|
-
class AbbreviatedStep
|
288
|
-
"""An abbreviated XPath step
|
514
|
+
class AbbreviatedStep:
|
515
|
+
"""An abbreviated XPath step (. or ..)."""
|
289
516
|
|
290
|
-
def __init__(self, abbr):
|
517
|
+
def __init__(self, abbr: str):
|
518
|
+
"""Initialize a new AbbreviatedStep.
|
519
|
+
|
520
|
+
Args:
|
521
|
+
abbr: The abbreviated step (. or ..)
|
522
|
+
"""
|
291
523
|
self.abbr = abbr
|
292
|
-
"""the abbreviated step"""
|
293
524
|
|
294
|
-
def resolve(self, content_node, variables, context: SelectorContext):
|
525
|
+
def resolve(self, content_node: ContentNode, variables: Dict, context: SelectorContext) -> List[ContentNode]:
|
526
|
+
"""Resolve this abbreviated step.
|
527
|
+
|
528
|
+
Args:
|
529
|
+
content_node: The current content node
|
530
|
+
variables: Variable bindings
|
531
|
+
context: The selector context
|
532
|
+
|
533
|
+
Returns:
|
534
|
+
List of matching content nodes
|
535
|
+
"""
|
295
536
|
if self.abbr == ".":
|
296
537
|
return [content_node]
|
297
538
|
if self.abbr == "..":
|
298
539
|
return [content_node.get_parent()] if content_node.get_parent() else []
|
299
|
-
raise Exception("
|
540
|
+
raise Exception(f"Unsupported abbreviated step: {self.abbr}")
|
300
541
|
|
301
542
|
|
302
|
-
class VariableReference
|
303
|
-
"""An XPath variable reference. $foo
|
543
|
+
class VariableReference:
|
544
|
+
"""An XPath variable reference (e.g., $foo, $myns:foo)."""
|
304
545
|
|
305
|
-
def __init__(self, name):
|
546
|
+
def __init__(self, name: Tuple[Optional[str], str]):
|
547
|
+
"""Initialize a new VariableReference.
|
548
|
+
|
549
|
+
Args:
|
550
|
+
name: A tuple (prefix, localname) containing the variable name
|
551
|
+
"""
|
306
552
|
self.name = name
|
307
|
-
"""a tuple (prefix, localname) containing the variable name"""
|
308
553
|
|
309
|
-
def resolve(self, variables, context: SelectorContext):
|
554
|
+
def resolve(self, variables: Dict, context: SelectorContext) -> Any:
|
555
|
+
"""Resolve this variable reference.
|
556
|
+
|
557
|
+
Args:
|
558
|
+
variables: Variable bindings
|
559
|
+
context: The selector context
|
560
|
+
|
561
|
+
Returns:
|
562
|
+
The value of the variable, or None if not found
|
563
|
+
"""
|
310
564
|
if self.name[1] in variables:
|
311
565
|
return variables[self.name[1]]
|
312
566
|
|
313
567
|
return None
|
314
568
|
|
315
569
|
|
316
|
-
class FunctionCall
|
317
|
-
"""An XPath function call. foo()
|
570
|
+
class FunctionCall:
|
571
|
+
"""An XPath function call (e.g., foo(), my:foo(1), foo(1, 'a', $var))."""
|
318
572
|
|
319
|
-
def __init__(self, prefix, name, args):
|
573
|
+
def __init__(self, prefix: Optional[str], name: str, args: List):
|
574
|
+
"""Initialize a new FunctionCall.
|
575
|
+
|
576
|
+
Args:
|
577
|
+
prefix: The namespace prefix, or None if unspecified
|
578
|
+
name: The local function name
|
579
|
+
args: A list of argument expressions
|
580
|
+
"""
|
320
581
|
self.prefix = prefix
|
321
|
-
"""the namespace prefix, or None if unspecified"""
|
322
582
|
self.name = name
|
323
|
-
"""the local function name"""
|
324
583
|
self.args = args
|
325
|
-
"""a list of argument expressions"""
|
326
584
|
|
327
|
-
def resolve(self, content_node, variables, context: SelectorContext):
|
585
|
+
def resolve(self, content_node: "ContentNode", variables: Dict, context: "SelectorContext") -> Any:
|
586
|
+
"""Resolve this function call.
|
587
|
+
|
588
|
+
Args:
|
589
|
+
content_node: The current content node
|
590
|
+
variables: Variable bindings
|
591
|
+
context: The selector context
|
592
|
+
|
593
|
+
Returns:
|
594
|
+
The result of the function call
|
595
|
+
"""
|
328
596
|
args = []
|
329
597
|
for arg in self.args:
|
330
598
|
if isinstance(arg, VariableReference):
|
331
599
|
args.append(arg.resolve(variables, context))
|
600
|
+
elif hasattr(arg, 'resolve'):
|
601
|
+
args.append(arg.resolve(content_node, variables, context))
|
332
602
|
else:
|
333
603
|
args.append(arg)
|
334
604
|
|
@@ -370,10 +640,13 @@ class FunctionCall(object):
|
|
370
640
|
return False
|
371
641
|
|
372
642
|
if self.name == "hasTag":
|
373
|
-
if len(self.args) == 0:
|
374
|
-
return len(content_node.get_tags()) > 0
|
375
643
|
|
376
|
-
|
644
|
+
if len(args) > 0:
|
645
|
+
# Check for a specific tag
|
646
|
+
return content_node.has_feature("tag", args[0])
|
647
|
+
else:
|
648
|
+
print(content_node.get_tags())
|
649
|
+
return len(content_node.get_tags()) > 0
|
377
650
|
|
378
651
|
if self.name == "hasFeature":
|
379
652
|
if len(args) == 0:
|
@@ -392,8 +665,8 @@ class FunctionCall(object):
|
|
392
665
|
if self.name == "content":
|
393
666
|
return content_node.content
|
394
667
|
|
395
|
-
if self.name == "
|
396
|
-
return content_node.
|
668
|
+
if self.name == "id":
|
669
|
+
return content_node.id
|
397
670
|
|
398
671
|
if self.name == "node_type":
|
399
672
|
return content_node.node_type
|
@@ -401,4 +674,4 @@ class FunctionCall(object):
|
|
401
674
|
if self.name == "index":
|
402
675
|
return content_node.index
|
403
676
|
|
404
|
-
return []
|
677
|
+
return []
|