kodexa-document 7.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodexa-document might be problematic. Click here for more details.
- kodexa_document/connectors.py +456 -0
- kodexa_document/model.py +3642 -0
- kodexa_document/persistence.py +2057 -0
- kodexa_document/persistence_models.py +421 -0
- kodexa_document/selectors/__init__.py +5 -0
- kodexa_document/selectors/ast.py +677 -0
- kodexa_document/selectors/error.py +29 -0
- kodexa_document/selectors/kodexa-ast-visitor.py +268 -0
- kodexa_document/selectors/parser.py +91 -0
- kodexa_document/selectors/resources/KodexaSelector.interp +99 -0
- kodexa_document/selectors/resources/KodexaSelector.tokens +56 -0
- kodexa_document/selectors/resources/KodexaSelectorLexer.interp +119 -0
- kodexa_document/selectors/resources/KodexaSelectorLexer.py +204 -0
- kodexa_document/selectors/resources/KodexaSelectorLexer.tokens +56 -0
- kodexa_document/selectors/resources/KodexaSelectorListener.py +570 -0
- kodexa_document/selectors/resources/KodexaSelectorParser.py +3246 -0
- kodexa_document/selectors/resources/KodexaSelectorVisitor.py +323 -0
- kodexa_document/selectors/visitor.py +265 -0
- kodexa_document/steps.py +109 -0
- kodexa_document-7.5.0.dist-info/METADATA +27 -0
- kodexa_document-7.5.0.dist-info/RECORD +22 -0
- kodexa_document-7.5.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,677 @@
|
|
|
1
|
+
"""Abstract Syntax Tree nodes for parsed XPath expressions.
|
|
2
|
+
|
|
3
|
+
This module contains basic nodes for representing parsed XPath expressions
|
|
4
|
+
created by the ANTLR-based parser. These classes provide the same functionality
|
|
5
|
+
as the original PLY-based parser's AST classes but are designed to work with
|
|
6
|
+
the ANTLR-generated parse tree.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import re
|
|
12
|
+
from typing import List, Optional, Any, Dict, Union, Tuple
|
|
13
|
+
|
|
14
|
+
# Import these types but make them optional to avoid circular imports
|
|
15
|
+
# In a real implementation, you'd use proper type annotations
|
|
16
|
+
try:
|
|
17
|
+
from kodexa_document.model import ContentNode, ContentFeature, Document
|
|
18
|
+
except ImportError:
|
|
19
|
+
ContentNode = Any
|
|
20
|
+
ContentFeature = Any
|
|
21
|
+
Document = Any
|
|
22
|
+
|
|
23
|
+
__all__ = [
|
|
24
|
+
"SelectorContext",
|
|
25
|
+
"UnaryExpression",
|
|
26
|
+
"BinaryExpression",
|
|
27
|
+
"PredicatedExpression",
|
|
28
|
+
"PipelineExpression",
|
|
29
|
+
"AbsolutePath",
|
|
30
|
+
"Step",
|
|
31
|
+
"NameTest",
|
|
32
|
+
"NodeType",
|
|
33
|
+
"AbbreviatedStep",
|
|
34
|
+
"VariableReference",
|
|
35
|
+
"FunctionCall",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class SelectorContext:
    """Mutable state shared by the AST nodes while one selector is evaluated."""

    def __init__(self, document: Document, first_only=False):
        """Create a context bound to a document.

        Args:
            document: The document the selector is evaluated against
            first_only: When true, resolution stops after the first match
        """
        self.document: Document = document
        self.first_only = first_only
        # Pipeline nesting depth; PipelineExpression raises and lowers it.
        self.stream = 0
        # Last path operator recorded by a path expression ("/" or "//").
        self.last_op = None
        # Maps a regex source string to its compiled pattern.
        self.pattern_cache = {}

    def cache_pattern(self, pattern: str) -> re.Pattern:
        """Return the compiled form of ``pattern``, compiling it at most once.

        Args:
            pattern: The regular expression source

        Returns:
            The compiled pattern held in ``pattern_cache``
        """
        try:
            return self.pattern_cache[pattern]
        except KeyError:
            compiled = self.pattern_cache[pattern] = re.compile(pattern)
            return compiled
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class PipelineExpression:
    """A pipeline XPath expression (e.g., a stream b)."""

    def __init__(self, left: Any, op: str, right: Any):
        """Initialize a new PipelineExpression.

        Args:
            left: Left side of the pipeline
            op: The pipeline operator
            right: Right side of the pipeline
        """
        self.left = left
        self.op = op
        self.right = right

    def resolve(self, content_node: ContentNode, variables: Dict, context: SelectorContext) -> List[ContentNode]:
        """Resolve the left side, then feed each of its nodes to the right side.

        The left side is resolved with the current ``context.stream`` value;
        the right side is resolved with ``context.stream`` raised by one. The
        counter is always restored, even when the right side raises, so a
        failed pipeline cannot leave the shared context in streaming mode.

        Args:
            content_node: The current content node
            variables: Variable bindings
            context: The selector context

        Returns:
            The concatenated right-side results; at most one node when
            ``context.first_only`` is set
        """
        left_nodes = self.left.resolve(content_node, variables, context)
        result_nodes: List[ContentNode] = []

        context.stream += 1
        try:
            # With first_only only the first left node can contribute a result.
            if context.first_only and left_nodes:
                nodes_to_process = left_nodes[:1]
            else:
                nodes_to_process = left_nodes

            for node in nodes_to_process:
                result_nodes.extend(self.right.resolve(node, variables, context))
                # Stop as soon as the first match is available.
                if context.first_only and result_nodes:
                    break
        finally:
            # Undo the increment on every exit path, including exceptions.
            context.stream -= 1

        return result_nodes[:1] if context.first_only else result_nodes
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class UnaryExpression:
    """A unary XPath expression such as ``-foo``."""

    def __init__(self, op: str, right: Any):
        """Store the operator and its operand expression.

        Args:
            op: The unary operator
            right: The expression the operator applies to
        """
        self.right = right
        self.op = op

    def resolve(self, content_node: ContentNode, variables: Dict, context: SelectorContext) -> Any:
        """Evaluate the operand and apply the operator.

        Only negation ("-") is implemented. Any other operator returns None
        without evaluating the operand.

        Args:
            content_node: The current content node
            variables: Variable bindings
            context: The selector context

        Returns:
            The negated operand when it is an int or float, otherwise None
        """
        if self.op != "-":
            return None

        operand = self.right.resolve(content_node, variables, context)
        # Non-numeric operands cannot be negated and yield None.
        return -operand if isinstance(operand, (int, float)) else None
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class BinaryExpression:
    """A two-operand XPath expression (e.g., a/b, a and b, a | b)."""

    def __init__(self, left: Any, op: str, right: Any):
        """Store both operands and the operator joining them.

        Args:
            left: Left operand
            op: The operator
            right: Right operand
        """
        self.left, self.op, self.right = left, op, right

    def resolve(self, content_node: ContentNode, variables: Dict, context: SelectorContext) -> Any:
        """Evaluate the expression for ``content_node``.

        Supported operators: "|" (list concatenation), "=" and "!=", "intersect",
        "and", "or", and the path operators "/" and "//".

        Args:
            content_node: The current content node
            variables: Variable bindings
            context: The selector context

        Returns:
            A list for "|", "intersect", "/" and "//"; a comparison result for
            "=" and "!="; a bool for "and" and "or"; None for any other operator
        """
        op = self.op

        if op == "|":
            from_left = self.left.resolve(content_node, variables, context)
            from_right = self.right.resolve(content_node, variables, context)
            return from_left + from_right

        if op in ("=", "!="):
            lhs = self.get_value(self.left, content_node, variables, context)
            rhs = self.get_value(self.right, content_node, variables, context)
            return lhs == rhs if op == "=" else lhs != rhs

        if op == "intersect":
            lhs = self.get_value(self.left, content_node, variables, context)
            rhs = self.get_value(self.right, content_node, variables, context)
            # Only two lists can be intersected; anything else gives no nodes.
            if not (isinstance(lhs, list) and isinstance(rhs, list)):
                return []
            return [item for item in lhs if item in rhs]

        if op == "and":
            # The right operand is evaluated only when the left one is truthy.
            if not bool(self.get_value(self.left, content_node, variables, context)):
                return False
            return bool(self.get_value(self.right, content_node, variables, context))

        if op == "or":
            # The right operand is evaluated only when the left one is falsy.
            if bool(self.get_value(self.left, content_node, variables, context)):
                return True
            return bool(self.get_value(self.right, content_node, variables, context))

        if op in ("/", "//"):
            # Resolve the left side first, then apply the right side per node.
            anchors = self.left.resolve(content_node, variables, context)
            context.last_op = op

            collected = []
            for anchor in anchors:
                collected.extend(self.right.resolve(anchor, variables, context))
                # With first_only, stop once any match has been collected.
                if context.first_only and collected:
                    break

            return collected[:1] if context.first_only else collected

        return None

    def get_value(self, side: Any, content_node: ContentNode, variables: Dict, context: SelectorContext) -> Any:
        """Turn an operand into a value.

        Function calls, absolute paths, and binary and unary expressions are
        resolved; every other operand is returned unchanged.

        Args:
            side: The operand to evaluate
            content_node: The current content node
            variables: Variable bindings
            context: The selector context

        Returns:
            The resolved value, or ``side`` itself
        """
        resolvable = (FunctionCall, AbsolutePath, BinaryExpression, UnaryExpression)
        if isinstance(side, resolvable):
            return side.resolve(content_node, variables, context)
        return side
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
class PredicatedExpression:
    """A base expression filtered by predicates (e.g., $var[1], (a or b)[foo][@bar])."""

    def __init__(self, base: Any, predicates: List = None):
        """Store the base expression and its predicates.

        Args:
            base: The expression whose results are filtered
            predicates: Filter predicates; a falsy value becomes a new empty list
        """
        self.base = base
        self.predicates = predicates or []

    def append_predicate(self, pred: Any) -> None:
        """Append one more predicate.

        Args:
            pred: The predicate to add
        """
        self.predicates.append(pred)

    def resolve(self, content_node: ContentNode, variables: Dict, context: SelectorContext) -> List[ContentNode]:
        """Resolve the base expression and keep the nodes selected by the predicates.

        An int predicate selects by zero-based position in the base result and
        ends the scan as soon as it hits. Any other predicate is resolved
        against the node, which is kept when the result is truthy.

        NOTE(review): a node is appended once for every predicate that accepts
        it, so several matching predicates give duplicates. Confirm this is
        intended before relying on it.

        Args:
            content_node: The current content node
            variables: Variable bindings
            context: The selector context

        Returns:
            The selected content nodes
        """
        selected = []
        candidates = self.base.resolve(content_node, variables, context)

        for position, candidate in enumerate(candidates):
            for predicate in self.predicates:
                if isinstance(predicate, int):
                    if predicate == position:
                        # A positional hit returns immediately.
                        selected.append(candidate)
                        return selected
                    continue

                if predicate.resolve(candidate, variables, context):
                    selected.append(candidate)

        return selected
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
class AbsolutePath:
    """A path rooted at the top of the tree (e.g., /a/b/c, //a/ancestor:b/@c)."""

    def __init__(self, op: str = "/", relative: Any = None):
        """Store the root operator and the path that follows it.

        Args:
            op: The operator that roots the expression ("/" or "//")
            relative: The relative path after the root operator, if any
        """
        self.relative = relative
        self.op = op

    @staticmethod
    def _root_of(node: Any) -> Any:
        """Follow ``get_parent()`` upwards until a node without a parent is reached."""
        while node.get_parent() is not None:
            node = node.get_parent()
        return node

    def resolve(self, content_node: ContentNode, variables: Dict, context: SelectorContext) -> List[ContentNode]:
        """Resolve the path starting from the root above ``content_node``.

        The operator is recorded in ``context.last_op`` before the relative
        path is resolved. A bare "/" with no relative path returns the root.

        Args:
            content_node: The current content node
            variables: Variable bindings
            context: The selector context

        Returns:
            List of matching content nodes

        Raises:
            Exception: If the operator is neither "/" nor "//"
        """
        if self.op not in ("/", "//"):
            raise Exception(f"Unsupported absolute path operator: {self.op}")

        context.last_op = self.op
        root = self._root_of(content_node)

        if self.op == "/" and self.relative is None:
            return [root]

        return self.relative.resolve(root, variables, context)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
class Step:
    """A single step in a relative path: an optional axis, a node test and predicates."""

    def __init__(self, axis: Optional[str], node_test: Any, predicates: List):
        """Initialize a new Step.

        Args:
            axis: The axis for this step (only "parent" gets special handling in resolve)
            node_test: The node test to apply; resolve calls its ``test`` method
            predicates: List of predicates to filter nodes; ints are compared
                with ``node.index``, anything else is resolved against the node
        """
        self.axis = axis
        self.node_test = node_test
        self.predicates = predicates

    def resolve(self, content_node: ContentNode, variables: Dict, context: SelectorContext) -> List[ContentNode]:
        """Resolve this step.

        Behaviour by input, as implemented below:

        * ``None`` returns an empty list.
        * A ``ContentNode`` with the "parent" axis returns the nearest ancestor
          accepted by the node test (a missing node test or the name "*"
          accepts the immediate parent). Failing that, it falls back to a
          lookup through the document's persistence layer, and finally to an
          empty list.
        * Any other ``ContentNode`` returns the nodes produced by the node
          test, filtered by the predicates and cut short after the first match
          when ``context.first_only`` is set.
        * Any other object (e.g. a ``ContentFeature``) is handled by the tail
          of the method; see the NOTE(review) there.

        Args:
            content_node: The current content node
            variables: Variable bindings
            context: The selector context

        Returns:
            List of matching content nodes
        """
        if content_node is None:
            return []

        # For features the node test returns a boolean; it is used only at the
        # end of the method, in the non-ContentNode path.
        match = True
        if isinstance(content_node, ContentFeature):
            match = self.node_test.test(content_node, variables, context)

        axis_node = None

        if isinstance(content_node, ContentNode):
            axis_node = content_node

            if self.axis == "parent":
                parent = axis_node.get_parent()
                # For parent axis, we need to check if any parent in the hierarchy matches
                while parent is not None:
                    # For wildcard, return any parent
                    if self.node_test is None or (hasattr(self.node_test, 'name') and self.node_test.name == '*'):
                        return [parent]

                    # If the parent node type matches the requested node type, return it
                    if hasattr(self.node_test, 'name') and (parent.node_type == self.node_test.name):
                        return [parent]

                    # Try the next parent
                    parent = parent.get_parent()

                # Look for parents elsewhere in the document to handle cross-references
                if hasattr(self.node_test, 'name') and self.node_test.name != '*':
                    # NOTE(review): the meaning of the third argument (True) is
                    # defined by the persistence layer, not visible here; confirm.
                    possible_parents = context.document.get_persistence().get_content_nodes(
                        self.node_test.name,
                        axis_node,
                        True
                    )
                    for possible_parent in possible_parents:
                        # Check if this node is a parent of our node
                        # (walks the ancestor chain comparing ids).
                        current = axis_node
                        while current is not None:
                            if current.get_parent() is not None and current.get_parent().id == possible_parent.id:
                                return [possible_parent]
                            current = current.get_parent()

                # The parent axis never falls through to the generic handling below.
                return []

            nodes = self.node_test.test(axis_node, variables, context)
            final_nodes = []

            # Special case for the direct node type with index selector pattern (like '//p[0]')
            # This pattern should return all nodes of the given type, regardless of their index
            direct_node_index_pattern = len(self.predicates) == 1 and isinstance(self.predicates[0], int)

            # If first_only is True, only process until we find the first match
            for node in nodes:
                match = True
                for predicate in self.predicates:
                    if isinstance(predicate, int):
                        # For direct node type with index patterns (//p[0]), ignore the index check
                        if direct_node_index_pattern:
                            # Keep match as True
                            pass
                        elif predicate == node.index:
                            # NOTE(review): this sets match back to True even if an
                            # earlier predicate in the list already rejected the
                            # node; confirm that is intended.
                            match = True
                        else:
                            match = False
                    elif not predicate.resolve(node, variables, context):
                        match = False

                if match:
                    final_nodes.append(node)
                    if context.first_only:
                        break

            return final_nodes

        # Reached only when content_node is not a ContentNode: the branch above
        # always returns, so axis_node is still None here.
        # NOTE(review): a matching non-ContentNode input therefore yields [None];
        # confirm whether [content_node] was meant.
        if match:
            return [axis_node]

        # axis_node is None, so this recursive call hits the None guard at the
        # top and returns [].
        if self.axis is not None:
            return self.resolve(axis_node, variables, context)

        return []
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
class NameTest:
    """A name-based node test used by a Step."""

    def __init__(self, prefix: Optional[str], name: str):
        """Store the name to match.

        Args:
            prefix: The namespace prefix, or None if unspecified
            name: The local element name ("*" matches anything)
        """
        self.name = name
        self.prefix = prefix

    def test(self, obj: Union[ContentNode, ContentFeature], variables: Dict, context: SelectorContext) -> Union[bool, List[ContentNode]]:
        """Apply the name test to a content node or a feature.

        For a content node the result is a list of nodes. Inside a pipeline
        (``context.stream > 0``) only the node itself is considered; otherwise
        the document's persistence layer is asked for candidates. For a feature
        the result is a boolean. Any other object gives False.

        Args:
            obj: The node or feature to test
            variables: Variable bindings
            context: The selector context

        Returns:
            A list of matching nodes for a content node, otherwise a boolean
        """
        if isinstance(obj, ContentNode):
            if context.stream > 0:
                # Streaming: the node either matches by exact type or not at all.
                if self.name == "*" or self.name == obj.node_type:
                    return [obj]
                return []

            # The third argument is true unless the last path operator was "/".
            store = context.document.get_persistence()
            found = store.get_content_nodes(self.name, obj, context.last_op != "/")

            if self.name == "*":
                # Wildcard: the node itself always leads, and nothing is filtered.
                found = [obj] + found
            else:
                # Named test: the node itself leads only on an exact type match,
                # and every candidate must have exactly the requested type.
                if self.name == obj.node_type:
                    found = [obj] + found
                found = [node for node in found if node.node_type == self.name]

            if context.first_only:
                return found[:1]
            return found

        if isinstance(obj, ContentFeature):
            if self.name == "*":
                return True
            return obj.feature_type == self.prefix and obj.name == self.name

        return False
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
class NodeType:
    """A node-type node test for a Step; a plain holder for the parsed values."""

    def __init__(self, name: str, literal: Optional[str] = None):
        """Store the node type and its optional literal argument.

        Args:
            name: The node type name, such as node or text
            literal: The literal argument (for processing-instruction type)
        """
        self.name, self.literal = name, literal
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
class AbbreviatedStep:
    """An abbreviated XPath step (. or ..)."""

    def __init__(self, abbr: str):
        """Initialize a new AbbreviatedStep.

        Args:
            abbr: The abbreviated step (. or ..)
        """
        self.abbr = abbr

    def resolve(self, content_node: ContentNode, variables: Dict, context: SelectorContext) -> List[ContentNode]:
        """Resolve this abbreviated step.

        Args:
            content_node: The current content node
            variables: Variable bindings (unused)
            context: The selector context (unused)

        Returns:
            ``[content_node]`` for "."; for ".." a one-element list holding the
            parent, or an empty list when the parent is falsy

        Raises:
            Exception: If the abbreviation is neither "." nor ".."
        """
        if self.abbr == ".":
            return [content_node]

        if self.abbr == "..":
            # Call get_parent() once so that the object checked is the object
            # returned, and the lookup is not repeated.
            parent = content_node.get_parent()
            return [parent] if parent else []

        raise Exception(f"Unsupported abbreviated step: {self.abbr}")
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
class VariableReference:
    """A reference to a bound variable (e.g., $foo, $myns:foo)."""

    def __init__(self, name: Tuple[Optional[str], str]):
        """Store the variable name.

        Args:
            name: A ``(prefix, localname)`` tuple naming the variable
        """
        self.name = name

    def resolve(self, variables: Dict, context: SelectorContext) -> Any:
        """Look the variable up by its local name; the prefix is ignored.

        Args:
            variables: Variable bindings
            context: The selector context (unused)

        Returns:
            The bound value, or None when the variable is not bound
        """
        local_name = self.name[1]
        return variables.get(local_name)
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
class FunctionCall:
    """An XPath function call (e.g., foo(), my:foo(1), foo(1, 'a', $var))."""

    def __init__(self, prefix: Optional[str], name: str, args: List):
        """Initialize a new FunctionCall.

        Args:
            prefix: The namespace prefix, or None if unspecified
            name: The local function name
            args: A list of argument expressions
        """
        self.prefix = prefix
        self.name = name
        self.args = args

    def resolve(self, content_node: "ContentNode", variables: Dict, context: "SelectorContext") -> Any:
        """Resolve this function call.

        Arguments are evaluated first: variable references are looked up in
        ``variables``, other objects with a ``resolve`` method are resolved
        against ``content_node``, and anything else is passed through as a
        literal.

        Supported function names:

        * ``true`` / ``false``: the boolean constants
        * ``contentRegex(pattern[, all_content])``: the tested content when the
          pattern matches at its start, else None. A truthy second argument
          tests ``get_all_content()`` instead of ``content``.
        * ``typeRegex(pattern)``: the node type when the pattern matches, else None
        * ``tagRegex(pattern)``: True when any "tag" feature name matches
        * ``hasTag([name])``: True when the node has the named tag, or any tag
          if no name is given
        * ``hasFeature([type, name])``: True when the node has that feature, or
          any feature if called without arguments
        * ``hasFeatureValue(type, name, value)``: True when one of the feature
          values equals ``value``
        * ``content`` / ``id`` / ``node_type`` / ``index``: the node attribute

        Args:
            content_node: The current content node
            variables: Variable bindings
            context: The selector context (supplies the regex cache)

        Returns:
            The result of the function call; an empty list for an unknown name
        """
        args = []
        for arg in self.args:
            if isinstance(arg, VariableReference):
                # Variable references take no content node when resolved.
                args.append(arg.resolve(variables, context))
            elif hasattr(arg, 'resolve'):
                args.append(arg.resolve(content_node, variables, context))
            else:
                args.append(arg)

        if self.name == "true":
            return True

        if self.name == "false":
            return False

        if self.name == "contentRegex":
            compiled_pattern = context.cache_pattern(args[0])

            content_to_test = content_node.content

            # A truthy second argument switches to the node's full content.
            if len(args) > 1:
                if bool(args[1]):
                    content_to_test = content_node.get_all_content()

            if content_to_test is not None and compiled_pattern.match(content_to_test):
                return content_to_test

            return None

        if self.name == "typeRegex":
            compiled_pattern = context.cache_pattern(args[0])
            if content_node.node_type is not None and compiled_pattern.match(
                content_node.node_type
            ):
                return content_node.node_type

            return None

        if self.name == "tagRegex":
            compiled_pattern = context.cache_pattern(args[0])
            for feature in content_node.get_features_of_type("tag"):
                if feature.name is not None and compiled_pattern.match(feature.name):
                    return True

            return False

        if self.name == "hasTag":
            if len(args) > 0:
                # Check for a specific tag
                return content_node.has_feature("tag", args[0])

            # No tag name given: report whether the node has any tag at all.
            # This is silent; library code must not write to stdout.
            return len(content_node.get_tags()) > 0

        if self.name == "hasFeature":
            if len(args) == 0:
                return len(content_node.get_features()) > 0

            return content_node.has_feature(args[0], args[1])

        if self.name == "hasFeatureValue":
            values = content_node.get_feature_values(args[0], args[1])
            if values:
                for value in values:
                    if value == args[2]:
                        return True
            return False

        if self.name == "content":
            return content_node.content

        if self.name == "id":
            return content_node.id

        if self.name == "node_type":
            return content_node.node_type

        if self.name == "index":
            return content_node.index

        # Unknown function: an empty (falsy) result.
        return []
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from antlr4.error.ErrorListener import ErrorListener
|
|
2
|
+
|
|
3
|
+
class KodexaSyntaxErrorListener(ErrorListener):
    """ANTLR error listener that collects syntax errors as formatted messages."""

    def __init__(self):
        super().__init__()
        # One "line L:C message" string per reported syntax error.
        self.errors = []

    def syntaxError(self, recognizer, offendingSymbol, line, column, msg, e):
        """Record one syntax error as ``line <line>:<column> <msg>``."""
        self.errors.append(f"line {line}:{column} {msg}")

    def hasErrors(self):
        """Return True when at least one syntax error has been recorded."""
        return bool(self.errors)

    def getErrors(self):
        """Return the list of recorded error messages (the live list, not a copy)."""
        return self.errors
|