moxml 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +10 -10
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +224 -43
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +51 -0
- data/docs/_guides/modifying-xml.adoc +292 -0
- data/docs/_guides/parsing-xml.adoc +230 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_guides/xml-declaration.adoc +450 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +97 -0
- data/docs/_pages/adapters/libxml.adoc +285 -0
- data/docs/_pages/adapters/nokogiri.adoc +251 -0
- data/docs/_pages/adapters/oga.adoc +291 -0
- data/docs/_pages/adapters/ox.adoc +56 -0
- data/docs/_pages/adapters/rexml.adoc +292 -0
- data/docs/_pages/best-practices.adoc +429 -0
- data/docs/_pages/compatibility.adoc +467 -0
- data/docs/_pages/configuration.adoc +250 -0
- data/docs/_pages/error-handling.adoc +349 -0
- data/docs/_pages/headed-ox-limitations.adoc +574 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +140 -0
- data/docs/_pages/node-api-reference.adoc +49 -0
- data/docs/_pages/performance.adoc +35 -0
- data/docs/_pages/quick-start.adoc +243 -0
- data/docs/_pages/thread-safety.adoc +28 -0
- data/docs/_references/document-api.adoc +407 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +267 -0
- data/docs/_tutorials/builder-pattern.adoc +342 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +324 -0
- data/docs/_tutorials/xpath-queries.adoc +358 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1564 -0
- data/lib/moxml/adapter/nokogiri.rb +156 -9
- data/lib/moxml/adapter/oga.rb +190 -15
- data/lib/moxml/adapter/ox.rb +322 -28
- data/lib/moxml/adapter/rexml.rb +157 -28
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +57 -2
- data/lib/moxml/declaration.rb +9 -0
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +53 -6
- data/lib/moxml/document_builder.rb +34 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +155 -4
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1770 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_preservation_spec.rb +217 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +178 -34
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
|
@@ -0,0 +1,485 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module XPath
|
|
5
|
+
# XPath expression parser
|
|
6
|
+
#
|
|
7
|
+
# Implements a recursive descent parser for XPath 1.0 expressions.
|
|
8
|
+
# Builds an Abstract Syntax Tree (AST) from tokenized input.
|
|
9
|
+
#
|
|
10
|
+
# Grammar (simplified XPath 1.0):
|
|
11
|
+
# expr ::= or_expr
|
|
12
|
+
# or_expr ::= and_expr ('or' and_expr)*
|
|
13
|
+
# and_expr ::= equality ('and' equality)*
|
|
14
|
+
# equality ::= relational (('=' | '!=') relational)*
|
|
15
|
+
# relational ::= additive (('<' | '>' | '<=' | '>=') additive)*
|
|
16
|
+
# additive ::= multiplicative (('+' | '-') multiplicative)*
|
|
17
|
+
# multiplicative ::= unary (('*' | 'div' | 'mod') unary)*
|
|
18
|
+
# unary ::= ('-')? union
|
|
19
|
+
# union ::= path_expr ('|' path_expr)*
|
|
20
|
+
# path_expr ::= filter_expr | location_path
|
|
21
|
+
# filter_expr ::= primary_expr predicate*
|
|
22
|
+
# primary ::= variable | '(' expr ')' | literal | number | function
|
|
23
|
+
# location_path ::= absolute_path | relative_path
|
|
24
|
+
#
|
|
25
|
+
# @example
|
|
26
|
+
# ast = Parser.parse("//book[@id='123']")
|
|
27
|
+
# ast = Parser.parse_with_cache("//book[@id='123']")
|
|
28
|
+
class Parser
|
|
29
|
+
# Parse cache for compiled expressions
|
|
30
|
+
CACHE = Cache.new(100)
|
|
31
|
+
|
|
32
|
+
# Parse an XPath expression
|
|
33
|
+
#
|
|
34
|
+
# @param expression [String] XPath expression to parse
|
|
35
|
+
# @return [AST::Node] Root node of AST
|
|
36
|
+
# @raise [XPath::SyntaxError] if expression is invalid
|
|
37
|
+
# Convenience entry point: builds a parser for the expression and runs it.
#
# @param expression [String] XPath expression to parse
# @return [AST::Node] Root node of the resulting AST
# @raise [XPath::SyntaxError] if expression is invalid
def self.parse(expression)
  parser = new(expression)
  parser.parse
end
|
|
40
|
+
|
|
41
|
+
# Parse with caching
|
|
42
|
+
#
|
|
43
|
+
# @param expression [String] XPath expression to parse
|
|
44
|
+
# @return [AST::Node] Root node of AST (possibly cached)
|
|
45
|
+
# Parse through the class-level CACHE.
#
# The expression string is the cache key; the block handed to
# CACHE.get_or_set performs the actual parse.
# NOTE(review): presumably the block only runs on a cache miss - confirm
# against Cache#get_or_set.
#
# @param expression [String] XPath expression to parse
# @return [AST::Node] Root node of AST (possibly cached)
def self.parse_with_cache(expression)
  CACHE.get_or_set(expression) do
    parse(expression)
  end
end
|
|
48
|
+
|
|
49
|
+
# Initialize parser with expression
|
|
50
|
+
#
|
|
51
|
+
# @param expression [String] XPath expression
|
|
52
|
+
# Set up parser state: the expression is coerced to a String, tokenized
# eagerly by a Lexer, and the read position starts at the first token.
#
# @param expression [String] XPath expression (anything responding to to_s)
def initialize(expression)
  source = expression.to_s
  lexer = Lexer.new(source)

  @expression = source
  @lexer = lexer
  @tokens = lexer.tokenize
  @position = 0
end
|
|
58
|
+
|
|
59
|
+
# Parse the expression into an AST
|
|
60
|
+
#
|
|
61
|
+
# @return [AST::Node] Root node of AST
|
|
62
|
+
# @raise [XPath::SyntaxError] if expression is invalid
|
|
63
|
+
# Parse the token stream into an AST.
#
# An empty token list yields an :empty node. Otherwise one full
# expression is parsed and every token must have been consumed by it.
#
# @return [AST::Node] Root node of AST
# @raise [XPath::SyntaxError] if tokens remain after the expression
def parse
  return AST::Node.new(:empty) if @tokens.empty?

  ast = parse_expr
  return ast if at_end?

  raise_syntax_error("Unexpected token after expression: #{current_token}")
end
|
|
74
|
+
|
|
75
|
+
private
|
|
76
|
+
|
|
77
|
+
# Get current token
|
|
78
|
+
#
|
|
79
|
+
# @return [Array, nil] Current token [type, value, position]
|
|
80
|
+
# Token under the read cursor.
#
# @return [Array, nil] Current token [type, value, position], or nil when
#   the cursor is past the last token
def current_token
  index = @position
  @tokens[index]
end
|
|
83
|
+
|
|
84
|
+
# Get current token type
|
|
85
|
+
#
|
|
86
|
+
# @return [Symbol, nil] Token type
|
|
87
|
+
# Type (first element) of the current token.
#
# @return [Symbol, nil] Token type, or nil when there is no current token
def current_type
  token = current_token
  token&.first
end
|
|
90
|
+
|
|
91
|
+
# Get current token value
|
|
92
|
+
#
|
|
93
|
+
# @return [String, nil] Token value
|
|
94
|
+
# Value (second element) of the current token.
#
# @return [String, nil] Token value, or nil when there is no current token
def current_value
  token = current_token
  token.nil? ? nil : token[1]
end
|
|
97
|
+
|
|
98
|
+
# Check if at end of tokens
|
|
99
|
+
#
|
|
100
|
+
# @return [Boolean]
|
|
101
|
+
# Whether every token has been consumed.
#
# @return [Boolean] true once the cursor has moved past the last token
def at_end?
  @tokens.length <= @position
end
|
|
104
|
+
|
|
105
|
+
# Advance to next token
|
|
106
|
+
#
|
|
107
|
+
# @return [Array, nil] Previous token
|
|
108
|
+
# Move the cursor one token forward.
#
# The cursor is incremented even when it is already past the end.
#
# @return [Array, nil] The token that was current before advancing
def advance
  current_token.tap { @position += 1 }
end
|
|
113
|
+
|
|
114
|
+
# Check if current token matches type
|
|
115
|
+
#
|
|
116
|
+
# @param types [Array<Symbol>] Token types to check
|
|
117
|
+
# @return [Boolean]
|
|
118
|
+
# Test the current token's type against a list of candidates without
# consuming anything.
#
# @param types [Array<Symbol>] Token types to check
# @return [Boolean] true if the current token type is one of +types+
def match?(*types)
  wanted = current_type
  types.include?(wanted)
end
|
|
121
|
+
|
|
122
|
+
# Consume token if it matches, otherwise error
|
|
123
|
+
#
|
|
124
|
+
# @param type [Symbol] Expected token type
|
|
125
|
+
# @param message [String] Error message if not found
|
|
126
|
+
# @return [Array] Consumed token
|
|
127
|
+
# @raise [XPath::SyntaxError] if token doesn't match
|
|
128
|
+
# Require the current token to be of the given type and step past it.
#
# @param type [Symbol] Expected token type
# @param message [String] Error message used when the type differs
# @return [Array] Consumed token
# @raise [XPath::SyntaxError] if token doesn't match
def consume(type, message)
  raise_syntax_error(message) unless current_type == type

  advance
end
|
|
135
|
+
|
|
136
|
+
# Raise syntax error
|
|
137
|
+
#
|
|
138
|
+
# @param message [String] Error message
|
|
139
|
+
# @raise [XPath::SyntaxError]
|
|
140
|
+
# Raise a syntax error located at the current token.
#
# The reported position is the third element of the current token; when
# there is no current token (or it carries no position) the length of
# the expression is used instead.
#
# @param message [String] Error message
# @raise [XPath::SyntaxError] always
def raise_syntax_error(message)
  token = current_token
  offset = token.nil? ? nil : token[2]
  offset ||= @expression.length

  raise XPath::SyntaxError.new(message, expression: @expression, position: offset)
end
|
|
148
|
+
|
|
149
|
+
# Parse top-level expression
|
|
150
|
+
# Entry rule of the grammar (expr ::= or_expr); delegates to the
# lowest-precedence operator level.
#
# @return [AST::Node] Parsed expression
def parse_expr
  parse_or_expr
end
|
|
153
|
+
|
|
154
|
+
# Parse OR expression
|
|
155
|
+
# or_expr ::= and_expr ('or' and_expr)*
#
# Operands are folded left to right, so "a or b or c" becomes
# (a or b) or c.
#
# @return [AST::Node] Parsed expression
def parse_or_expr
  expr = parse_and_expr

  while match?(:or)
    advance # discard the 'or' keyword
    expr = AST::Node.binary_op(:or, expr, parse_and_expr)
  end

  expr
end
|
|
166
|
+
|
|
167
|
+
# Parse AND expression
|
|
168
|
+
# and_expr ::= equality ('and' equality)*
#
# Operands are folded left to right.
#
# @return [AST::Node] Parsed expression
def parse_and_expr
  expr = parse_equality

  while match?(:and)
    advance # discard the 'and' keyword
    expr = AST::Node.binary_op(:and, expr, parse_equality)
  end

  expr
end
|
|
179
|
+
|
|
180
|
+
# Parse equality expression
|
|
181
|
+
# equality ::= relational (('=' | '!=') relational)*
#
# The operator token's type (:eq or :neq) becomes the AST operator.
#
# @return [AST::Node] Parsed expression
def parse_equality
  expr = parse_relational

  while match?(:eq, :neq)
    operator = advance.first
    expr = AST::Node.binary_op(operator, expr, parse_relational)
  end

  expr
end
|
|
193
|
+
|
|
194
|
+
# Parse relational expression
|
|
195
|
+
# relational ::= additive (('<' | '>' | '<=' | '>=') additive)*
#
# The operator token's type (:lt, :gt, :lte or :gte) becomes the AST
# operator.
#
# @return [AST::Node] Parsed expression
def parse_relational
  expr = parse_additive

  while match?(:lt, :gt, :lte, :gte)
    operator = advance.first
    expr = AST::Node.binary_op(operator, expr, parse_additive)
  end

  expr
end
|
|
207
|
+
|
|
208
|
+
# Parse additive expression
|
|
209
|
+
# additive ::= multiplicative (('+' | '-') multiplicative)*
#
# The operator token's type (:plus or :minus) becomes the AST operator.
#
# @return [AST::Node] Parsed expression
def parse_additive
  expr = parse_multiplicative

  while match?(:plus, :minus)
    operator = advance.first
    expr = AST::Node.binary_op(operator, expr, parse_multiplicative)
  end

  expr
end
|
|
221
|
+
|
|
222
|
+
# Parse multiplicative expression
|
|
223
|
+
# multiplicative ::= unary (('*' | 'div' | 'mod') unary)*
#
# The operator token's type (:star, :div or :mod) becomes the AST
# operator.
#
# @return [AST::Node] Parsed expression
def parse_multiplicative
  expr = parse_unary

  while match?(:star, :div, :mod)
    operator = advance.first
    expr = AST::Node.binary_op(operator, expr, parse_unary)
  end

  expr
end
|
|
235
|
+
|
|
236
|
+
# Parse unary expression
|
|
237
|
+
# unary ::= '-' unary | union
#
# XPath 1.0 allows any number of leading minus signs (UnaryExpr ::=
# UnionExpr | '-' UnaryExpr), so "--1" and "- -x" are valid. All leading
# minus tokens are consumed first, then the operand is wrapped once per
# sign, innermost first. This is done iteratively so a long run of signs
# cannot exhaust the call stack.
#
# @return [AST::Node] The operand, wrapped in one :minus unary node per
#   leading minus sign
def parse_unary
  negations = 0
  while match?(:minus)
    advance
    negations += 1
  end

  operand = parse_union
  negations.times { operand = AST::Node.unary_op(:minus, operand) }
  operand
end
|
|
246
|
+
|
|
247
|
+
# Parse union expression
|
|
248
|
+
# union ::= path_expr ('|' path_expr)*
#
# A lone path expression is returned as-is; two or more are collected
# into a single union node.
#
# @return [AST::Node] Path expression or union of path expressions
def parse_union
  operands = [parse_path_expr]

  while match?(:pipe)
    advance
    operands << parse_path_expr
  end

  return operands.first if operands.size == 1

  AST::Node.union(*operands)
end
|
|
262
|
+
|
|
263
|
+
# Parse path expression (location path or filter expression)
|
|
264
|
+
# path_expr ::= filter_expr | location_path
#
# Decision order:
# 1. A leading '/' or '//' is always a location path.
# 2. A string, number, '$', '(' or a name directly followed by '(' starts
#    a primary expression; any predicates following it turn it into a
#    :filter_expr node whose children are the primary and the predicates.
# 3. Anything else is parsed as a location path.
#
# NOTE(review): a filter expression followed by '/' (e.g. "(a|b)/c") is
# not continued here; the '/' is left for the caller.
#
# @return [AST::Node] Location path, primary expression or filter
#   expression
# @raise [XPath::SyntaxError] if a predicate is not closed with ']'
def parse_path_expr
  return parse_location_path if match?(:slash, :dslash)

  starts_primary = match?(:string, :number, :dollar, :lparen) ||
                   (match?(:name) && peek_is?(:lparen))
  return parse_location_path unless starts_primary

  primary = parse_primary
  return primary unless match?(:lbracket)

  filters = []
  while match?(:lbracket)
    advance
    condition = parse_expr
    consume(:rbracket, "Expected ']' after predicate")
    filters << AST::Node.predicate(condition)
  end

  AST::Node.new(:filter_expr, [primary, *filters])
end
|
|
294
|
+
|
|
295
|
+
# Check if next token matches type
|
|
296
|
+
# One-token lookahead: compares the type of the token after the current
# one with +type+, without moving the cursor.
#
# @param type [Symbol] Token type to look for
# @return [Boolean] true if the next token exists and has that type
def peek_is?(type)
  upcoming = @tokens[@position + 1]
  upcoming_type = upcoming.nil? ? nil : upcoming.first
  upcoming_type == type
end
|
|
299
|
+
|
|
300
|
+
# Parse location path
|
|
301
|
+
# location_path ::= absolute_path | relative_path
#
# * '/' alone (end of input, or followed by '|', ']', ')' or ',') is an
#   absolute path whose single step is the current node.
# * '/' followed by steps wraps those steps in an absolute path.
# * '//' does the same but inserts a descendant-or-self wildcard step in
#   front of the steps.
# * Anything else is returned as a relative path.
#
# @return [AST::Node] Absolute or relative path node
# @raise [XPath::SyntaxError] if a step cannot be parsed
def parse_location_path
  case current_type
  when :slash
    advance
    bare_root = at_end? || match?(:pipe, :rbracket, :rparen, :comma)
    return AST::Node.absolute_path(AST::Node.current) if bare_root

    AST::Node.absolute_path(*parse_relative_path.children)
  when :dslash
    advance
    tail = parse_relative_path
    AST::Node.absolute_path(
      AST::Node.axis("descendant-or-self", AST::Node.wildcard),
      *tail.children,
    )
  else
    parse_relative_path
  end
end
|
|
325
|
+
|
|
326
|
+
# Parse relative path (series of steps)
|
|
327
|
+
# relative_path ::= step (('/' | '//') step)*
#
# Step separators are accepted in three spellings:
# * a single :slash token ("a/b"),
# * a single :dslash token ("a//b"), and
# * two consecutive :slash tokens, which are treated like "//".
#
# Both "//" spellings insert a descendant-or-self wildcard step between
# the neighbouring steps. Handling :dslash here matters because
# parse_path_expr and parse_location_path treat "//" as its own :dslash
# token; without it "a//b" would stop after "a" and be rejected by the
# caller as an unexpected token.
#
# A separator that is followed by end of input or by '|', ']', ')' or ','
# contributes no further step (the trailing separator is tolerated).
#
# @return [AST::Node] relative_path node holding the parsed steps
# @raise [XPath::SyntaxError] if a step cannot be parsed
def parse_relative_path
  steps = [parse_step]

  while match?(:slash, :dslash)
    descendant = match?(:dslash)
    advance

    # Two separate '/' tokens in a row mean the same as one '//' token.
    if !descendant && match?(:slash)
      advance
      descendant = true
    end

    steps << AST::Node.axis("descendant-or-self", AST::Node.wildcard) if descendant
    steps << parse_step unless at_end? || match?(:pipe, :rbracket, :rparen, :comma)
  end

  AST::Node.relative_path(*steps)
end
|
|
343
|
+
|
|
344
|
+
# Parse a single step
|
|
345
|
+
# Parse a single location step.
#
# Supported forms:
# * "."            - current node (no predicates are parsed)
# * ".."           - parent node (no predicates are parsed)
# * "@test"        - abbreviated attribute axis
# * "axis::test"   - explicit axis
# * "test"         - abbreviated child axis
#
# The node test after "@" is parsed with parse_node_test, so "@*",
# prefixed names ("@xml:lang") and attributes named like operator
# keywords ("@div", "@mod", "@and", "@or") are accepted and produce the
# same AST as the unabbreviated "attribute::..." form. The token after
# "@" must still start a name test; otherwise the original
# "Expected attribute name after @" error is raised.
#
# @return [AST::Node] The step, wrapped with its predicates when any
#   follow it
# @raise [XPath::SyntaxError] on a malformed step
def parse_step
  if match?(:dot)
    advance
    return AST::Node.current
  end

  if match?(:ddot)
    advance
    return AST::Node.parent
  end

  axis_name =
    if match?(:at)
      advance
      unless match?(:star, :name, :and, :or, :mod, :div)
        raise_syntax_error("Expected attribute name after @")
      end
      "attribute"
    elsif match?(:axis)
      explicit_axis = current_value
      advance
      consume(:dcolon, "Expected '::' after axis name")
      explicit_axis
    else
      # No axis given: abbreviated child axis.
      "child"
    end

  node_test = parse_node_test
  parse_predicates(AST::Node.axis(axis_name, node_test))
end
|
|
377
|
+
|
|
378
|
+
# Parse node test
|
|
379
|
+
# Parse a node test.
#
# Accepted forms:
# * "*"              - wildcard
# * "type()"         - node type test; the parentheses must be empty
# * "name"           - name test without prefix
# * "prefix:name"    - name test with namespace prefix
# * "prefix:*"       - any name within a namespace prefix
#
# The keyword tokens :and, :or, :mod and :div are accepted wherever a
# name is, because they are also valid XML names.
#
# @return [AST::Node] Wildcard, node-type or name test node
# @raise [XPath::SyntaxError] if no node test starts here, if a node type
#   is not followed by "()", or if a prefix is not followed by a local
#   name or "*"
def parse_node_test
  if match?(:star)
    advance
    return AST::Node.wildcard
  end

  if match?(:node_type)
    kind = current_value
    advance
    consume(:lparen, "Expected '(' after node type")
    consume(:rparen, "Expected ')' after node type")
    return AST::Node.node_type(kind)
  end

  raise_syntax_error("Expected node test") unless match?(:name, :and, :or, :mod, :div)

  first_part = current_value
  advance

  # Without a following ':' the name stands alone (no namespace prefix).
  return AST::Node.test(nil, first_part) unless match?(:colon)

  advance
  if match?(:star)
    advance
    AST::Node.test(first_part, "*")
  elsif match?(:name, :and, :or, :mod, :div)
    local_part = current_value
    advance
    AST::Node.test(first_part, local_part)
  else
    raise_syntax_error("Expected local name after namespace")
  end
end
|
|
415
|
+
|
|
416
|
+
# Parse predicates
|
|
417
|
+
# Parse zero or more "[expr]" predicates following a step.
#
# @param step [AST::Node] The step the predicates belong to
# @return [AST::Node] +step+ itself when no predicate follows, otherwise
#   a :step_with_predicates node whose children are the step followed by
#   its predicate nodes
# @raise [XPath::SyntaxError] if a predicate is not closed with ']'
def parse_predicates(step)
  collected = []

  while match?(:lbracket)
    advance
    inner = parse_expr
    consume(:rbracket, "Expected ']' after predicate")
    collected << AST::Node.predicate(inner)
  end

  collected.empty? ? step : AST::Node.new(:step_with_predicates, [step, *collected])
end
|
|
432
|
+
|
|
433
|
+
# Parse primary expression
|
|
434
|
+
# primary ::= variable | '(' expr ')' | literal | number | function
#
# * String literal  - string node with the token value.
# * Number          - converted with to_f when the text contains ".",
#                     otherwise with to_i.
# * "$name"         - variable reference.
# * "( expr )"      - the inner expression itself (no wrapper node).
# * "name(args...)" - function call with comma-separated arguments.
# * A bare name not followed by "(" is handed to parse_location_path
#   without consuming it.
#
# @return [AST::Node] Parsed primary expression
# @raise [XPath::SyntaxError] if no primary expression starts here or a
#   closing ')' / variable name is missing
def parse_primary
  case current_type
  when :string
    text = current_value
    advance
    AST::Node.string(text)
  when :number
    literal = current_value
    advance
    # Keep integers integral; only text with a decimal point becomes a Float.
    AST::Node.number(literal.include?(".") ? literal.to_f : literal.to_i)
  when :dollar
    advance
    variable = consume(:name, "Expected variable name after $")
    AST::Node.variable(variable[1])
  when :lparen
    advance
    inner = parse_expr
    consume(:rparen, "Expected ')' after expression")
    inner
  when :name
    # A name only starts a primary expression when it is a function call.
    return parse_location_path unless peek_is?(:lparen)

    function_name = current_value
    advance # function name
    advance # '('

    arguments = []
    unless match?(:rparen)
      arguments << parse_expr
      while match?(:comma)
        advance
        arguments << parse_expr
      end
    end

    consume(:rparen, "Expected ')' after function arguments")
    AST::Node.function(function_name, *arguments)
  else
    raise_syntax_error("Expected primary expression")
  end
end
|
|
483
|
+
end
|
|
484
|
+
end
|
|
485
|
+
end
|