moxml 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +10 -10
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +238 -40
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +45 -0
- data/docs/_guides/modifying-xml.adoc +293 -0
- data/docs/_guides/parsing-xml.adoc +231 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +98 -0
- data/docs/_pages/adapters/libxml.adoc +286 -0
- data/docs/_pages/adapters/nokogiri.adoc +252 -0
- data/docs/_pages/adapters/oga.adoc +292 -0
- data/docs/_pages/adapters/ox.adoc +55 -0
- data/docs/_pages/adapters/rexml.adoc +293 -0
- data/docs/_pages/best-practices.adoc +430 -0
- data/docs/_pages/compatibility.adoc +468 -0
- data/docs/_pages/configuration.adoc +251 -0
- data/docs/_pages/error-handling.adoc +350 -0
- data/docs/_pages/headed-ox-limitations.adoc +558 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +141 -0
- data/docs/_pages/node-api-reference.adoc +50 -0
- data/docs/_pages/performance.adoc +36 -0
- data/docs/_pages/quick-start.adoc +244 -0
- data/docs/_pages/thread-safety.adoc +29 -0
- data/docs/_references/document-api.adoc +408 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +268 -0
- data/docs/_tutorials/builder-pattern.adoc +343 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +325 -0
- data/docs/_tutorials/xpath-queries.adoc +359 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -6
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1548 -0
- data/lib/moxml/adapter/nokogiri.rb +121 -9
- data/lib/moxml/adapter/oga.rb +123 -12
- data/lib/moxml/adapter/ox.rb +282 -26
- data/lib/moxml/adapter/rexml.rb +127 -20
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +39 -1
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +39 -6
- data/lib/moxml/document_builder.rb +27 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +94 -3
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1768 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -2
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +3 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +176 -34
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module XPath
|
|
5
|
+
# Evaluation scope for the Ruby code generated for compiled XPath Procs.
#
# Every instance captures its own Binding when it is created and evaluates
# code strings against that Binding. According to the original note, using a
# dedicated binding keeps the Procs from sharing the binding of
# {Compiler#compile}, which could lead to race conditions.
#
# NOTE(review): {#evaluate} executes arbitrary Ruby. It is presumably only fed
# code produced by the compiler - confirm that no caller passes user input.
#
# @private
class Context
  # Captures the Binding of the newly created instance so that later
  # evaluations run with this object as +self+.
  def initialize
    @binding = binding
  end

  # Runs a string of Ruby code inside the Binding captured at construction.
  #
  # @param [String] string Ruby code to evaluate
  # @return [Object] the value of the evaluated code (documented upstream as
  #   a Proc when given compiled XPath code)
  def evaluate(string)
    @binding.eval(string)
  end
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module XPath
|
|
5
|
+
# Helpers that coerce XPath values (node sets, nodes, strings, numbers and
# booleans) into the type an operator or function needs.
#
# @private
module Conversion
  # Lexical form of an XPath 1.0 number inside a string: optional leading and
  # trailing whitespace (space, tab, CR, LF), an optional minus sign, then
  # "Digits", "Digits.", "Digits.Digits" or ".Digits".
  XPATH_NUMBER = /\A[ \t\r\n]*-?(?:\d+(?:\.\d*)?|\.\d+)[ \t\r\n]*\z/
  private_constant :XPATH_NUMBER

  # Converts both arguments to a type that can be compared using ==.
  #
  # Node sets and anything responding to +text+ are first reduced to
  # strings. After that only the RIGHT operand is coerced, to match the
  # type of the left one (number, string or boolean).
  #
  # NOTE(review): XPath 1.0 section 3.4 is symmetric (boolean beats number
  # beats string regardless of side), so e.g. a String on the left and a
  # boolean on the right is compared as strings here. Left as-is because
  # callers may depend on the current results - confirm before changing.
  #
  # @param [Object] left
  # @param [Object] right
  # @return [Array<Object, Object>] the converted [left, right] pair
  def self.to_compatible_types(left, right)
    left = to_string(left) if left.is_a?(Moxml::NodeSet) || left.respond_to?(:text)
    right = to_string(right) if right.is_a?(Moxml::NodeSet) || right.respond_to?(:text)

    right = to_float(right) if left.is_a?(Numeric) && !right.is_a?(Numeric)
    right = to_string(right) if left.is_a?(String) && !right.is_a?(String)
    right = to_boolean(right) if boolean?(left) && !boolean?(right)

    [left, right]
  end

  # Converts a value to an XPath string.
  #
  # @param [Object] value
  # @return [String]
  def self.to_string(value)
    # Integral floats are printed without the trailing ".0" (10.0 => "10").
    # NaN and Infinity have a NaN remainder and therefore skip this step.
    value = value.to_i if value.is_a?(Float) && value.modulo(1).zero?

    # A node set is represented by the text of its first node.
    value = first_node_text(value) if value.is_a?(Moxml::NodeSet)
    value = value.text if value.respond_to?(:text)

    value.to_s
  end

  # Converts a value to an XPath number (float).
  #
  # Strings are accepted only when they match the XPath 1.0 number syntax
  # (see XPATH_NUMBER); every other string yields NaN. Kernel#Float is
  # deliberately not used for strings because it also accepts Ruby-only
  # forms such as "0x1A", "1_000", "1e5" and "+5".
  #
  # @param [Object] value
  # @return [Float] the numeric value, or NaN when it cannot be converted
  def self.to_float(value)
    value = first_node_text(value) if value.is_a?(Moxml::NodeSet)
    value = value.text if value.respond_to?(:text)

    case value
    when true then 1.0
    when false then 0.0
    when String
      # String#to_f skips leading whitespace and understands "5." and ".5";
      # the pattern guarantees nothing else is left for it to guess at.
      XPATH_NUMBER.match?(value) ? value.to_f : Float::NAN
    else
      begin
        Float(value)
      rescue ArgumentError, TypeError
        Float::NAN
      end
    end
  end

  # Converts a value to an XPath boolean.
  #
  # Numbers are true unless they are zero or NaN, values responding to
  # +empty?+ (strings, node sets) are true unless empty, and anything else
  # follows Ruby truthiness.
  #
  # @param [Object] value
  # @return [Boolean]
  def self.to_boolean(value)
    if value.is_a?(Numeric)
      !(value.zero? || (value.respond_to?(:nan?) && value.nan?))
    elsif value.respond_to?(:empty?)
      !value.empty?
    else
      value ? true : false
    end
  end

  # Checks if a value is a boolean.
  #
  # @param [Object] value
  # @return [Boolean] true only for +true+ and +false+
  def self.boolean?(value)
    [true, false].include?(value)
  end

  # Gets the text of the first node in a NodeSet.
  #
  # @param [Moxml::NodeSet] set
  # @return [String] the first node's text, or "" when the set has no first
  #   node or that node does not respond to +text+
  def self.first_node_text(set)
    node = set[0]
    node.respond_to?(:text) ? node.text : ""
  end
end
|
|
123
|
+
end
|
|
124
|
+
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module XPath
|
|
5
|
+
# XPath 1.0 evaluation engine
#
# Intended entry point for evaluating XPath 1.0 expressions against a Moxml
# document; the original notes call out the Ox adapter, whose native XPath
# support is limited, as the main beneficiary.
#
# NOTE: {#evaluate} is still a stub that always raises NotImplementedError,
# so the examples below describe the planned usage.
#
# @example Evaluate XPath expression
#   engine = Moxml::XPath::Engine.new(document)
#   results = engine.evaluate("//book[@id='123']/title")
#
# @example With context node
#   engine = Moxml::XPath::Engine.new(document)
#   results = engine.evaluate("./author", context: book_element)
#
class Engine
  # @return [Moxml::Document] the document this engine was created for
  attr_reader :document

  # Creates an engine bound to a document.
  #
  # @param document [Moxml::Document] The document to query
  def initialize(document)
    @document = document
  end

  # Evaluates an XPath expression (not implemented yet).
  #
  # @param expression [String] XPath expression to evaluate
  # @param context [Moxml::Node, nil] Context node (documented as defaulting
  #   to the document root)
  # @return [Moxml::NodeSet, String, Numeric, Boolean] planned result types
  # @raise [NotImplementedError] always, in the current implementation
  def evaluate(expression, context: nil)
    raise ::NotImplementedError, "XPath engine implementation in progress (Phase 1.1+)"
  end

  # Reports whether an expression is syntactically valid by evaluating it
  # against the document root and watching for a syntax error.
  #
  # Because {#evaluate} currently always raises NotImplementedError, which is
  # not rescued here, this method raises as well for now.
  #
  # @param expression [String] XPath expression to validate
  # @return [Boolean] true if valid, false otherwise
  def valid?(expression)
    begin
      evaluate(expression, context: document.root)
    rescue Moxml::XPath::SyntaxError
      return false
    end

    true
  end
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module XPath
|
|
5
|
+
# Common superclass of every XPath-specific error in this namespace.
# It derives from Moxml::XPathError so that code rescuing the older error
# class keeps working.
class Error < Moxml::XPathError
end
|
|
8
|
+
|
|
9
|
+
# Error raised when the syntax of an XPath expression is invalid.
#
# In addition to the data handled by the parent class it records where the
# problem was found and, optionally, the offending token.
class SyntaxError < Error
  # @return [Object, nil] position given at construction (the lexer passes
  #   a character offset)
  # @return [Object, nil] token given at construction
  attr_reader :position, :token

  # @param message [String] human-readable description
  # @param expression [String, nil] forwarded to the parent initializer
  # @param position [Integer, nil] where the error was detected
  # @param token [Object, nil] the unexpected token, if any
  def initialize(message, expression: nil, position: nil, token: nil)
    # Assigned before calling super, as in the original implementation.
    @position = position
    @token = token
    super(message, expression: expression)
  end

  # Parent message followed by one extra line per detail that is set.
  #
  # @return [String]
  def to_s
    text = super
    text = "#{text}\n Position: #{@position}" if @position
    text = "#{text}\n Unexpected token: #{@token.inspect}" if @token
    text
  end
end
|
|
26
|
+
|
|
27
|
+
# Error raised when evaluating an XPath expression fails.
#
# Optionally carries the context node and the step that were being
# processed.
class EvaluationError < Error
  # @return [Object, nil] context node given at construction
  # @return [Object, nil] step given at construction
  attr_reader :context_node, :step

  # @param message [String] human-readable description
  # @param expression [String, nil] forwarded to the parent initializer
  # @param context_node [Object, nil] node being evaluated against
  # @param step [Object, nil] step being evaluated
  def initialize(message, expression: nil, context_node: nil, step: nil)
    # Assigned before calling super, as in the original implementation.
    @context_node = context_node
    @step = step
    super(message, expression: expression)
  end

  # Parent message followed by one extra line per available detail. The
  # context node is only mentioned when it responds to +name+.
  #
  # @return [String]
  def to_s
    text = super
    text = "#{text}\n Context node: <#{@context_node.name}>" if @context_node.respond_to?(:name)
    text = "#{text}\n Step: #{@step}" if @step
    text
  end
end
|
|
44
|
+
|
|
45
|
+
# Error raised when an XPath function is not found or is invalid.
#
# Optionally carries the function name and the number of arguments involved.
class FunctionError < Error
  # @return [Object, nil] function name given at construction
  # @return [Object, nil] argument count given at construction
  attr_reader :function_name, :argument_count

  # @param message [String] human-readable description
  # @param expression [String, nil] forwarded to the parent initializer
  # @param function_name [String, nil] name of the function concerned
  # @param argument_count [Integer, nil] number of arguments concerned
  def initialize(message, expression: nil, function_name: nil, argument_count: nil)
    # Assigned before calling super, as in the original implementation.
    @function_name = function_name
    @argument_count = argument_count
    super(message, expression: expression)
  end

  # Parent message followed by one extra line per detail that is set.
  #
  # @return [String]
  def to_s
    text = super
    text = "#{text}\n Function: #{@function_name}" if @function_name
    text = "#{text}\n Arguments: #{@argument_count}" if @argument_count
    text
  end
end
|
|
63
|
+
|
|
64
|
+
# Error raised when an XPath operation is applied to an unsupported node
# type.
#
# Optionally carries the node type and the operation involved.
class NodeTypeError < Error
  # @return [Object, nil] node type given at construction
  # @return [Object, nil] operation given at construction
  attr_reader :node_type, :operation

  # @param message [String] human-readable description
  # @param expression [String, nil] forwarded to the parent initializer
  # @param node_type [Object, nil] type of the node concerned
  # @param operation [Object, nil] operation that was attempted
  def initialize(message, expression: nil, node_type: nil, operation: nil)
    # Assigned before calling super, as in the original implementation.
    @node_type = node_type
    @operation = operation
    super(message, expression: expression)
  end

  # Parent message followed by one extra line per detail that is set.
  #
  # @return [String]
  def to_s
    text = super
    text = "#{text}\n Node type: #{@node_type}" if @node_type
    text = "#{text}\n Operation: #{@operation}" if @operation
    text
  end
end
|
|
81
|
+
|
|
82
|
+
# Error raised when an XPath function is called without the context it
# requires.
#
# Optionally carries the function name and a description of the missing
# context.
class InvalidContextError < Error
  # @return [Object, nil] function name given at construction
  # @return [Object, nil] required context given at construction
  attr_reader :function_name, :required_context

  # @param message [String] human-readable description
  # @param expression [String, nil] forwarded to the parent initializer
  # @param function_name [String, nil] name of the function concerned
  # @param required_context [Object, nil] the context the function needs
  def initialize(message, expression: nil, function_name: nil, required_context: nil)
    # Assigned before calling super, as in the original implementation.
    @function_name = function_name
    @required_context = required_context
    super(message, expression: expression)
  end

  # Parent message followed by one extra line per detail that is set.
  #
  # @return [String]
  def to_s
    text = super
    text = "#{text}\n Function: #{@function_name}" if @function_name
    text = "#{text}\n Required context: #{@required_context}" if @required_context
    text
  end
end
|
|
100
|
+
end
|
|
101
|
+
end
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module XPath
|
|
5
|
+
# XPath expression lexer/tokenizer
#
# Converts XPath expressions into a stream of tokens for parsing.
# Each token is represented as [type, value, position], where position is
# the character offset of the token's first character.
#
# NOTE(review): the words in KEYWORDS are always emitted as operator tokens,
# even where XPath 1.0 (section 3.7) would read them as names, e.g. "//div".
# Presumably the parser compensates - confirm before relying on it.
#
# @example
#   lexer = Lexer.new("//book[@id='123']")
#   tokens = lexer.tokenize
#   # => [[:dslash, "//", 0], [:name, "book", 2], ...]
class Lexer
  # XPath axis names for recognition
  AXIS_NAMES = %w[
    ancestor ancestor-or-self attribute child descendant
    descendant-or-self following following-sibling namespace
    parent preceding preceding-sibling self
  ].freeze

  # XPath node type names
  NODE_TYPES = %w[
    comment text processing-instruction node
  ].freeze

  # Reserved keywords
  KEYWORDS = %w[and or mod div].freeze

  # Operators that are always exactly one character long.
  SINGLE_CHAR_TOKENS = {
    "|" => :pipe,
    "+" => :plus,
    "-" => :minus,
    "*" => :star,
    "=" => :eq,
    "(" => :lparen,
    ")" => :rparen,
    "[" => :lbracket,
    "]" => :rbracket,
    "," => :comma,
    "@" => :at,
    "$" => :dollar,
  }.freeze

  # Characters that open either a one- or a two-character operator:
  # first char => [second char, two-char type, one-char type]. A nil
  # one-char type means the character is invalid on its own.
  PAIRED_TOKENS = {
    "/" => ["/", :dslash, :slash],
    "!" => ["=", :neq, nil],
    "<" => ["=", :lte, :lt],
    ">" => ["=", :gte, :gt],
    ":" => [":", :dcolon, :colon],
  }.freeze

  # Backslash escapes with a special meaning inside string literals; any
  # other escaped character stands for itself.
  ESCAPES = { "t" => "\t", "n" => "\n", "r" => "\r" }.freeze

  DIGIT = /\d/
  NAME_START = /[a-zA-Z_]/
  # Name can contain letters, digits, underscores, hyphens, and dots
  NAME_CHAR = /[a-zA-Z0-9_\-.]/
  WHITESPACE = /\s/

  private_constant :SINGLE_CHAR_TOKENS, :PAIRED_TOKENS, :ESCAPES,
                   :DIGIT, :NAME_START, :NAME_CHAR, :WHITESPACE

  # Initialize lexer with XPath expression
  #
  # @param expression [String] XPath expression to tokenize (converted
  #   with +to_s+)
  def initialize(expression)
    @expression = expression.to_s
    @position = 0
    @length = @expression.length
    @tokens = []
  end

  # Tokenize the XPath expression
  #
  # Scanning always restarts from the beginning, so the method can be
  # called repeatedly.
  #
  # @return [Array<Array>] Array of [type, value, position] tuples
  # @raise [XPath::SyntaxError] if expression contains invalid syntax
  def tokenize
    @tokens = []
    @position = 0

    loop do
      skip_whitespace
      break if @position >= @length

      scan_token
    end

    @tokens
  end

  private

  # Get current character
  #
  # @return [String, nil] Current character or nil if at end
  def current_char
    @expression[@position]
  end

  # Peek at next character
  #
  # @return [String, nil] Next character or nil if at end
  def peek_char
    @expression[@position + 1]
  end

  # Advance position by n characters
  #
  # @param n [Integer] Number of characters to advance
  def advance(n = 1)
    @position += n
  end

  # Skip whitespace characters
  def skip_whitespace
    # Regexp#match? returns false for nil, which ends the loop at the end
    # of the expression.
    advance while WHITESPACE.match?(current_char)
  end

  # Add token to token list
  #
  # @param type [Symbol] Token type
  # @param value [String] Token value
  # @param position [Integer] Token position
  def add_token(type, value, position)
    @tokens << [type, value, position]
  end

  # Scans the single token that starts at the current position.
  #
  # @raise [XPath::SyntaxError] on a character that cannot start a token
  def scan_token
    start_pos = @position
    char = current_char

    if (type = SINGLE_CHAR_TOKENS[char])
      add_token(type, char, start_pos)
      advance
    elsif PAIRED_TOKENS.key?(char)
      scan_paired_token(char, start_pos)
    elsif char == "."
      scan_dot(start_pos)
    elsif char == '"' || char == "'"
      scan_string(start_pos)
    elsif DIGIT.match?(char)
      scan_number(start_pos)
    elsif NAME_START.match?(char)
      scan_name_or_keyword(start_pos)
    else
      raise_syntax_error(
        "Unexpected character '#{char}' at position #{@position}",
      )
    end
  end

  # Scans an operator that is one or two characters long ("/" vs "//",
  # "<" vs "<=", ...).
  #
  # @param char [String] First character of the operator
  # @param start_pos [Integer] Starting position
  # @raise [XPath::SyntaxError] for a lone "!"
  def scan_paired_token(char, start_pos)
    second, double_type, single_type = PAIRED_TOKENS.fetch(char)

    if peek_char == second
      add_token(double_type, char + second, start_pos)
      advance(2)
    elsif single_type
      add_token(single_type, char, start_pos)
      advance
    else
      raise_syntax_error("Unexpected '#{char}' at position #{@position}")
    end
  end

  # Scans a token starting with ".": "..", a number such as ".5", or ".".
  #
  # @param start_pos [Integer] Starting position
  def scan_dot(start_pos)
    if peek_char == "."
      add_token(:ddot, "..", start_pos)
      advance(2)
    elsif DIGIT.match?(peek_char)
      scan_number(start_pos)
    else
      add_token(:dot, ".", start_pos)
      advance
    end
  end

  # Scan string literal
  #
  # The literal ends at the next unescaped occurrence of its opening quote.
  # A backslash escapes the following character (see ESCAPES).
  # NOTE(review): XPath 1.0 literals define no escape sequences, so this is
  # an extension - confirm it is intended.
  #
  # @param start_pos [Integer] Starting position
  # @raise [XPath::SyntaxError] if the closing quote is missing
  def scan_string(start_pos)
    quote = current_char
    advance

    value = +""
    while @position < @length && current_char != quote
      if current_char == "\\"
        advance
        # A trailing backslash has nothing to escape and is dropped.
        value << ESCAPES.fetch(current_char, current_char) if @position < @length
      else
        value << current_char
      end
      advance
    end

    if @position >= @length
      raise_syntax_error("Unterminated string starting at position #{start_pos}")
    end

    advance # Skip closing quote
    add_token(:string, value, start_pos)
  end

  # Scan number (integer or decimal)
  #
  # Accepts "12", "12.", "12.5" and ".5"; the token value is the matched
  # source text.
  #
  # @param start_pos [Integer] Starting position
  def scan_number(start_pos)
    # Integer part
    advance while DIGIT.match?(current_char)

    # Decimal part
    if current_char == "."
      advance
      advance while DIGIT.match?(current_char)
    end

    add_token(:number, @expression[start_pos...@position], start_pos)
  end

  # Scan name or keyword
  #
  # The scanned word becomes an :axis token when it is an axis name
  # followed by "::", a keyword token (:and, :or, :mod, :div), a
  # :node_type token, or a plain :name token - checked in that order.
  #
  # @param start_pos [Integer] Starting position
  def scan_name_or_keyword(start_pos)
    advance while NAME_CHAR.match?(current_char)
    value = @expression[start_pos...@position]

    type =
      if AXIS_NAMES.include?(value) && axis_separator_ahead?
        :axis
      elsif KEYWORDS.include?(value)
        value.to_sym
      elsif NODE_TYPES.include?(value)
        :node_type
      else
        :name
      end

    add_token(type, value, start_pos)
  end

  # Checks whether the next non-whitespace characters are "::".
  #
  # Whitespace is skipped because XPath 1.0 allows it between an axis name
  # and the "::" separator ("child :: para").
  #
  # @return [Boolean]
  def axis_separator_ahead?
    lookahead = @position
    lookahead += 1 while WHITESPACE.match?(@expression[lookahead])
    @expression[lookahead, 2] == "::"
  end

  # Raise syntax error
  #
  # The error carries the full expression and the current scan position.
  #
  # @param message [String] Error message
  # @raise [XPath::SyntaxError]
  def raise_syntax_error(message)
    raise Moxml::XPath::SyntaxError.new(
      message,
      expression: @expression,
      position: @position,
    )
  end
end
|
|
303
|
+
end
|
|
304
|
+
end
|