moxml 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +5 -0
  3. data/.github/workflows/dependent-tests.yml +20 -0
  4. data/.github/workflows/docs.yml +59 -0
  5. data/.github/workflows/rake.yml +12 -4
  6. data/.github/workflows/release.yml +5 -3
  7. data/.gitignore +37 -0
  8. data/.rubocop.yml +15 -7
  9. data/.rubocop_todo.yml +238 -40
  10. data/Gemfile +14 -9
  11. data/LICENSE.md +6 -2
  12. data/README.adoc +535 -373
  13. data/Rakefile +53 -0
  14. data/benchmarks/.gitignore +6 -0
  15. data/benchmarks/generate_report.rb +550 -0
  16. data/docs/Gemfile +13 -0
  17. data/docs/_config.yml +138 -0
  18. data/docs/_guides/advanced-features.adoc +87 -0
  19. data/docs/_guides/development-testing.adoc +165 -0
  20. data/docs/_guides/index.adoc +45 -0
  21. data/docs/_guides/modifying-xml.adoc +293 -0
  22. data/docs/_guides/parsing-xml.adoc +231 -0
  23. data/docs/_guides/sax-parsing.adoc +603 -0
  24. data/docs/_guides/working-with-documents.adoc +118 -0
  25. data/docs/_pages/adapter-compatibility.adoc +369 -0
  26. data/docs/_pages/adapters/headed-ox.adoc +237 -0
  27. data/docs/_pages/adapters/index.adoc +98 -0
  28. data/docs/_pages/adapters/libxml.adoc +286 -0
  29. data/docs/_pages/adapters/nokogiri.adoc +252 -0
  30. data/docs/_pages/adapters/oga.adoc +292 -0
  31. data/docs/_pages/adapters/ox.adoc +55 -0
  32. data/docs/_pages/adapters/rexml.adoc +293 -0
  33. data/docs/_pages/best-practices.adoc +430 -0
  34. data/docs/_pages/compatibility.adoc +468 -0
  35. data/docs/_pages/configuration.adoc +251 -0
  36. data/docs/_pages/error-handling.adoc +350 -0
  37. data/docs/_pages/headed-ox-limitations.adoc +558 -0
  38. data/docs/_pages/headed-ox.adoc +1025 -0
  39. data/docs/_pages/index.adoc +35 -0
  40. data/docs/_pages/installation.adoc +141 -0
  41. data/docs/_pages/node-api-reference.adoc +50 -0
  42. data/docs/_pages/performance.adoc +36 -0
  43. data/docs/_pages/quick-start.adoc +244 -0
  44. data/docs/_pages/thread-safety.adoc +29 -0
  45. data/docs/_references/document-api.adoc +408 -0
  46. data/docs/_references/index.adoc +48 -0
  47. data/docs/_tutorials/basic-usage.adoc +268 -0
  48. data/docs/_tutorials/builder-pattern.adoc +343 -0
  49. data/docs/_tutorials/index.adoc +33 -0
  50. data/docs/_tutorials/namespace-handling.adoc +325 -0
  51. data/docs/_tutorials/xpath-queries.adoc +359 -0
  52. data/docs/index.adoc +122 -0
  53. data/examples/README.md +124 -0
  54. data/examples/api_client/README.md +424 -0
  55. data/examples/api_client/api_client.rb +394 -0
  56. data/examples/api_client/example_response.xml +48 -0
  57. data/examples/headed_ox_example/README.md +90 -0
  58. data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
  59. data/examples/rss_parser/README.md +194 -0
  60. data/examples/rss_parser/example_feed.xml +93 -0
  61. data/examples/rss_parser/rss_parser.rb +189 -0
  62. data/examples/sax_parsing/README.md +50 -0
  63. data/examples/sax_parsing/data_extractor.rb +75 -0
  64. data/examples/sax_parsing/example.xml +21 -0
  65. data/examples/sax_parsing/large_file.rb +78 -0
  66. data/examples/sax_parsing/simple_parser.rb +55 -0
  67. data/examples/web_scraper/README.md +352 -0
  68. data/examples/web_scraper/example_page.html +201 -0
  69. data/examples/web_scraper/web_scraper.rb +312 -0
  70. data/lib/moxml/adapter/base.rb +107 -28
  71. data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
  72. data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
  73. data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
  74. data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
  75. data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
  76. data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
  77. data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
  78. data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
  79. data/lib/moxml/adapter/customized_ox/attribute.rb +28 -3
  80. data/lib/moxml/adapter/customized_ox/namespace.rb +0 -2
  81. data/lib/moxml/adapter/customized_ox/text.rb +0 -2
  82. data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -6
  83. data/lib/moxml/adapter/headed_ox.rb +161 -0
  84. data/lib/moxml/adapter/libxml.rb +1548 -0
  85. data/lib/moxml/adapter/nokogiri.rb +121 -9
  86. data/lib/moxml/adapter/oga.rb +123 -12
  87. data/lib/moxml/adapter/ox.rb +283 -27
  88. data/lib/moxml/adapter/rexml.rb +127 -20
  89. data/lib/moxml/adapter.rb +21 -4
  90. data/lib/moxml/attribute.rb +6 -0
  91. data/lib/moxml/builder.rb +40 -4
  92. data/lib/moxml/config.rb +8 -3
  93. data/lib/moxml/context.rb +39 -1
  94. data/lib/moxml/doctype.rb +13 -1
  95. data/lib/moxml/document.rb +39 -6
  96. data/lib/moxml/document_builder.rb +27 -5
  97. data/lib/moxml/element.rb +71 -2
  98. data/lib/moxml/error.rb +175 -6
  99. data/lib/moxml/node.rb +94 -3
  100. data/lib/moxml/node_set.rb +34 -0
  101. data/lib/moxml/sax/block_handler.rb +194 -0
  102. data/lib/moxml/sax/element_handler.rb +124 -0
  103. data/lib/moxml/sax/handler.rb +113 -0
  104. data/lib/moxml/sax.rb +31 -0
  105. data/lib/moxml/version.rb +1 -1
  106. data/lib/moxml/xml_utils/encoder.rb +4 -4
  107. data/lib/moxml/xml_utils.rb +7 -4
  108. data/lib/moxml/xpath/ast/node.rb +159 -0
  109. data/lib/moxml/xpath/cache.rb +91 -0
  110. data/lib/moxml/xpath/compiler.rb +1768 -0
  111. data/lib/moxml/xpath/context.rb +26 -0
  112. data/lib/moxml/xpath/conversion.rb +124 -0
  113. data/lib/moxml/xpath/engine.rb +52 -0
  114. data/lib/moxml/xpath/errors.rb +101 -0
  115. data/lib/moxml/xpath/lexer.rb +304 -0
  116. data/lib/moxml/xpath/parser.rb +485 -0
  117. data/lib/moxml/xpath/ruby/generator.rb +269 -0
  118. data/lib/moxml/xpath/ruby/node.rb +193 -0
  119. data/lib/moxml/xpath.rb +37 -0
  120. data/lib/moxml.rb +5 -2
  121. data/moxml.gemspec +3 -1
  122. data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
  123. data/spec/consistency/README.md +77 -0
  124. data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
  125. data/spec/examples/README.md +75 -0
  126. data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
  127. data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
  128. data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
  129. data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
  130. data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
  131. data/spec/integration/README.md +71 -0
  132. data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
  133. data/spec/integration/headed_ox_integration_spec.rb +326 -0
  134. data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
  135. data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
  136. data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
  137. data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
  138. data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
  139. data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
  140. data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
  141. data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -2
  142. data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
  143. data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
  144. data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
  145. data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
  146. data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
  147. data/spec/moxml/README.md +41 -0
  148. data/spec/moxml/adapter/.gitkeep +0 -0
  149. data/spec/moxml/adapter/README.md +61 -0
  150. data/spec/moxml/adapter/base_spec.rb +27 -0
  151. data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
  152. data/spec/moxml/adapter/libxml_spec.rb +14 -0
  153. data/spec/moxml/adapter/ox_spec.rb +9 -8
  154. data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
  155. data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
  156. data/spec/moxml/adapter_spec.rb +16 -0
  157. data/spec/moxml/attribute_spec.rb +30 -0
  158. data/spec/moxml/builder_spec.rb +33 -0
  159. data/spec/moxml/cdata_spec.rb +31 -0
  160. data/spec/moxml/comment_spec.rb +31 -0
  161. data/spec/moxml/config_spec.rb +3 -3
  162. data/spec/moxml/context_spec.rb +28 -0
  163. data/spec/moxml/declaration_spec.rb +36 -0
  164. data/spec/moxml/doctype_spec.rb +33 -0
  165. data/spec/moxml/document_builder_spec.rb +30 -0
  166. data/spec/moxml/document_spec.rb +105 -0
  167. data/spec/moxml/element_spec.rb +143 -0
  168. data/spec/moxml/error_spec.rb +266 -22
  169. data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
  170. data/spec/moxml/namespace_spec.rb +32 -0
  171. data/spec/moxml/node_set_spec.rb +39 -0
  172. data/spec/moxml/node_spec.rb +37 -0
  173. data/spec/moxml/processing_instruction_spec.rb +34 -0
  174. data/spec/moxml/sax_spec.rb +1067 -0
  175. data/spec/moxml/text_spec.rb +31 -0
  176. data/spec/moxml/version_spec.rb +14 -0
  177. data/spec/moxml/xml_utils/.gitkeep +0 -0
  178. data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
  179. data/spec/moxml/xml_utils_spec.rb +49 -0
  180. data/spec/moxml/xpath/ast/node_spec.rb +83 -0
  181. data/spec/moxml/xpath/axes_spec.rb +296 -0
  182. data/spec/moxml/xpath/cache_spec.rb +358 -0
  183. data/spec/moxml/xpath/compiler_spec.rb +406 -0
  184. data/spec/moxml/xpath/context_spec.rb +210 -0
  185. data/spec/moxml/xpath/conversion_spec.rb +365 -0
  186. data/spec/moxml/xpath/fixtures/sample.xml +25 -0
  187. data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
  188. data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
  189. data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
  190. data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
  191. data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
  192. data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
  193. data/spec/moxml/xpath/lexer_spec.rb +488 -0
  194. data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
  195. data/spec/moxml/xpath/parser_spec.rb +364 -0
  196. data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
  197. data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
  198. data/spec/moxml/xpath_capabilities_spec.rb +199 -0
  199. data/spec/moxml/xpath_spec.rb +77 -0
  200. data/spec/performance/README.md +83 -0
  201. data/spec/performance/benchmark_spec.rb +64 -0
  202. data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +3 -1
  203. data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
  204. data/spec/performance/xpath_benchmark_spec.rb +259 -0
  205. data/spec/spec_helper.rb +58 -1
  206. data/spec/support/xml_matchers.rb +1 -1
  207. metadata +176 -35
  208. data/lib/ox/node.rb +0 -9
  209. data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
  210. /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
  211. /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
  212. /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
  213. /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
  214. /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
  215. /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module XPath
5
+ # Class used as the context for compiled XPath Procs.
6
+ #
7
+ # The binding of this class is used for the binding of Procs compiled by
8
+ # {Compiler}. Not using a specific binding would result in the procs using
9
+ # the binding of {Compiler#compile}, which could lead to race conditions.
10
+ #
11
+ # @private
12
class Context
  # Captures a binding whose +self+ is this instance, so that every string
  # evaluated later runs against this object instead of against the caller.
  def initialize
    @binding = binding
  end

  # Runs a string of Ruby source inside the binding captured at construction.
  #
  # @param [String] string Ruby code to evaluate
  # @return [Object] whatever the evaluated code returns (a Proc when the
  #   string is a compiled lambda/proc literal)
  def evaluate(string)
    eval(string, @binding) # rubocop:disable Security/Eval, Style/EvalWithLocation
  end
end
25
+ end
26
+ end
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module XPath
5
+ # Module for converting XPath objects such as NodeSets to different types.
6
+ #
7
+ # @private
8
module Conversion
  # Converts both arguments to types that can be compared using ==.
  #
  # Node sets and nodes (anything responding to +text+) are first reduced
  # to their string value. The remaining operands are then aligned using
  # the XPath 1.0 comparison precedence, applied symmetrically:
  #
  # 1. if either operand is a boolean, the other becomes a boolean;
  # 2. otherwise, if either is a number, the other becomes a float;
  # 3. otherwise, if either is a string, the other becomes a string.
  #
  # An operand that already has the target type is returned untouched, so
  # e.g. an Integer on the left stays an Integer.
  #
  # @param [Object] left
  # @param [Object] right
  # @return [Array<Object, Object>]
  def self.to_compatible_types(left, right)
    left = to_string(left) if node_like?(left)
    right = to_string(right) if node_like?(right)

    if boolean?(left) || boolean?(right)
      left = to_boolean(left) unless boolean?(left)
      right = to_boolean(right) unless boolean?(right)
    elsif left.is_a?(Numeric) || right.is_a?(Numeric)
      left = to_float(left) unless left.is_a?(Numeric)
      right = to_float(right) unless right.is_a?(Numeric)
    elsif left.is_a?(String) || right.is_a?(String)
      left = to_string(left) unless left.is_a?(String)
      right = to_string(right) unless right.is_a?(String)
    end

    [left, right]
  end

  # Converts a value to an XPath string.
  #
  # @param [Object] value
  # @return [String]
  def self.to_string(value)
    # A float with a zero fraction (e.g. 10.0) is rendered without the
    # decimal part, hence the detour through Integer.
    value = value.to_i if value.is_a?(Float) && value.modulo(1).zero?

    # A node set contributes the text of its first node only.
    value = first_node_text(value) if value.is_a?(Moxml::NodeSet)
    value = value.text if value.respond_to?(:text)

    value.to_s
  end

  # Converts a value to an XPath number (float).
  #
  # Booleans map to 1.0 / 0.0; anything +Kernel#Float+ cannot parse
  # becomes NaN.
  #
  # @param [Object] value
  # @return [Float]
  def self.to_float(value)
    value = first_node_text(value) if value.is_a?(Moxml::NodeSet)
    value = value.text if value.respond_to?(:text)

    return 1.0 if value == true
    return 0.0 if value == false

    begin
      Float(value)
    rescue ArgumentError, TypeError
      Float::NAN
    end
  end

  # Converts a value to an XPath boolean.
  #
  # Numbers are true unless zero (or NaN for floats); anything responding
  # to +empty?+ is true when non-empty; every other value follows Ruby
  # truthiness.
  #
  # @param [Object] value
  # @return [Boolean]
  def self.to_boolean(value)
    case value
    when Float
      !value.nan? && !value.zero?
    when Integer
      !value.zero?
    else
      value.respond_to?(:empty?) ? !value.empty? : !!value
    end
  end

  # Checks if a value is a boolean.
  #
  # @param [Object] value
  # @return [Boolean]
  def self.boolean?(value)
    [true, false].include?(value)
  end

  # Gets the text of the first node in a NodeSet.
  #
  # @param [Moxml::NodeSet] set
  # @return [String] the first node's text, or "" when the set has no
  #   first node (or it does not respond to +text+)
  def self.first_node_text(set)
    node = set[0]
    node.respond_to?(:text) ? node.text : ""
  end

  # Whether a value is a node set or a single node, i.e. something that
  # must be reduced to its string value before comparison.
  #
  # @param [Object] value
  # @return [Boolean]
  def self.node_like?(value)
    value.is_a?(Moxml::NodeSet) || value.respond_to?(:text)
  end
  private_class_method :node_like?
end
123
+ end
124
+ end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module XPath
5
+ # XPath 1.0 evaluation engine
6
+ #
7
+ # This engine provides complete XPath 1.0 support for Moxml documents,
8
+ # particularly useful for the Ox adapter which has limited native XPath.
9
+ #
10
+ # @example Evaluate XPath expression
11
+ # engine = Moxml::XPath::Engine.new(document)
12
+ # results = engine.evaluate("//book[@id='123']/title")
13
+ #
14
+ # @example With context node
15
+ # engine = Moxml::XPath::Engine.new(document)
16
+ # results = engine.evaluate("./author", context: book_element)
17
+ #
18
class Engine
  # @return [Moxml::Document] the document this engine was built for
  attr_reader :document

  # Builds an engine bound to a single document.
  #
  # @param document [Moxml::Document] The document to query
  def initialize(document)
    @document = document
  end

  # Evaluate an XPath expression.
  #
  # This is currently a stub: it unconditionally raises
  # +NotImplementedError+ and ignores both arguments.
  #
  # @param expression [String] XPath expression to evaluate
  # @param context [Moxml::Node, nil] Context node
  # @raise [NotImplementedError] always, until the engine is implemented
  def evaluate(expression, context: nil)
    message = "XPath engine implementation in progress (Phase 1.1+)"
    raise ::NotImplementedError, message
  end

  # Check whether an expression is valid XPath by trying to evaluate it
  # against the document root.
  #
  # NOTE(review): because #evaluate is still a stub, this method currently
  # propagates NotImplementedError for every input instead of returning a
  # boolean; only Moxml::XPath::SyntaxError is translated to +false+.
  #
  # @param expression [String] XPath expression to validate
  # @return [Boolean] true if evaluation succeeds, false on a syntax error
  def valid?(expression)
    begin
      evaluate(expression, context: document.root)
    rescue Moxml::XPath::SyntaxError
      return false
    end

    true
  end
end
51
+ end
52
+ end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module XPath
5
+ # Base error for XPath-specific errors
6
+ # Inherits from Moxml::XPathError to maintain compatibility
7
# Root of the XPath error hierarchy; adds nothing to Moxml::XPathError and
# exists so XPath failures can be rescued as one family.
class Error < Moxml::XPathError
end
8
+
9
+ # Error raised when XPath syntax is invalid
10
class SyntaxError < Error
  # @return [Integer, nil] position in the expression where the error was detected
  # @return [Object, nil] the offending token, when known
  attr_reader :position, :token

  # @param message [String] human-readable description
  # @param expression [String, nil] the XPath expression being processed
  # @param position [Integer, nil] position of the error in the expression
  # @param token [Object, nil] the unexpected token
  def initialize(message, expression: nil, position: nil, token: nil)
    # Set the details before delegating so they are already available while
    # the parent initializer runs.
    @token = token
    @position = position
    super(message, expression: expression)
  end

  # Parent message followed by one extra line per detail that is present.
  #
  # @return [String]
  def to_s
    sections = [super]
    sections << "\n Position: #{@position}" if @position
    sections << "\n Unexpected token: #{@token.inspect}" if @token
    sections.join
  end
end
26
+
27
+ # Error raised when XPath evaluation fails
28
class EvaluationError < Error
  # @return [Object, nil] node that was being evaluated when the error occurred
  # @return [Object, nil] the evaluation step that failed
  attr_reader :context_node, :step

  # @param message [String] human-readable description
  # @param expression [String, nil] the XPath expression being evaluated
  # @param context_node [Object, nil] node in whose context evaluation failed
  # @param step [Object, nil] the failing step
  def initialize(message, expression: nil, context_node: nil, step: nil)
    # Set the details before delegating so they are already available while
    # the parent initializer runs.
    @step = step
    @context_node = context_node
    super(message, expression: expression)
  end

  # Parent message followed by the context node name (only when the node
  # responds to +name+) and the step, when present.
  #
  # @return [String]
  def to_s
    sections = [super]
    sections << "\n Context node: <#{@context_node.name}>" if @context_node.respond_to?(:name)
    sections << "\n Step: #{@step}" if @step
    sections.join
  end
end
44
+
45
+ # Error raised when an XPath function is not found or invalid
46
class FunctionError < Error
  # @return [String, nil] name of the function involved
  # @return [Integer, nil] number of arguments supplied to it
  attr_reader :function_name, :argument_count

  # @param message [String] human-readable description
  # @param expression [String, nil] the XPath expression being processed
  # @param function_name [String, nil] name of the offending function
  # @param argument_count [Integer, nil] number of arguments it was given
  def initialize(message, expression: nil, function_name: nil,
                 argument_count: nil)
    # Set the details before delegating so they are already available while
    # the parent initializer runs.
    @argument_count = argument_count
    @function_name = function_name
    super(message, expression: expression)
  end

  # Parent message followed by one extra line per detail that is present.
  #
  # @return [String]
  def to_s
    sections = [super]
    sections << "\n Function: #{@function_name}" if @function_name
    sections << "\n Arguments: #{@argument_count}" if @argument_count
    sections.join
  end
end
63
+
64
+ # Error raised when an XPath operation on unsupported node type
65
class NodeTypeError < Error
  # @return [Object, nil] the node type the operation was attempted on
  # @return [Object, nil] the operation that is not supported
  attr_reader :node_type, :operation

  # @param message [String] human-readable description
  # @param expression [String, nil] the XPath expression being processed
  # @param node_type [Object, nil] type of the node involved
  # @param operation [Object, nil] the unsupported operation
  def initialize(message, expression: nil, node_type: nil, operation: nil)
    # Set the details before delegating so they are already available while
    # the parent initializer runs.
    @operation = operation
    @node_type = node_type
    super(message, expression: expression)
  end

  # Parent message followed by one extra line per detail that is present.
  #
  # @return [String]
  def to_s
    sections = [super]
    sections << "\n Node type: #{@node_type}" if @node_type
    sections << "\n Operation: #{@operation}" if @operation
    sections.join
  end
end
81
+
82
+ # Error raised when an XPath function is called without required context
83
class InvalidContextError < Error
  # @return [String, nil] name of the function that was called
  # @return [Object, nil] description of the context it requires
  attr_reader :function_name, :required_context

  # @param message [String] human-readable description
  # @param expression [String, nil] the XPath expression being processed
  # @param function_name [String, nil] name of the function involved
  # @param required_context [Object, nil] the context the function needs
  def initialize(message, expression: nil, function_name: nil,
                 required_context: nil)
    # Set the details before delegating so they are already available while
    # the parent initializer runs.
    @required_context = required_context
    @function_name = function_name
    super(message, expression: expression)
  end

  # Parent message followed by one extra line per detail that is present.
  #
  # @return [String]
  def to_s
    sections = [super]
    sections << "\n Function: #{@function_name}" if @function_name
    sections << "\n Required context: #{@required_context}" if @required_context
    sections.join
  end
end
100
+ end
101
+ end
@@ -0,0 +1,304 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module XPath
5
+ # XPath expression lexer/tokenizer
6
+ #
7
+ # Converts XPath expressions into a stream of tokens for parsing.
8
+ # Each token is represented as [type, value, position].
9
+ #
10
+ # @example
11
+ # lexer = Lexer.new("//book[@id='123']")
12
+ # tokens = lexer.tokenize
13
+ # # => [[:dslash, "//", 0], [:name, "book", 2], ...]
14
class Lexer
  # XPath axis names for recognition
  AXIS_NAMES = %w[
    ancestor ancestor-or-self attribute child descendant
    descendant-or-self following following-sibling namespace
    parent preceding preceding-sibling self
  ].freeze

  # XPath node type names
  NODE_TYPES = %w[
    comment text processing-instruction node
  ].freeze

  # Reserved keywords
  KEYWORDS = %w[and or mod div].freeze

  # Operators made of two characters. They are looked up before any
  # single-character token so that e.g. "//" never lexes as two slashes.
  TWO_CHAR_TOKENS = {
    "//" => :dslash,
    "!=" => :neq,
    "<=" => :lte,
    ">=" => :gte,
    "::" => :dcolon,
    ".." => :ddot,
  }.freeze

  # Operators and punctuation made of one character ("." and "!" need
  # lookahead and are handled separately).
  ONE_CHAR_TOKENS = {
    "/" => :slash,
    "|" => :pipe,
    "+" => :plus,
    "-" => :minus,
    "*" => :star,
    "=" => :eq,
    "<" => :lt,
    ">" => :gt,
    "(" => :lparen,
    ")" => :rparen,
    "[" => :lbracket,
    "]" => :rbracket,
    "," => :comma,
    "@" => :at,
    ":" => :colon,
    "$" => :dollar,
  }.freeze

  # Backslash escapes that map to a different character; every other
  # escaped character stands for itself.
  STRING_ESCAPES = { "t" => "\t", "n" => "\n", "r" => "\r" }.freeze

  DIGIT = /\d/.freeze
  WHITESPACE_RUN = /\G\s+/.freeze
  # Optional integer digits followed by an optional "." and more digits.
  NUMBER_RUN = /\G\d*(?:\.\d*)?/.freeze
  # POSIX classes are Unicode-aware, so non-ASCII XML names are accepted.
  NAME_START = /[[:alpha:]_]/.freeze
  NAME_RUN = /\G[[:alnum:]_\-.]+/.freeze

  private_constant :TWO_CHAR_TOKENS, :ONE_CHAR_TOKENS, :STRING_ESCAPES,
                   :DIGIT, :WHITESPACE_RUN, :NUMBER_RUN, :NAME_START,
                   :NAME_RUN

  # Initialize lexer with XPath expression
  #
  # @param expression [String] XPath expression to tokenize (coerced
  #   with +to_s+)
  def initialize(expression)
    @expression = expression.to_s
    @position = 0
    @length = @expression.length
    @tokens = []
  end

  # Tokenize the XPath expression.
  #
  # Every call restarts from the beginning of the expression.
  #
  # @return [Array<Array>] Array of [type, value, position] tuples
  # @raise [XPath::SyntaxError] if expression contains invalid syntax
  def tokenize
    @tokens = []
    @position = 0

    loop do
      skip_whitespace
      break if @position >= @length

      scan_token
    end

    @tokens
  end

  private

  # Recognize exactly one token starting at the current position.
  #
  # @raise [XPath::SyntaxError] on a lone "!" or an unknown character
  def scan_token
    start = @position
    pair = @expression[start, 2]
    char = pair[0]

    two_char_type = TWO_CHAR_TOKENS[pair]
    return add_operator(two_char_type, pair, start) if two_char_type

    case char
    when "!"
      # "!=" was handled above, so a bare "!" is always an error.
      raise_syntax_error("Unexpected '!' at position #{@position}")
    when "."
      # ".5" is a number; a lone "." is the context-node abbreviation.
      if DIGIT.match?(pair[1])
        scan_number(start)
      else
        add_operator(:dot, char, start)
      end
    when '"', "'"
      scan_string(start)
    when DIGIT
      scan_number(start)
    when NAME_START
      scan_name_or_keyword(start)
    else
      type = ONE_CHAR_TOKENS[char]
      unless type
        raise_syntax_error(
          "Unexpected character '#{char}' at position #{@position}",
        )
      end
      add_operator(type, char, start)
    end
  end

  # Get current character
  #
  # @return [String, nil] Current character or nil if at end
  def current_char
    @expression[@position]
  end

  # Advance position by n characters
  #
  # @param n [Integer] Number of characters to advance
  def advance(n = 1)
    @position += n
  end

  # Skip whitespace characters
  def skip_whitespace
    run = WHITESPACE_RUN.match(@expression, @position)
    advance(run[0].length) if run
  end

  # Add token to token list
  #
  # @param type [Symbol] Token type
  # @param value [String] Token value
  # @param position [Integer] Token position
  def add_token(type, value, position)
    @tokens << [type, value, position]
  end

  # Record a fixed-text token and move past it.
  #
  # @param type [Symbol] Token type
  # @param text [String] The exact source text of the token
  # @param position [Integer] Token position
  def add_operator(type, text, position)
    add_token(type, text, position)
    advance(text.length)
  end

  # Scan string literal delimited by single or double quotes.
  #
  # NOTE: XPath 1.0 literals define no escape sequences; this lexer
  # nevertheless treats a backslash as an escape character (\t, \n, \r
  # are translated, any other escaped character is taken literally).
  #
  # @param start_pos [Integer] Starting position
  # @raise [XPath::SyntaxError] if the closing quote is missing
  def scan_string(start_pos)
    quote = current_char
    advance

    value = +""
    while @position < @length && current_char != quote
      if current_char == "\\"
        advance
        # A trailing backslash has nothing to escape and is dropped.
        if @position < @length
          escaped = current_char
          value << STRING_ESCAPES.fetch(escaped, escaped)
        end
      else
        value << current_char
      end
      advance
    end

    if @position >= @length
      raise_syntax_error("Unterminated string starting at position #{start_pos}")
    end

    advance # Skip closing quote
    add_token(:string, value, start_pos)
  end

  # Scan number (integer or decimal)
  #
  # @param start_pos [Integer] Starting position
  def scan_number(start_pos)
    value = NUMBER_RUN.match(@expression, @position)[0]
    advance(value.length)
    add_token(:number, value, start_pos)
  end

  # Scan name or keyword
  #
  # Names may contain letters, digits, underscores, hyphens and dots.
  #
  # @param start_pos [Integer] Starting position
  def scan_name_or_keyword(start_pos)
    value = NAME_RUN.match(@expression, @position)[0]
    advance(value.length)
    add_token(classify_name(value), value, start_pos)
  end

  # Decide the token type of a scanned name; the position must already be
  # just past the name.
  #
  # NOTE(review): keywords and node types are classified without looking
  # at the surrounding tokens, so a step such as "//div" yields a :div
  # token; presumably the parser disambiguates - confirm.
  #
  # @param value [String] The scanned name
  # @return [Symbol] :axis, a keyword symbol, :node_type or :name
  def classify_name(value)
    if AXIS_NAMES.include?(value) && @expression[@position, 2] == "::"
      :axis
    elsif KEYWORDS.include?(value)
      value.to_sym
    elsif NODE_TYPES.include?(value)
      :node_type
    else
      :name
    end
  end

  # Raise syntax error carrying the expression and the current position.
  #
  # @param message [String] Error message
  # @raise [XPath::SyntaxError]
  def raise_syntax_error(message)
    raise Moxml::XPath::SyntaxError.new(
      message,
      expression: @expression,
      position: @position,
    )
  end
end
303
+ end
304
+ end