moxml 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +5 -0
  3. data/.github/workflows/dependent-tests.yml +20 -0
  4. data/.github/workflows/docs.yml +59 -0
  5. data/.github/workflows/rake.yml +12 -4
  6. data/.github/workflows/release.yml +5 -3
  7. data/.gitignore +37 -0
  8. data/.rubocop.yml +15 -7
  9. data/.rubocop_todo.yml +238 -40
  10. data/Gemfile +14 -9
  11. data/LICENSE.md +6 -2
  12. data/README.adoc +535 -373
  13. data/Rakefile +53 -0
  14. data/benchmarks/.gitignore +6 -0
  15. data/benchmarks/generate_report.rb +550 -0
  16. data/docs/Gemfile +13 -0
  17. data/docs/_config.yml +138 -0
  18. data/docs/_guides/advanced-features.adoc +87 -0
  19. data/docs/_guides/development-testing.adoc +165 -0
  20. data/docs/_guides/index.adoc +45 -0
  21. data/docs/_guides/modifying-xml.adoc +293 -0
  22. data/docs/_guides/parsing-xml.adoc +231 -0
  23. data/docs/_guides/sax-parsing.adoc +603 -0
  24. data/docs/_guides/working-with-documents.adoc +118 -0
  25. data/docs/_pages/adapter-compatibility.adoc +369 -0
  26. data/docs/_pages/adapters/headed-ox.adoc +237 -0
  27. data/docs/_pages/adapters/index.adoc +98 -0
  28. data/docs/_pages/adapters/libxml.adoc +286 -0
  29. data/docs/_pages/adapters/nokogiri.adoc +252 -0
  30. data/docs/_pages/adapters/oga.adoc +292 -0
  31. data/docs/_pages/adapters/ox.adoc +55 -0
  32. data/docs/_pages/adapters/rexml.adoc +293 -0
  33. data/docs/_pages/best-practices.adoc +430 -0
  34. data/docs/_pages/compatibility.adoc +468 -0
  35. data/docs/_pages/configuration.adoc +251 -0
  36. data/docs/_pages/error-handling.adoc +350 -0
  37. data/docs/_pages/headed-ox-limitations.adoc +558 -0
  38. data/docs/_pages/headed-ox.adoc +1025 -0
  39. data/docs/_pages/index.adoc +35 -0
  40. data/docs/_pages/installation.adoc +141 -0
  41. data/docs/_pages/node-api-reference.adoc +50 -0
  42. data/docs/_pages/performance.adoc +36 -0
  43. data/docs/_pages/quick-start.adoc +244 -0
  44. data/docs/_pages/thread-safety.adoc +29 -0
  45. data/docs/_references/document-api.adoc +408 -0
  46. data/docs/_references/index.adoc +48 -0
  47. data/docs/_tutorials/basic-usage.adoc +268 -0
  48. data/docs/_tutorials/builder-pattern.adoc +343 -0
  49. data/docs/_tutorials/index.adoc +33 -0
  50. data/docs/_tutorials/namespace-handling.adoc +325 -0
  51. data/docs/_tutorials/xpath-queries.adoc +359 -0
  52. data/docs/index.adoc +122 -0
  53. data/examples/README.md +124 -0
  54. data/examples/api_client/README.md +424 -0
  55. data/examples/api_client/api_client.rb +394 -0
  56. data/examples/api_client/example_response.xml +48 -0
  57. data/examples/headed_ox_example/README.md +90 -0
  58. data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
  59. data/examples/rss_parser/README.md +194 -0
  60. data/examples/rss_parser/example_feed.xml +93 -0
  61. data/examples/rss_parser/rss_parser.rb +189 -0
  62. data/examples/sax_parsing/README.md +50 -0
  63. data/examples/sax_parsing/data_extractor.rb +75 -0
  64. data/examples/sax_parsing/example.xml +21 -0
  65. data/examples/sax_parsing/large_file.rb +78 -0
  66. data/examples/sax_parsing/simple_parser.rb +55 -0
  67. data/examples/web_scraper/README.md +352 -0
  68. data/examples/web_scraper/example_page.html +201 -0
  69. data/examples/web_scraper/web_scraper.rb +312 -0
  70. data/lib/moxml/adapter/base.rb +107 -28
  71. data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
  72. data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
  73. data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
  74. data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
  75. data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
  76. data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
  77. data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
  78. data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
  79. data/lib/moxml/adapter/customized_ox/attribute.rb +28 -3
  80. data/lib/moxml/adapter/customized_ox/namespace.rb +0 -2
  81. data/lib/moxml/adapter/customized_ox/text.rb +0 -2
  82. data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -6
  83. data/lib/moxml/adapter/headed_ox.rb +161 -0
  84. data/lib/moxml/adapter/libxml.rb +1548 -0
  85. data/lib/moxml/adapter/nokogiri.rb +121 -9
  86. data/lib/moxml/adapter/oga.rb +123 -12
  87. data/lib/moxml/adapter/ox.rb +283 -27
  88. data/lib/moxml/adapter/rexml.rb +127 -20
  89. data/lib/moxml/adapter.rb +21 -4
  90. data/lib/moxml/attribute.rb +6 -0
  91. data/lib/moxml/builder.rb +40 -4
  92. data/lib/moxml/config.rb +8 -3
  93. data/lib/moxml/context.rb +39 -1
  94. data/lib/moxml/doctype.rb +13 -1
  95. data/lib/moxml/document.rb +39 -6
  96. data/lib/moxml/document_builder.rb +27 -5
  97. data/lib/moxml/element.rb +71 -2
  98. data/lib/moxml/error.rb +175 -6
  99. data/lib/moxml/node.rb +94 -3
  100. data/lib/moxml/node_set.rb +34 -0
  101. data/lib/moxml/sax/block_handler.rb +194 -0
  102. data/lib/moxml/sax/element_handler.rb +124 -0
  103. data/lib/moxml/sax/handler.rb +113 -0
  104. data/lib/moxml/sax.rb +31 -0
  105. data/lib/moxml/version.rb +1 -1
  106. data/lib/moxml/xml_utils/encoder.rb +4 -4
  107. data/lib/moxml/xml_utils.rb +7 -4
  108. data/lib/moxml/xpath/ast/node.rb +159 -0
  109. data/lib/moxml/xpath/cache.rb +91 -0
  110. data/lib/moxml/xpath/compiler.rb +1768 -0
  111. data/lib/moxml/xpath/context.rb +26 -0
  112. data/lib/moxml/xpath/conversion.rb +124 -0
  113. data/lib/moxml/xpath/engine.rb +52 -0
  114. data/lib/moxml/xpath/errors.rb +101 -0
  115. data/lib/moxml/xpath/lexer.rb +304 -0
  116. data/lib/moxml/xpath/parser.rb +485 -0
  117. data/lib/moxml/xpath/ruby/generator.rb +269 -0
  118. data/lib/moxml/xpath/ruby/node.rb +193 -0
  119. data/lib/moxml/xpath.rb +37 -0
  120. data/lib/moxml.rb +5 -2
  121. data/moxml.gemspec +3 -1
  122. data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
  123. data/spec/consistency/README.md +77 -0
  124. data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
  125. data/spec/examples/README.md +75 -0
  126. data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
  127. data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
  128. data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
  129. data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
  130. data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
  131. data/spec/integration/README.md +71 -0
  132. data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
  133. data/spec/integration/headed_ox_integration_spec.rb +326 -0
  134. data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
  135. data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
  136. data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
  137. data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
  138. data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
  139. data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
  140. data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
  141. data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -2
  142. data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
  143. data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
  144. data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
  145. data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
  146. data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
  147. data/spec/moxml/README.md +41 -0
  148. data/spec/moxml/adapter/.gitkeep +0 -0
  149. data/spec/moxml/adapter/README.md +61 -0
  150. data/spec/moxml/adapter/base_spec.rb +27 -0
  151. data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
  152. data/spec/moxml/adapter/libxml_spec.rb +14 -0
  153. data/spec/moxml/adapter/ox_spec.rb +9 -8
  154. data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
  155. data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
  156. data/spec/moxml/adapter_spec.rb +16 -0
  157. data/spec/moxml/attribute_spec.rb +30 -0
  158. data/spec/moxml/builder_spec.rb +33 -0
  159. data/spec/moxml/cdata_spec.rb +31 -0
  160. data/spec/moxml/comment_spec.rb +31 -0
  161. data/spec/moxml/config_spec.rb +3 -3
  162. data/spec/moxml/context_spec.rb +28 -0
  163. data/spec/moxml/declaration_spec.rb +36 -0
  164. data/spec/moxml/doctype_spec.rb +33 -0
  165. data/spec/moxml/document_builder_spec.rb +30 -0
  166. data/spec/moxml/document_spec.rb +105 -0
  167. data/spec/moxml/element_spec.rb +143 -0
  168. data/spec/moxml/error_spec.rb +266 -22
  169. data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
  170. data/spec/moxml/namespace_spec.rb +32 -0
  171. data/spec/moxml/node_set_spec.rb +39 -0
  172. data/spec/moxml/node_spec.rb +37 -0
  173. data/spec/moxml/processing_instruction_spec.rb +34 -0
  174. data/spec/moxml/sax_spec.rb +1067 -0
  175. data/spec/moxml/text_spec.rb +31 -0
  176. data/spec/moxml/version_spec.rb +14 -0
  177. data/spec/moxml/xml_utils/.gitkeep +0 -0
  178. data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
  179. data/spec/moxml/xml_utils_spec.rb +49 -0
  180. data/spec/moxml/xpath/ast/node_spec.rb +83 -0
  181. data/spec/moxml/xpath/axes_spec.rb +296 -0
  182. data/spec/moxml/xpath/cache_spec.rb +358 -0
  183. data/spec/moxml/xpath/compiler_spec.rb +406 -0
  184. data/spec/moxml/xpath/context_spec.rb +210 -0
  185. data/spec/moxml/xpath/conversion_spec.rb +365 -0
  186. data/spec/moxml/xpath/fixtures/sample.xml +25 -0
  187. data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
  188. data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
  189. data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
  190. data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
  191. data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
  192. data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
  193. data/spec/moxml/xpath/lexer_spec.rb +488 -0
  194. data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
  195. data/spec/moxml/xpath/parser_spec.rb +364 -0
  196. data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
  197. data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
  198. data/spec/moxml/xpath_capabilities_spec.rb +199 -0
  199. data/spec/moxml/xpath_spec.rb +77 -0
  200. data/spec/performance/README.md +83 -0
  201. data/spec/performance/benchmark_spec.rb +64 -0
  202. data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +3 -1
  203. data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
  204. data/spec/performance/xpath_benchmark_spec.rb +259 -0
  205. data/spec/spec_helper.rb +58 -1
  206. data/spec/support/xml_matchers.rb +1 -1
  207. metadata +176 -35
  208. data/lib/ox/node.rb +0 -9
  209. data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
  210. /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
  211. /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
  212. /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
  213. /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
  214. /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
  215. /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
data/lib/moxml/node.rb CHANGED
@@ -31,7 +31,7 @@ module Moxml
31
31
  def children
32
32
  NodeSet.new(
33
33
  adapter.children(@native).map { adapter.patch_node(_1, @native) },
34
- context
34
+ context,
35
35
  )
36
36
  end
37
37
 
@@ -84,6 +84,92 @@ module Moxml
84
84
  Node.wrap(adapter.at_xpath(@native, expression, namespaces), context)
85
85
  end
86
86
 
87
# Convenience lookup: forwards to +at_xpath+ and returns its result.
def find(xpath_expression, namespaces = {})
  first_match = at_xpath(xpath_expression, namespaces)
  first_match
end
91
+
92
# Convenience lookup: forwards to +xpath+ and converts the result with +to_a+.
def find_all(xpath_expression, namespaces = {})
  matches = xpath(xpath_expression, namespaces)
  matches.to_a
end
95
+
96
# True when +children+ is not empty.
def has_children?
  return false if children.empty?

  true
end
100
+
101
# First entry of +children+ (whatever +children.first+ yields when empty).
def first_child
  kids = children
  kids.first
end
105
+
106
# Last entry of +children+ (whatever +children.last+ yields when empty).
def last_child
  kids = children
  kids.last
end
109
+
110
# Returns the text content of this node.
#
# This base implementation always returns an empty string. Per the note
# that accompanied the original definition, Element and Text provide
# their own implementations; other subclasses should override this
# method as needed.
#
# NOTE: this method used to be defined twice in a row. Ruby keeps only
# the last definition, so the earlier +content+/+children+ based variant
# never ran and only produced a "method redefined" warning. The single
# definition below is the one that was actually in effect.
#
# @return [String] always ""
def text
  ""
end
129
+
130
# Attribute value lookup by name.
#
# Returns nil when the receiver has no +attribute+ method, or when the
# object returned by +attribute(name)+ does not respond to +value+.
def [](name)
  return nil unless respond_to?(:attribute)

  found = attribute(name)
  return nil unless found.respond_to?(:value)

  found.value
end
138
+
139
# Namespace of this node, wrapped in a Namespace object.
#
# Returns nil when +element?+ is false; when the adapter reports no
# namespace, that falsy value is returned unchanged.
def namespace
  return nil unless element?

  native_ns = adapter.namespace(@native)
  return native_ns unless native_ns

  Namespace.new(native_ns, context)
end
147
+
148
# All namespace definitions reported by the adapter for this node, each
# wrapped in a Namespace object. Returns an empty array when +element?+
# is false.
def namespaces
  if element?
    adapter.namespace_definitions(@native).map { |native_ns| Namespace.new(native_ns, context) }
  else
    []
  end
end
157
+
158
# Recursively yields every descendant node, depth-first: each child is
# yielded before its own descendants. The original comment notes this is
# used by the XPath descendant-or-self and descendant axes.
#
# Without a block an Enumerator over the same sequence is returned
# (previously this raised LocalJumpError on the first +yield+).
#
# @yieldparam node [Object] each descendant in document order
# @return [Enumerator] when no block is given
def each_node(&block)
  return enum_for(:each_node) unless block

  children.each do |child|
    yield child
    # Only recurse into children that can themselves be traversed.
    child.each_node(&block) if child.respond_to?(:each_node)
  end
end
166
+
167
# Copies the node: the native object is duplicated through the adapter's
# +dup+ and the copy is wrapped again via +Node.wrap+ with the same context.
# +dup+ is an alias for +clone+.
def clone
  native_copy = adapter.dup(@native)
  Node.wrap(native_copy, context)
end
alias dup clone
172
+
87
173
  def ==(other)
88
174
  self.class == other.class && @native == other.native
89
175
  end
@@ -106,6 +192,7 @@ module Moxml
106
192
  when :document then Document
107
193
  when :declaration then Declaration
108
194
  when :doctype then Doctype
195
+ when :attribute then Attribute
109
196
  else self
110
197
  end
111
198
 
@@ -129,7 +216,11 @@ module Moxml
129
216
  when String then Text.new(adapter.create_text(node), context)
130
217
  when Node then node
131
218
  else
132
- raise ArgumentError, "Invalid node type: #{node.class}"
219
+ raise Moxml::DocumentStructureError.new(
220
+ "Invalid node type: #{node.class}. Expected String or Moxml::Node",
221
+ operation: "prepare_node",
222
+ state: "node_type: #{node.class}",
223
+ )
133
224
  end
134
225
  end
135
226
 
@@ -141,7 +232,7 @@ module Moxml
141
232
  # Oga: <empty /> (with a space)
142
233
  # Nokogiri: <empty/> (without a space)
143
234
  # The expanded format is enforced to avoid this conflict
144
- expand_empty: true
235
+ expand_empty: true,
145
236
  }
146
237
  end
147
238
  end
@@ -52,6 +52,31 @@ module Moxml
52
52
  self.class.new(nodes + other.nodes, context)
53
53
  end
54
54
 
55
# Appends a node to the set and returns the set itself.
# Objects responding to +native+ are unwrapped first, so only native
# nodes are stored. +push+ is an alias.
def <<(node)
  @nodes << (node.respond_to?(:native) ? node.native : node)
  self
end
alias push <<
62
+
63
# Returns a new set of the same class with duplicate native nodes removed.
#
# Duplicates are detected by object identity (+object_id+), not by
# equality, and the first occurrence of each native node is kept in its
# original position. The original comment notes this matters for XPath
# operations such as descendant-or-self, which may yield the same native
# node more than once.
#
# @return [self.class] a new set built from the unique natives and +context+
def uniq_by_native
  # Array#uniq with a block keeps the first element for each block value,
  # which is exactly the hand-rolled "seen" bookkeeping it replaces.
  self.class.new(@nodes.uniq(&:object_id), context)
end
79
+
55
80
  def ==(other)
56
81
  self.class == other.class &&
57
82
  length == other.length &&
@@ -68,5 +93,14 @@ module Moxml
68
93
  each(&:remove)
69
94
  self
70
95
  end
96
+
97
# Removes a node from the set and returns the set itself.
# Accepts either a native node or any object responding to +native+,
# which is unwrapped before the removal.
def delete(node)
  target = node.respond_to?(:native) ? node.native : node
  @nodes.delete(target)
  self
end
71
105
  end
72
106
  end
@@ -0,0 +1,194 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "handler"
4
+
5
module Moxml
  module SAX
    # SAX handler whose callbacks are supplied as blocks through a small DSL,
    # so simple cases need no dedicated handler class.
    #
    # The block given to +new+ is run with +instance_eval+, i.e. +self+
    # inside it is the handler. Outside state is therefore reached through
    # captured local variables, not through the caller's instance variables.
    #
    # @example Block-based parsing
    #   context.sax_parse(xml) do
    #     start_element { |name, attrs| puts "Element: #{name}" }
    #     characters { |text| puts "Text: #{text}" }
    #     end_element { |name| puts "End: #{name}" }
    #   end
    #
    # @example Collecting into a captured local variable
    #   books = []
    #   context.sax_parse(xml) do
    #     start_element do |name, attrs|
    #       books << { id: attrs["id"] } if name == "book"
    #     end
    #   end
    #
    class BlockHandler < Handler
      # @yield DSL calls (+start_element { ... }+ etc.) registering callbacks
      def initialize(&block)
        super()
        @handlers = {}
        instance_eval(&block) if block
      end

      # --- DSL: each method stores the given block for one event ---------

      # @yield called with no arguments when the document starts
      # @return [void]
      def start_document(&block)
        register_callback(:start_document, block)
      end

      # @yield called with no arguments when the document ends
      # @return [void]
      def end_document(&block)
        register_callback(:end_document, block)
      end

      # @yieldparam name [String] element name
      # @yieldparam attributes [Hash<String, String>] element attributes
      # @yieldparam namespaces [Hash<String, String>] namespace declarations
      # @return [void]
      def start_element(&block)
        register_callback(:start_element, block)
      end

      # @yieldparam name [String] element name
      # @return [void]
      def end_element(&block)
        register_callback(:end_element, block)
      end

      # @yieldparam text [String] character data
      # @return [void]
      def characters(&block)
        register_callback(:characters, block)
      end

      # @yieldparam text [String] CDATA content
      # @return [void]
      def cdata(&block)
        register_callback(:cdata, block)
      end

      # @yieldparam text [String] comment content
      # @return [void]
      def comment(&block)
        register_callback(:comment, block)
      end

      # @yieldparam target [String] processing instruction target
      # @yieldparam data [String] processing instruction data
      # @return [void]
      def processing_instruction(&block)
        register_callback(:processing_instruction, block)
      end

      # @yieldparam error [Moxml::ParseError] the error
      # @return [void]
      def error(&block)
        register_callback(:error, block)
      end

      # @yieldparam message [String] warning message
      # @return [void]
      def warning(&block)
        register_callback(:warning, block)
      end

      # --- Event entry points: forward to the stored block, if any -------

      # @private
      def on_start_document
        fire(:start_document)
      end

      # @private
      def on_end_document
        fire(:end_document)
      end

      # @private
      def on_start_element(name, attributes = {}, namespaces = {})
        fire(:start_element, name, attributes, namespaces)
      end

      # @private
      def on_end_element(name)
        fire(:end_element, name)
      end

      # @private
      def on_characters(text)
        fire(:characters, text)
      end

      # @private
      def on_cdata(text)
        fire(:cdata, text)
      end

      # @private
      def on_comment(text)
        fire(:comment, text)
      end

      # @private
      def on_processing_instruction(target, data)
        fire(:processing_instruction, target, data)
      end

      # Uses the registered error block when there is one; otherwise defers
      # to the superclass implementation.
      #
      # @private
      def on_error(error)
        callback = @handlers[:error]
        callback ? callback.call(error) : super
      end

      # @private
      def on_warning(message)
        fire(:warning, message)
      end

      private

      # Stores +callback+ (possibly nil) as the handler for +event+.
      def register_callback(event, callback)
        @handlers[event] = callback
      end

      # Calls the handler registered for +event+, if any, with +args+.
      def fire(event, *args)
        @handlers[event]&.call(*args)
      end
    end
  end
end
@@ -0,0 +1,124 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "handler"
4
+
5
+ module Moxml
6
+ module SAX
7
+ # Element-focused SAX handler with stack tracking
8
+ #
9
+ # Extends the base Handler with utilities for tracking element context:
10
+ # - Element stack (current hierarchy)
11
+ # - Current path (array of element names from root)
12
+ # - Helper methods for checking context
13
+ #
14
+ # @example Using element context
15
+ # class MyHandler < Moxml::SAX::ElementHandler
16
+ # def on_start_element(name, attributes = {}, namespaces = {})
17
+ # super # Important: call super to update stack
18
+ #
19
+ # if path_matches?(%r{/library/book/title$})
20
+ # puts "Found title at: #{current_path.join('/')}"
21
+ # end
22
+ # end
23
+ # end
24
+ #
25
+ class ElementHandler < Handler
26
+ # @return [Array<String>] Stack of currently open elements
27
+ attr_reader :element_stack
28
+
29
+ # @return [Array<String>] Current path from root to current element
30
+ attr_reader :current_path
31
+
32
# Starts with an empty element stack and an empty path (two separate arrays).
def initialize
  super
  @element_stack, @current_path = [], []
end
37
+
38
# Records the opened element on both the stack and the path, then hands
# the event on to the superclass.
#
# @param name [String] Element name
# @param attributes [Hash] Element attributes
# @param namespaces [Hash] Namespace declarations
# @return [void]
def on_start_element(name, attributes = {}, namespaces = {})
  @element_stack << name
  @current_path << name
  super
end
49
+
50
# Drops the innermost element from both the stack and the path, then hands
# the event on to the superclass.
#
# @param name [String] Element name
# @return [void]
def on_end_element(name)
  [@element_stack, @current_path].each(&:pop)
  super
end
59
+
60
# Tells whether an element with the given name is currently open at any
# level of the stack.
#
# @param name [String] Element name to look for
# @return [Boolean] true if such an element is on the stack
# @example
#   in_element?("book") # true if inside any <book> element
def in_element?(name)
  @element_stack.any? { |open_name| open_name == name }
end
69
+
70
# Name of the innermost open element.
#
# @return [String, nil] top of the element stack, nil when the stack is empty
# @example
#   current_element # => "title"
def current_element
  @element_stack[-1]
end
78
+
79
# Name of the element enclosing the innermost open element.
#
# @return [String, nil] second-from-top of the stack, nil when fewer than
#   two elements are open
# @example
#   parent_element # => "book"
def parent_element
  @element_stack.at(-2)
end
87
+
88
# Number of currently open elements.
#
# @return [Integer] size of the element stack (0 when no element is open)
# @example
#   depth # => 3 (e.g., /library/book/title)
def depth
  @element_stack.size
end
96
+
97
# Checks the current path against a pattern.
#
# The path is rendered as "/" followed by the open element names joined
# with "/". A Regexp is tested against that string; any other pattern is
# converted with +to_s+ and must equal the path exactly.
#
# The previous implementation negated +match?(...).nil?+; since +match?+
# only ever returns true or false, every Regexp was reported as matching.
#
# @param pattern [String, Regexp] Pattern to match against the path
# @return [Boolean] true if the path matches
# @example
#   path_matches?(/book\/title$/)        # true if at /*/book/title
#   path_matches?("/library/book/title") # exact path match
def path_matches?(pattern)
  path_str = "/#{@current_path.join('/')}"
  return path_str.match?(pattern) if pattern.is_a?(Regexp)

  path_str == pattern.to_s
end
112
+
113
# Renders the current path as one string: the separator, followed by the
# open element names joined with that same separator.
#
# @param separator [String] Path separator (default: "/")
# @return [String] Full path string
# @example
#   path_string # => "/library/book/title"
def path_string(separator = "/")
  joined_names = @current_path.join(separator)
  separator + joined_names
end
122
+ end
123
+ end
124
+ end
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
module Moxml
  module SAX
    # Base class for SAX event handlers.
    #
    # It declares one callback per SAX event. Every callback except
    # +on_error+ is a no-op, so a subclass overrides only the events it
    # cares about.
    #
    # @example Create a custom handler
    #   class BookHandler < Moxml::SAX::Handler
    #     def on_start_element(name, attributes = {}, namespaces = {})
    #       puts "Found element: #{name}"
    #     end
    #   end
    #
    class Handler
      # Parsing begins. No-op by default.
      #
      # @return [void]
      def on_start_document; end

      # Parsing completed successfully. No-op by default.
      #
      # @return [void]
      def on_end_document; end

      # An opening tag was encountered. No-op by default.
      #
      # @param name [String] Element name (with namespace prefix if present)
      # @param attributes [Hash<String, String>] Element attributes
      # @param namespaces [Hash<String, String>] Namespace declarations on this element
      # @return [void]
      def on_start_element(name, attributes = {}, namespaces = {}); end

      # A closing tag was encountered. No-op by default.
      #
      # @param name [String] Element name
      # @return [void]
      def on_end_element(name); end

      # Character data was encountered. No-op by default.
      #
      # A single text node may arrive in several chunks if the parser
      # splits it; concatenate the chunks if the whole text is needed.
      #
      # @param text [String] Character data
      # @return [void]
      def on_characters(text); end

      # A CDATA section was encountered. No-op by default.
      #
      # @param text [String] CDATA content
      # @return [void]
      def on_cdata(text); end

      # A comment was encountered. No-op by default.
      #
      # @param text [String] Comment content
      # @return [void]
      def on_comment(text); end

      # A processing instruction was encountered. No-op by default.
      #
      # @param target [String] PI target
      # @param data [String] PI data/content
      # @return [void]
      def on_processing_instruction(target, data); end

      # A fatal parsing error occurred. The default re-raises the error
      # it is given; override to handle errors differently.
      #
      # @param error [Moxml::ParseError] The parsing error
      # @return [void]
      # @raise [Moxml::ParseError] By default
      def on_error(error)
        raise error
      end

      # A non-fatal warning occurred. Ignored by default; override to
      # handle warnings (e.g., log them).
      #
      # @param message [String] Warning message
      # @return [void]
      def on_warning(message); end
    end
  end
end
data/lib/moxml/sax.rb ADDED
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "sax/handler"
4
+ require_relative "sax/element_handler"
5
+ require_relative "sax/block_handler"
6
+
7
module Moxml
  # Namespace for Moxml's SAX (Simple API for XML) support.
  #
  # SAX parsing is event-driven: instead of building a document tree, the
  # parser reports events to a handler, which keeps memory use low on
  # large XML files. The handler classes themselves are loaded by the
  # requires at the top of this file.
  #
  # @example Class-based handler
  #   class MyHandler < Moxml::SAX::Handler
  #     def on_start_element(name, attributes = {}, namespaces = {})
  #       puts "Started element: #{name}"
  #     end
  #   end
  #
  #   context = Moxml.new
  #   context.sax_parse(xml_string, MyHandler.new)
  #
  # @example Block-based handler
  #   context.sax_parse(xml_string) do
  #     start_element { |name, attrs| puts "Element: #{name}" }
  #     characters { |text| puts "Text: #{text}" }
  #   end
  #
  module SAX; end
end
data/lib/moxml/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Moxml
4
- VERSION = "0.1.6"
4
+ VERSION = "0.1.8"
5
5
  end
@@ -10,19 +10,19 @@ module Moxml
10
10
  basic: {
11
11
  "<" => "&lt;",
12
12
  ">" => "&gt;",
13
- "&" => "&amp;"
13
+ "&" => "&amp;",
14
14
  },
15
15
  quotes: {
16
16
  "'" => "&apos;",
17
- '"' => "&quot;"
17
+ '"' => "&quot;",
18
18
  },
19
19
  full: {
20
20
  "<" => "&lt;",
21
21
  ">" => "&gt;",
22
22
  "'" => "&apos;",
23
23
  '"' => "&quot;",
24
- "&" => "&amp;"
25
- }
24
+ "&" => "&amp;",
25
+ },
26
26
  }.freeze
27
27
  MODES = MAPPINGS.keys.freeze
28
28
 
@@ -3,7 +3,7 @@
3
3
  require_relative "xml_utils/encoder"
4
4
 
5
5
  # Ruby 3.3+ requires the URI module to be explicitly required
6
- require "uri" unless defined?(::URI)
6
+ require "uri" unless defined?(URI)
7
7
 
8
8
  module Moxml
9
9
  module XmlUtils
@@ -47,17 +47,20 @@ module Moxml
47
47
  def validate_element_name(name)
48
48
  return if name.is_a?(String) && name.match?(/^[a-zA-Z_][\w\-.:]*$/)
49
49
 
50
- raise ValidationError, "Invalid XML name: #{name}"
50
+ raise ValidationError, "Invalid XML element name: #{name}"
51
51
  end
52
52
 
53
53
  def validate_pi_target(target)
54
54
  return if target.is_a?(String) && target.match?(/^[a-zA-Z_][\w\-.]*$/)
55
55
 
56
- raise ValidationError, "Invalid XML target: #{target}"
56
+ raise ValidationError,
57
+ "Invalid XML processing instruction target: #{target}"
57
58
  end
58
59
 
59
60
  def validate_uri(uri)
60
- return if uri.empty? || uri.match?(/\A#{::URI::DEFAULT_PARSER.make_regexp}\z/)
61
+ if uri.empty? || uri.match?(/\A#{::URI::DEFAULT_PARSER.make_regexp}\z/)
62
+ return
63
+ end
61
64
 
62
65
  raise ValidationError, "Invalid URI: #{uri}"
63
66
  end