moxml 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +5 -0
  3. data/.github/workflows/dependent-tests.yml +20 -0
  4. data/.github/workflows/docs.yml +59 -0
  5. data/.github/workflows/rake.yml +10 -10
  6. data/.github/workflows/release.yml +5 -3
  7. data/.gitignore +37 -0
  8. data/.rubocop.yml +15 -7
  9. data/.rubocop_todo.yml +238 -40
  10. data/Gemfile +14 -9
  11. data/LICENSE.md +6 -2
  12. data/README.adoc +535 -373
  13. data/Rakefile +53 -0
  14. data/benchmarks/.gitignore +6 -0
  15. data/benchmarks/generate_report.rb +550 -0
  16. data/docs/Gemfile +13 -0
  17. data/docs/_config.yml +138 -0
  18. data/docs/_guides/advanced-features.adoc +87 -0
  19. data/docs/_guides/development-testing.adoc +165 -0
  20. data/docs/_guides/index.adoc +45 -0
  21. data/docs/_guides/modifying-xml.adoc +293 -0
  22. data/docs/_guides/parsing-xml.adoc +231 -0
  23. data/docs/_guides/sax-parsing.adoc +603 -0
  24. data/docs/_guides/working-with-documents.adoc +118 -0
  25. data/docs/_pages/adapter-compatibility.adoc +369 -0
  26. data/docs/_pages/adapters/headed-ox.adoc +237 -0
  27. data/docs/_pages/adapters/index.adoc +98 -0
  28. data/docs/_pages/adapters/libxml.adoc +286 -0
  29. data/docs/_pages/adapters/nokogiri.adoc +252 -0
  30. data/docs/_pages/adapters/oga.adoc +292 -0
  31. data/docs/_pages/adapters/ox.adoc +55 -0
  32. data/docs/_pages/adapters/rexml.adoc +293 -0
  33. data/docs/_pages/best-practices.adoc +430 -0
  34. data/docs/_pages/compatibility.adoc +468 -0
  35. data/docs/_pages/configuration.adoc +251 -0
  36. data/docs/_pages/error-handling.adoc +350 -0
  37. data/docs/_pages/headed-ox-limitations.adoc +558 -0
  38. data/docs/_pages/headed-ox.adoc +1025 -0
  39. data/docs/_pages/index.adoc +35 -0
  40. data/docs/_pages/installation.adoc +141 -0
  41. data/docs/_pages/node-api-reference.adoc +50 -0
  42. data/docs/_pages/performance.adoc +36 -0
  43. data/docs/_pages/quick-start.adoc +244 -0
  44. data/docs/_pages/thread-safety.adoc +29 -0
  45. data/docs/_references/document-api.adoc +408 -0
  46. data/docs/_references/index.adoc +48 -0
  47. data/docs/_tutorials/basic-usage.adoc +268 -0
  48. data/docs/_tutorials/builder-pattern.adoc +343 -0
  49. data/docs/_tutorials/index.adoc +33 -0
  50. data/docs/_tutorials/namespace-handling.adoc +325 -0
  51. data/docs/_tutorials/xpath-queries.adoc +359 -0
  52. data/docs/index.adoc +122 -0
  53. data/examples/README.md +124 -0
  54. data/examples/api_client/README.md +424 -0
  55. data/examples/api_client/api_client.rb +394 -0
  56. data/examples/api_client/example_response.xml +48 -0
  57. data/examples/headed_ox_example/README.md +90 -0
  58. data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
  59. data/examples/rss_parser/README.md +194 -0
  60. data/examples/rss_parser/example_feed.xml +93 -0
  61. data/examples/rss_parser/rss_parser.rb +189 -0
  62. data/examples/sax_parsing/README.md +50 -0
  63. data/examples/sax_parsing/data_extractor.rb +75 -0
  64. data/examples/sax_parsing/example.xml +21 -0
  65. data/examples/sax_parsing/large_file.rb +78 -0
  66. data/examples/sax_parsing/simple_parser.rb +55 -0
  67. data/examples/web_scraper/README.md +352 -0
  68. data/examples/web_scraper/example_page.html +201 -0
  69. data/examples/web_scraper/web_scraper.rb +312 -0
  70. data/lib/moxml/adapter/base.rb +107 -28
  71. data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
  72. data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
  73. data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
  74. data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
  75. data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
  76. data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
  77. data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
  78. data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
  79. data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
  80. data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -6
  81. data/lib/moxml/adapter/headed_ox.rb +161 -0
  82. data/lib/moxml/adapter/libxml.rb +1548 -0
  83. data/lib/moxml/adapter/nokogiri.rb +121 -9
  84. data/lib/moxml/adapter/oga.rb +123 -12
  85. data/lib/moxml/adapter/ox.rb +282 -26
  86. data/lib/moxml/adapter/rexml.rb +127 -20
  87. data/lib/moxml/adapter.rb +21 -4
  88. data/lib/moxml/attribute.rb +6 -0
  89. data/lib/moxml/builder.rb +40 -4
  90. data/lib/moxml/config.rb +8 -3
  91. data/lib/moxml/context.rb +39 -1
  92. data/lib/moxml/doctype.rb +13 -1
  93. data/lib/moxml/document.rb +39 -6
  94. data/lib/moxml/document_builder.rb +27 -5
  95. data/lib/moxml/element.rb +71 -2
  96. data/lib/moxml/error.rb +175 -6
  97. data/lib/moxml/node.rb +94 -3
  98. data/lib/moxml/node_set.rb +34 -0
  99. data/lib/moxml/sax/block_handler.rb +194 -0
  100. data/lib/moxml/sax/element_handler.rb +124 -0
  101. data/lib/moxml/sax/handler.rb +113 -0
  102. data/lib/moxml/sax.rb +31 -0
  103. data/lib/moxml/version.rb +1 -1
  104. data/lib/moxml/xml_utils/encoder.rb +4 -4
  105. data/lib/moxml/xml_utils.rb +7 -4
  106. data/lib/moxml/xpath/ast/node.rb +159 -0
  107. data/lib/moxml/xpath/cache.rb +91 -0
  108. data/lib/moxml/xpath/compiler.rb +1768 -0
  109. data/lib/moxml/xpath/context.rb +26 -0
  110. data/lib/moxml/xpath/conversion.rb +124 -0
  111. data/lib/moxml/xpath/engine.rb +52 -0
  112. data/lib/moxml/xpath/errors.rb +101 -0
  113. data/lib/moxml/xpath/lexer.rb +304 -0
  114. data/lib/moxml/xpath/parser.rb +485 -0
  115. data/lib/moxml/xpath/ruby/generator.rb +269 -0
  116. data/lib/moxml/xpath/ruby/node.rb +193 -0
  117. data/lib/moxml/xpath.rb +37 -0
  118. data/lib/moxml.rb +5 -2
  119. data/moxml.gemspec +3 -1
  120. data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
  121. data/spec/consistency/README.md +77 -0
  122. data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
  123. data/spec/examples/README.md +75 -0
  124. data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
  125. data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
  126. data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
  127. data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
  128. data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
  129. data/spec/integration/README.md +71 -0
  130. data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
  131. data/spec/integration/headed_ox_integration_spec.rb +326 -0
  132. data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
  133. data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
  134. data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
  135. data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
  136. data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
  137. data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
  138. data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
  139. data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -2
  140. data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
  141. data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
  142. data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
  143. data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
  144. data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
  145. data/spec/moxml/README.md +41 -0
  146. data/spec/moxml/adapter/.gitkeep +0 -0
  147. data/spec/moxml/adapter/README.md +61 -0
  148. data/spec/moxml/adapter/base_spec.rb +27 -0
  149. data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
  150. data/spec/moxml/adapter/libxml_spec.rb +14 -0
  151. data/spec/moxml/adapter/ox_spec.rb +9 -8
  152. data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
  153. data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
  154. data/spec/moxml/adapter_spec.rb +16 -0
  155. data/spec/moxml/attribute_spec.rb +30 -0
  156. data/spec/moxml/builder_spec.rb +33 -0
  157. data/spec/moxml/cdata_spec.rb +31 -0
  158. data/spec/moxml/comment_spec.rb +31 -0
  159. data/spec/moxml/config_spec.rb +3 -3
  160. data/spec/moxml/context_spec.rb +28 -0
  161. data/spec/moxml/declaration_spec.rb +36 -0
  162. data/spec/moxml/doctype_spec.rb +33 -0
  163. data/spec/moxml/document_builder_spec.rb +30 -0
  164. data/spec/moxml/document_spec.rb +105 -0
  165. data/spec/moxml/element_spec.rb +143 -0
  166. data/spec/moxml/error_spec.rb +266 -22
  167. data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
  168. data/spec/moxml/namespace_spec.rb +32 -0
  169. data/spec/moxml/node_set_spec.rb +39 -0
  170. data/spec/moxml/node_spec.rb +37 -0
  171. data/spec/moxml/processing_instruction_spec.rb +34 -0
  172. data/spec/moxml/sax_spec.rb +1067 -0
  173. data/spec/moxml/text_spec.rb +31 -0
  174. data/spec/moxml/version_spec.rb +14 -0
  175. data/spec/moxml/xml_utils/.gitkeep +0 -0
  176. data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
  177. data/spec/moxml/xml_utils_spec.rb +49 -0
  178. data/spec/moxml/xpath/ast/node_spec.rb +83 -0
  179. data/spec/moxml/xpath/axes_spec.rb +296 -0
  180. data/spec/moxml/xpath/cache_spec.rb +358 -0
  181. data/spec/moxml/xpath/compiler_spec.rb +406 -0
  182. data/spec/moxml/xpath/context_spec.rb +210 -0
  183. data/spec/moxml/xpath/conversion_spec.rb +365 -0
  184. data/spec/moxml/xpath/fixtures/sample.xml +25 -0
  185. data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
  186. data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
  187. data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
  188. data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
  189. data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
  190. data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
  191. data/spec/moxml/xpath/lexer_spec.rb +488 -0
  192. data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
  193. data/spec/moxml/xpath/parser_spec.rb +364 -0
  194. data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
  195. data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
  196. data/spec/moxml/xpath_capabilities_spec.rb +199 -0
  197. data/spec/moxml/xpath_spec.rb +77 -0
  198. data/spec/performance/README.md +83 -0
  199. data/spec/performance/benchmark_spec.rb +64 -0
  200. data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +3 -1
  201. data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
  202. data/spec/performance/xpath_benchmark_spec.rb +259 -0
  203. data/spec/spec_helper.rb +58 -1
  204. data/spec/support/xml_matchers.rb +1 -1
  205. metadata +176 -34
  206. data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
  207. /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
  208. /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
  209. /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
  210. /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
  211. /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
  212. /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
@@ -0,0 +1,292 @@
1
+ ---
2
+ title: Oga adapter
3
+ parent: Adapters
4
+ grand_parent: Overview
5
+ nav_order: 3
6
+ ---
7
+
8
+ == Oga adapter
9
+
10
+ === Purpose
11
+
12
+ The Oga adapter provides pure Ruby XML processing with full XPath 1.0
13
+ support, making it ideal for environments where C extensions are not allowed
14
+ or desired.
15
+
16
+ === Overview
17
+
18
+ link:https://github.com/yorickpeterse/oga[Oga] is a pure Ruby XML parser that
19
+ requires no C extensions. It's perfect for JRuby, TruffleRuby, Opal, or any
20
+ environment where native extensions are problematic.
21
+
22
+ === Installation
23
+
24
+ Add to your Gemfile:
25
+
26
+ [source,ruby]
27
+ ----
28
+ gem 'moxml'
29
+ gem 'oga'
30
+ ----
31
+
32
+ Install:
33
+
34
+ [source,shell]
35
+ ----
36
+ bundle install
37
+ ----
38
+
39
+ === Configuration
40
+
41
+ [source,ruby]
42
+ ----
43
+ # Explicit selection
44
+ context = Moxml.new
45
+ context.config.adapter = :oga
46
+
47
+ # Global default
48
+ Moxml::Config.default_adapter = :oga
49
+ ----
50
+
51
+ === Features
52
+
53
+ ==== Full XPath 1.0 support
54
+
55
+ Pure Ruby implementation of XPath 1.0:
56
+
57
+ [source,ruby]
58
+ ----
59
+ doc = Moxml.new.parse(xml)
60
+
61
+ # All XPath features work
62
+ books = doc.xpath('//book[@price < 30]')
63
+ count = doc.xpath('count(//book)')
64
+ titles = doc.xpath('//book[position() < 3]/title')
65
+
66
+ # Namespace-aware queries
67
+ doc.xpath('//ns:element', 'ns' => 'http://example.org')
68
+
69
+ # Functions and predicates
70
+ doc.xpath('//book[contains(title, "Ruby")]')
71
+ ----
72
+
73
+ ==== Complete namespace support
74
+
75
+ Full namespace handling in pure Ruby:
76
+
77
+ [source,ruby]
78
+ ----
79
+ # Create namespaced elements
80
+ element.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
81
+
82
+ # Query with namespaces
83
+ results = doc.xpath('//dc:creator',
84
+ 'dc' => 'http://purl.org/dc/elements/1.1/')
85
+ ----
86
+
87
+ ==== All node types supported
88
+
89
+ Complete support for all XML node types:
90
+
91
+ * Elements with attributes
92
+ * Text nodes
93
+ * CDATA sections
94
+ * Comments
95
+ * Processing instructions
96
+ * DOCTYPE declarations
97
+ * XML declarations
98
+
99
+ ==== Pure Ruby benefits
100
+
101
+ * No C extension compilation required
102
+ * Works on JRuby and TruffleRuby
103
+ * Compatible with Opal for browser environments
104
+ * Easier debugging and introspection
105
+ * No platform-specific build issues
106
+
107
+ === Limitations
108
+
109
+ Minor limitations compared to native implementations:
110
+
111
+ * Slower parsing than C-based libraries
112
+ * Higher memory usage than Ox
113
+ * Some advanced XPath 1.0 functions may have edge-case differences
114
+
115
+ NOTE: Oga's pure Ruby implementation makes it slower than C-based libraries,
116
+ but it provides excellent functionality and is faster than REXML.
117
+
118
+ === Performance characteristics
119
+
120
+ Based on benchmarks:
121
+
122
+ [cols="2,2,3"]
123
+ |===
124
+ | Operation | Performance | Notes
125
+
126
+ | Parse medium XML
127
+ | ~30-50 ips
128
+ | Pure Ruby parsing
129
+
130
+ | Serialize medium XML
131
+ | ~2,000+ ips
132
+ | Efficient serialization
133
+
134
+ | XPath queries
135
+ | ~20,000+ ips
136
+ | Pure Ruby XPath engine
137
+
138
+ | Memory usage
139
+ | Medium
140
+ | More than C libraries, less than REXML
141
+ |===
142
+
143
+ === Best use cases
144
+
145
+ **Choose Oga when:**
146
+
147
+ * Pure Ruby environment is required (JRuby, TruffleRuby)
148
+ * C extensions cannot be compiled or used
149
+ * Browser deployment via Opal is needed
150
+ * Full XPath 1.0 support is required without C dependencies
151
+ * Easier debugging of XML processing is desired
152
+
153
+ **Avoid Oga when:**
154
+
155
+ * Maximum parsing performance is critical (use link:ox[Ox] or
156
+ link:nokogiri[Nokogiri])
157
+ * Memory usage must be minimized (use link:ox[Ox])
158
+ * Standard library only is required (use link:rexml[REXML])
159
+
160
+ === Example usage
161
+
162
+ ==== Basic document processing
163
+
164
+ [source,ruby]
165
+ ----
166
+ require 'moxml'
167
+
168
+ # Configure Oga adapter
169
+ context = Moxml.new
170
+ context.config.adapter = :oga
171
+
172
+ xml = <<~XML
173
+ <library>
174
+ <book id="1">
175
+ <title>Ruby Programming</title>
176
+ <price>29.99</price>
177
+ </book>
178
+ </library>
179
+ XML
180
+
181
+ doc = context.parse(xml)
182
+
183
+ # Query with XPath
184
+ book = doc.at_xpath('//book[@id="1"]')
185
+ puts book.at_xpath('.//title').text # => "Ruby Programming"
186
+
187
+ # Modify
188
+ book.at_xpath('.//price').text = '24.99'
189
+
190
+ # Serialize
191
+ puts doc.to_xml(indent: 2)
192
+ ----
193
+
194
+ ==== XPath queries
195
+
196
+ [source,ruby]
197
+ ----
198
+ # Complex XPath expressions work
199
+ cheap_books = doc.xpath('//book[price < 25]')
200
+ fiction = doc.xpath('//book[@category="fiction"]')
201
+ last_book = doc.at_xpath('//book[last()]')
202
+
203
+ # Functions
204
+ book_count = doc.xpath('count(//book)')
205
+ has_isbn = doc.xpath('//book[string-length(@isbn) = 13]')
206
+ ----
207
+
208
+ ==== Namespace handling
209
+
210
+ [source,ruby]
211
+ ----
212
+ xml = <<~XML
213
+ <library xmlns:dc="http://purl.org/dc/elements/1.1/">
214
+ <book>
215
+ <dc:title>Programming</dc:title>
216
+ <dc:creator>Smith</dc:creator>
217
+ </book>
218
+ </library>
219
+ XML
220
+
221
+ doc = Moxml.new.parse(xml)
222
+
223
+ # Query with namespaces
224
+ ns = { 'dc' => 'http://purl.org/dc/elements/1.1/' }
225
+ titles = doc.xpath('//dc:title', ns)
226
+
227
+ puts titles.first.text # => "Programming"
228
+ ----
229
+
230
+ === JRuby and TruffleRuby support
231
+
232
+ Oga is particularly valuable for alternative Ruby implementations:
233
+
234
+ [source,ruby]
235
+ ----
236
+ # Works identically on JRuby
237
+ # jruby -S gem install moxml oga
238
+ # jruby script.rb
239
+
240
+ # Works on TruffleRuby
241
+ # Same code, no modifications needed
242
+
243
+ require 'moxml'
244
+ context = Moxml.new
245
+ context.config.adapter = :oga
246
+
247
+ # Full functionality on all Ruby implementations
248
+ doc = context.parse(xml)
249
+ results = doc.xpath('//book[@id="1"]')
250
+ ----
251
+
252
+ === Comparison with other pure Ruby options
253
+
254
+ [cols="2,2,2"]
255
+ |===
256
+ | Aspect | Oga | REXML
257
+
258
+ | XPath support
259
+ | Full XPath 1.0
260
+ | Limited
261
+
262
+ | Performance
263
+ | Fast for pure Ruby
264
+ | Medium
265
+
266
+ | Memory usage
267
+ | Medium
268
+ | Medium
269
+
270
+ | Namespace XPath
271
+ | Full support
272
+ | Not supported
273
+
274
+ | Standard library
275
+ | No (external gem)
276
+ | Yes (built-in)
277
+
278
+ | Maintenance
279
+ | Active
280
+ | Active
281
+ |===
282
+
283
+ === References
284
+
285
+ * link:https://github.com/yorickpeterse/oga[Oga on GitHub]
286
+ * link:https://github.com/yorickpeterse/oga/tree/master/doc[Oga documentation]
287
+
288
+ === See also
289
+
290
+ * link:../compatibility[Compatibility matrix] - Feature comparison
291
+ * link:rexml[REXML adapter] - Alternative pure Ruby option
292
+ * link:../../guides/adapter-switching[Adapter switching guide]
@@ -0,0 +1,55 @@
1
+ ---
2
+ title: Ox adapter
3
+ parent: Adapters
4
+ nav_order: 5
5
+ ---
6
+
7
+ == Ox adapter
8
+
9
+ === Overview
10
+
11
+ Ox is the fastest XML parser available for Ruby, providing excellent performance for simple to moderately complex XML documents.
12
+
13
+ **Best for:**
+
14
+ * Maximum parsing speed
15
+ * Simple document structures
16
+ * Memory-constrained environments
17
+ * When XPath usage is minimal
18
+
19
+ == Specific adapter limitations
20
+
21
+ === Ox adapter
22
+
23
+ ==== XPath limitations
24
+
25
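+ The Ox adapter does not use a full XPath engine; a custom "XPath-to-locate" translation layer converts each XPath expression into an Ox `locate` query, so only the subset of XPath that maps onto `locate` is available.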
26
+
27
+ The following XPath features are NOT supported:
28
+
29
+ * Attribute value predicates: `//book[@id='123']` ❌
30
+ * Logical operators: `//book[@id and @title]` ❌
31
+ * Position predicates: `//book[1]`, `//book[last()]` ❌
32
+ * Text predicates: `//book[text()='Title']` ❌
33
+ * Namespace queries: `//ns:element` ❌
34
+ * Parent axis: `//child/..` ❌
35
+ * Sibling axes: `following-sibling::*` ❌
36
+ * XPath functions: `count()`, `concat()`, etc. ❌
37
+
38
+ *Workaround:* Run a basic query, then filter the results with Ruby's `Enumerable` methods:
39
+
40
+ [source,ruby]
41
+ ----
42
+ # Instead of: doc.xpath("//book[@id='123']")
43
+ # Use:
44
+ doc.xpath("//book").find { |book| book["id"] == "123" }
45
+ ----
46
+
47
+ IMPORTANT: For complete XPath 1.0 support without these limitations, use the
48
+ Nokogiri or Oga adapters.
49
+
50
+
51
+
52
+ See also:
53
+
54
+ * link:headed-ox[HeadedOx adapter] - Ox with full XPath support
55
+ * link:../compatibility[Adapter compatibility matrix]
@@ -0,0 +1,293 @@
1
+ ---
2
+ title: REXML adapter
3
+ parent: Adapters
4
+ grand_parent: Overview
5
+ nav_order: 4
6
+ ---
7
+
8
+ == REXML adapter
9
+
10
+ === Purpose
11
+
12
+ The REXML adapter provides XML processing through Ruby's standard library,
13
+ offering maximum portability without requiring any external gems.
14
+
15
+ === Overview
16
+
17
+ link:https://github.com/ruby/rexml[REXML] is Ruby's built-in XML parser,
18
+ distributed as part of the standard library. It requires no external
19
+ dependencies and works on all Ruby installations.
20
+
21
+ === Installation
22
+
23
+ REXML is included with Ruby, so only Moxml needs to be installed:
24
+
25
+ [source,ruby]
26
+ ----
27
+ gem 'moxml'
28
+ # No additional gems needed - REXML is in Ruby stdlib
29
+ ----
30
+
31
+ Install:
32
+
33
+ [source,shell]
34
+ ----
35
+ bundle install
36
+ ----
37
+
38
+ === Configuration
39
+
40
+ [source,ruby]
41
+ ----
42
+ # Explicit selection
43
+ context = Moxml.new
44
+ context.config.adapter = :rexml
45
+
46
+ # Global default
47
+ Moxml::Config.default_adapter = :rexml
48
+ ----
49
+
50
+ === Features
51
+
52
+ ==== Basic XPath support
53
+
54
+ REXML provides basic XPath querying:
55
+
56
+ [source,ruby]
57
+ ----
58
+ doc = Moxml.new.parse(xml)
59
+
60
+ # Basic paths work
61
+ books = doc.xpath('//book')
62
+ first_book = doc.xpath('/library/book[1]')
63
+
64
+ # Attribute predicates work
65
+ has_id = doc.xpath('//book[@id]')
66
+ specific_book = doc.xpath('//book[@id="1"]')
67
+
68
+ # Position predicates work
69
+ first_three = doc.xpath('//book[position() < 4]')
70
+ ----
71
+
72
+ ==== Limited namespace support
73
+
74
+ REXML can parse and preserve namespaces but cannot use them in XPath queries:
75
+
76
+ [source,ruby]
77
+ ----
78
+ xml = '<library xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:title>Book</dc:title></library>'
79
+
80
+ doc = Moxml.new.parse(xml)
81
+
82
+ # Namespace metadata is preserved
83
+ element = doc.root.children.first
84
+ puts element.namespace # Works - returns namespace URI
85
+
86
+ # But namespace-aware XPath does NOT work
87
+ doc.xpath('//dc:title', 'dc' => 'http://purl.org/dc/elements/1.1/')
88
+ # => Returns empty, cannot resolve namespace prefix
89
+
90
+ # Workaround: use element name without namespace
91
+ doc.xpath('//title') # Works - finds the element
92
+ ----
93
+
94
+ ==== All basic node types
95
+
96
+ Support for standard XML node types:
97
+
98
+ * Elements with attributes
99
+ * Text nodes
100
+ * CDATA sections
101
+ * Comments
102
+ * Processing instructions
103
+ * DOCTYPE declarations
104
+ * XML declarations
105
+
106
+ ==== Standard library advantage
107
+
108
+ * Always available - no gem dependencies
109
+ * Maximum portability across Ruby versions
110
+ * Simple deployment - no compilation needed
111
+ * Guaranteed compatibility
112
+
113
+ === Limitations
114
+
115
+ REXML has several limitations compared to other adapters:
116
+
117
+ **XPath limitations:**
118
+
119
+ * No namespace-aware XPath queries (see example above)
120
+ * Limited function support
121
+ * Some axes not supported
122
+ * Complex predicates may not work
123
+
124
+ **Performance:**
125
+
126
+ * Slower parsing than C-based libraries
127
+ * Medium serialization speed
128
+ * Higher memory usage than Ox
129
+
130
+ **Namespace XPath workaround:**
131
+
132
+ [source,ruby]
133
+ ----
134
+ # Instead of:
135
+ doc.xpath('//ns:element', 'ns' => 'http://example.org')
136
+
137
+ # Use element name matching:
138
+ elements = doc.xpath('//element')
139
+
140
+ # Then filter in Ruby:
141
+ elements.select { |e| e.namespace == 'http://example.org' }
142
+ ----
143
+
144
+ === Performance characteristics
145
+
146
+ Based on benchmarks:
147
+
148
+ [cols="2,2,3"]
149
+ |===
150
+ | Operation | Performance | Notes
151
+
152
+ | Parse medium XML
153
+ | ~10-20 ips
154
+ | Pure Ruby parsing
155
+
156
+ | Serialize medium XML
157
+ | ~500-1,000 ips
158
+ | Medium speed
159
+
160
+ | XPath queries
161
+ | ~5,000-10,000 ips
162
+ | Limited XPath
163
+
164
+ | Memory usage
165
+ | Medium
166
+ | Pure Ruby overhead
167
+ |===
168
+
169
+ === Best use cases
170
+
171
+ **Choose REXML when:**
172
+
173
+ * No external gems can be used (standard library only)
174
+ * Maximum portability is required
175
+ * Small to medium documents
176
+ * Deployment simplicity is critical
177
+ * C extensions cannot be compiled
178
+ * Basic XPath without namespaces is sufficient
179
+
180
+ **Avoid REXML when:**
181
+
182
+ * Namespace-aware XPath is required (use link:oga[Oga],
183
+ link:nokogiri[Nokogiri], or link:libxml[LibXML])
184
+ * High performance is needed (use link:ox[Ox] or link:nokogiri[Nokogiri])
185
+ * Complex XPath expressions are needed (use link:nokogiri[Nokogiri])
186
+
187
+ === Example usage
188
+
189
+ ==== Basic operations
190
+
191
+ [source,ruby]
192
+ ----
193
+ require 'moxml'
194
+
195
+ # Configure REXML adapter
196
+ context = Moxml.new
197
+ context.config.adapter = :rexml
198
+
199
+ xml = '<library><book id="1">Ruby Programming</book></library>'
200
+ doc = context.parse(xml)
201
+
202
+ # Basic XPath works
203
+ book = doc.at_xpath('//book[@id="1"]')
204
+ puts book.text # => "Ruby Programming"
205
+
206
+ # Modify
207
+ book.text = 'Advanced Ruby'
208
+ book['edition'] = '2nd'
209
+
210
+ puts doc.to_xml(indent: 2)
211
+ ----
212
+
213
+ ==== XPath queries
214
+
215
+ [source,ruby]
216
+ ----
217
+ # Supported patterns
218
+ books = doc.xpath('//book')
219
+ with_id = doc.xpath('//book[@id]')
220
+ specific = doc.xpath('//book[@id="1"]')
221
+ first_two = doc.xpath('//book[position() <= 2]')
222
+
223
+ # NOT supported - avoid these
224
+ # doc.xpath('//ns:book', namespaces) # ❌ No namespace XPath
225
+ # doc.xpath('count(//book)') # ⚠️ Limited functions
226
+ # doc.xpath('//book[price < 30]') # ⚠️ May not work
227
+ ----
228
+
229
+ ==== Namespace workarounds
230
+
231
+ [source,ruby]
232
+ ----
233
+ xml = <<~XML
234
+ <library xmlns:dc="http://purl.org/dc/elements/1.1/">
235
+ <dc:title>Programming</dc:title>
236
+ <dc:creator>Smith</dc:creator>
237
+ </library>
238
+ XML
239
+
240
+ doc = Moxml.new.parse(xml)
241
+
242
+ # Find elements by name (without namespace prefix)
243
+ titles = doc.xpath('//title')
244
+
245
+ # Check namespace programmatically
246
+ titles.each do |title|
247
+ if title.namespace == 'http://purl.org/dc/elements/1.1/'
248
+ puts "DC title: #{title.text}"
249
+ end
250
+ end
251
+ ----
252
+
253
+ === Comparison with other pure Ruby options
254
+
255
+ [cols="2,2,2"]
256
+ |===
257
+ | Aspect | REXML | Oga
258
+
259
+ | XPath support
260
+ | Limited
261
+ | Full XPath 1.0
262
+
263
+ | Performance
264
+ | Medium
265
+ | Fast
266
+
267
+ | Memory usage
268
+ | Medium
269
+ | Medium
270
+
271
+ | Namespace XPath
272
+ | Not supported
273
+ | Full support
274
+
275
+ | Standard library
276
+ | Yes
277
+ | No (external gem)
278
+
279
+ | Dependencies
280
+ | None
281
+ | None (pure Ruby)
282
+ |===
283
+
284
+ === References
285
+
286
+ * link:https://github.com/ruby/rexml[REXML on GitHub]
287
+ * link:https://ruby-doc.org/stdlib/libdoc/rexml/rdoc/REXML.html[REXML documentation]
288
+
289
+ === See also
290
+
291
+ * link:../compatibility[Compatibility matrix] - Feature comparison
292
+ * link:oga[Oga adapter] - Alternative pure Ruby with full XPath
293
+ * link:../../tutorials/xpath-queries[XPath queries tutorial]