moxml 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +5 -0
  3. data/.github/workflows/dependent-tests.yml +20 -0
  4. data/.github/workflows/docs.yml +59 -0
  5. data/.github/workflows/rake.yml +12 -4
  6. data/.github/workflows/release.yml +5 -3
  7. data/.gitignore +37 -0
  8. data/.rubocop.yml +15 -7
  9. data/.rubocop_todo.yml +238 -40
  10. data/Gemfile +14 -9
  11. data/LICENSE.md +6 -2
  12. data/README.adoc +535 -373
  13. data/Rakefile +53 -0
  14. data/benchmarks/.gitignore +6 -0
  15. data/benchmarks/generate_report.rb +550 -0
  16. data/docs/Gemfile +13 -0
  17. data/docs/_config.yml +138 -0
  18. data/docs/_guides/advanced-features.adoc +87 -0
  19. data/docs/_guides/development-testing.adoc +165 -0
  20. data/docs/_guides/index.adoc +45 -0
  21. data/docs/_guides/modifying-xml.adoc +293 -0
  22. data/docs/_guides/parsing-xml.adoc +231 -0
  23. data/docs/_guides/sax-parsing.adoc +603 -0
  24. data/docs/_guides/working-with-documents.adoc +118 -0
  25. data/docs/_pages/adapter-compatibility.adoc +369 -0
  26. data/docs/_pages/adapters/headed-ox.adoc +237 -0
  27. data/docs/_pages/adapters/index.adoc +98 -0
  28. data/docs/_pages/adapters/libxml.adoc +286 -0
  29. data/docs/_pages/adapters/nokogiri.adoc +252 -0
  30. data/docs/_pages/adapters/oga.adoc +292 -0
  31. data/docs/_pages/adapters/ox.adoc +55 -0
  32. data/docs/_pages/adapters/rexml.adoc +293 -0
  33. data/docs/_pages/best-practices.adoc +430 -0
  34. data/docs/_pages/compatibility.adoc +468 -0
  35. data/docs/_pages/configuration.adoc +251 -0
  36. data/docs/_pages/error-handling.adoc +350 -0
  37. data/docs/_pages/headed-ox-limitations.adoc +558 -0
  38. data/docs/_pages/headed-ox.adoc +1025 -0
  39. data/docs/_pages/index.adoc +35 -0
  40. data/docs/_pages/installation.adoc +141 -0
  41. data/docs/_pages/node-api-reference.adoc +50 -0
  42. data/docs/_pages/performance.adoc +36 -0
  43. data/docs/_pages/quick-start.adoc +244 -0
  44. data/docs/_pages/thread-safety.adoc +29 -0
  45. data/docs/_references/document-api.adoc +408 -0
  46. data/docs/_references/index.adoc +48 -0
  47. data/docs/_tutorials/basic-usage.adoc +268 -0
  48. data/docs/_tutorials/builder-pattern.adoc +343 -0
  49. data/docs/_tutorials/index.adoc +33 -0
  50. data/docs/_tutorials/namespace-handling.adoc +325 -0
  51. data/docs/_tutorials/xpath-queries.adoc +359 -0
  52. data/docs/index.adoc +122 -0
  53. data/examples/README.md +124 -0
  54. data/examples/api_client/README.md +424 -0
  55. data/examples/api_client/api_client.rb +394 -0
  56. data/examples/api_client/example_response.xml +48 -0
  57. data/examples/headed_ox_example/README.md +90 -0
  58. data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
  59. data/examples/rss_parser/README.md +194 -0
  60. data/examples/rss_parser/example_feed.xml +93 -0
  61. data/examples/rss_parser/rss_parser.rb +189 -0
  62. data/examples/sax_parsing/README.md +50 -0
  63. data/examples/sax_parsing/data_extractor.rb +75 -0
  64. data/examples/sax_parsing/example.xml +21 -0
  65. data/examples/sax_parsing/large_file.rb +78 -0
  66. data/examples/sax_parsing/simple_parser.rb +55 -0
  67. data/examples/web_scraper/README.md +352 -0
  68. data/examples/web_scraper/example_page.html +201 -0
  69. data/examples/web_scraper/web_scraper.rb +312 -0
  70. data/lib/moxml/adapter/base.rb +107 -28
  71. data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
  72. data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
  73. data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
  74. data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
  75. data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
  76. data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
  77. data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
  78. data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
  79. data/lib/moxml/adapter/customized_ox/attribute.rb +28 -3
  80. data/lib/moxml/adapter/customized_ox/namespace.rb +0 -2
  81. data/lib/moxml/adapter/customized_ox/text.rb +0 -2
  82. data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -6
  83. data/lib/moxml/adapter/headed_ox.rb +161 -0
  84. data/lib/moxml/adapter/libxml.rb +1548 -0
  85. data/lib/moxml/adapter/nokogiri.rb +121 -9
  86. data/lib/moxml/adapter/oga.rb +123 -12
  87. data/lib/moxml/adapter/ox.rb +283 -27
  88. data/lib/moxml/adapter/rexml.rb +127 -20
  89. data/lib/moxml/adapter.rb +21 -4
  90. data/lib/moxml/attribute.rb +6 -0
  91. data/lib/moxml/builder.rb +40 -4
  92. data/lib/moxml/config.rb +8 -3
  93. data/lib/moxml/context.rb +39 -1
  94. data/lib/moxml/doctype.rb +13 -1
  95. data/lib/moxml/document.rb +39 -6
  96. data/lib/moxml/document_builder.rb +27 -5
  97. data/lib/moxml/element.rb +71 -2
  98. data/lib/moxml/error.rb +175 -6
  99. data/lib/moxml/node.rb +94 -3
  100. data/lib/moxml/node_set.rb +34 -0
  101. data/lib/moxml/sax/block_handler.rb +194 -0
  102. data/lib/moxml/sax/element_handler.rb +124 -0
  103. data/lib/moxml/sax/handler.rb +113 -0
  104. data/lib/moxml/sax.rb +31 -0
  105. data/lib/moxml/version.rb +1 -1
  106. data/lib/moxml/xml_utils/encoder.rb +4 -4
  107. data/lib/moxml/xml_utils.rb +7 -4
  108. data/lib/moxml/xpath/ast/node.rb +159 -0
  109. data/lib/moxml/xpath/cache.rb +91 -0
  110. data/lib/moxml/xpath/compiler.rb +1768 -0
  111. data/lib/moxml/xpath/context.rb +26 -0
  112. data/lib/moxml/xpath/conversion.rb +124 -0
  113. data/lib/moxml/xpath/engine.rb +52 -0
  114. data/lib/moxml/xpath/errors.rb +101 -0
  115. data/lib/moxml/xpath/lexer.rb +304 -0
  116. data/lib/moxml/xpath/parser.rb +485 -0
  117. data/lib/moxml/xpath/ruby/generator.rb +269 -0
  118. data/lib/moxml/xpath/ruby/node.rb +193 -0
  119. data/lib/moxml/xpath.rb +37 -0
  120. data/lib/moxml.rb +5 -2
  121. data/moxml.gemspec +3 -1
  122. data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
  123. data/spec/consistency/README.md +77 -0
  124. data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
  125. data/spec/examples/README.md +75 -0
  126. data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
  127. data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
  128. data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
  129. data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
  130. data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
  131. data/spec/integration/README.md +71 -0
  132. data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
  133. data/spec/integration/headed_ox_integration_spec.rb +326 -0
  134. data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
  135. data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
  136. data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
  137. data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
  138. data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
  139. data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
  140. data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
  141. data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -2
  142. data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
  143. data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
  144. data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
  145. data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
  146. data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
  147. data/spec/moxml/README.md +41 -0
  148. data/spec/moxml/adapter/.gitkeep +0 -0
  149. data/spec/moxml/adapter/README.md +61 -0
  150. data/spec/moxml/adapter/base_spec.rb +27 -0
  151. data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
  152. data/spec/moxml/adapter/libxml_spec.rb +14 -0
  153. data/spec/moxml/adapter/ox_spec.rb +9 -8
  154. data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
  155. data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
  156. data/spec/moxml/adapter_spec.rb +16 -0
  157. data/spec/moxml/attribute_spec.rb +30 -0
  158. data/spec/moxml/builder_spec.rb +33 -0
  159. data/spec/moxml/cdata_spec.rb +31 -0
  160. data/spec/moxml/comment_spec.rb +31 -0
  161. data/spec/moxml/config_spec.rb +3 -3
  162. data/spec/moxml/context_spec.rb +28 -0
  163. data/spec/moxml/declaration_spec.rb +36 -0
  164. data/spec/moxml/doctype_spec.rb +33 -0
  165. data/spec/moxml/document_builder_spec.rb +30 -0
  166. data/spec/moxml/document_spec.rb +105 -0
  167. data/spec/moxml/element_spec.rb +143 -0
  168. data/spec/moxml/error_spec.rb +266 -22
  169. data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
  170. data/spec/moxml/namespace_spec.rb +32 -0
  171. data/spec/moxml/node_set_spec.rb +39 -0
  172. data/spec/moxml/node_spec.rb +37 -0
  173. data/spec/moxml/processing_instruction_spec.rb +34 -0
  174. data/spec/moxml/sax_spec.rb +1067 -0
  175. data/spec/moxml/text_spec.rb +31 -0
  176. data/spec/moxml/version_spec.rb +14 -0
  177. data/spec/moxml/xml_utils/.gitkeep +0 -0
  178. data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
  179. data/spec/moxml/xml_utils_spec.rb +49 -0
  180. data/spec/moxml/xpath/ast/node_spec.rb +83 -0
  181. data/spec/moxml/xpath/axes_spec.rb +296 -0
  182. data/spec/moxml/xpath/cache_spec.rb +358 -0
  183. data/spec/moxml/xpath/compiler_spec.rb +406 -0
  184. data/spec/moxml/xpath/context_spec.rb +210 -0
  185. data/spec/moxml/xpath/conversion_spec.rb +365 -0
  186. data/spec/moxml/xpath/fixtures/sample.xml +25 -0
  187. data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
  188. data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
  189. data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
  190. data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
  191. data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
  192. data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
  193. data/spec/moxml/xpath/lexer_spec.rb +488 -0
  194. data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
  195. data/spec/moxml/xpath/parser_spec.rb +364 -0
  196. data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
  197. data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
  198. data/spec/moxml/xpath_capabilities_spec.rb +199 -0
  199. data/spec/moxml/xpath_spec.rb +77 -0
  200. data/spec/performance/README.md +83 -0
  201. data/spec/performance/benchmark_spec.rb +64 -0
  202. data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +3 -1
  203. data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
  204. data/spec/performance/xpath_benchmark_spec.rb +259 -0
  205. data/spec/spec_helper.rb +58 -1
  206. data/spec/support/xml_matchers.rb +1 -1
  207. metadata +176 -35
  208. data/lib/ox/node.rb +0 -9
  209. data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
  210. /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
  211. /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
  212. /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
  213. /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
  214. /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
  215. /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
@@ -0,0 +1,286 @@
1
+ ---
2
+ title: LibXML adapter
3
+ parent: Adapters
4
+ grand_parent: Overview
5
+ nav_order: 2
6
+ ---
7
+
8
+ == LibXML adapter
9
+
10
+ === Purpose
11
+
12
+ The LibXML adapter provides XML processing through the libxml-ruby library,
13
+ offering excellent performance through native libxml2 bindings with full
14
+ XPath 1.0 support.
15
+
16
+ === Overview
17
+
18
+ link:https://github.com/xml4r/libxml-ruby[libxml-ruby] provides Ruby
19
+ bindings to the libxml2 C library, offering similar performance
20
+ characteristics to Nokogiri but as an alternative implementation. It's ideal
21
+ when you need native performance with full XML features.
22
+
23
+ === Installation
24
+
25
+ Add to your Gemfile:
26
+
27
+ [source,ruby]
28
+ ----
29
+ gem 'moxml'
30
+ gem 'libxml-ruby'
31
+ ----
32
+
33
+ Install:
34
+
35
+ [source,shell]
36
+ ----
37
+ bundle install
38
+ ----
39
+
40
+ === Configuration
41
+
42
+ [source,ruby]
43
+ ----
44
+ # Explicit selection
45
+ context = Moxml.new
46
+ context.config.adapter = :libxml
47
+
48
+ # Global default
49
+ Moxml::Config.default_adapter = :libxml
50
+ ----
51
+
52
+ === Features
53
+
54
+ ==== Full XPath 1.0 support
55
+
56
+ Complete XPath 1.0 implementation through libxml2:
57
+
58
+ [source,ruby]
59
+ ----
60
+ doc = Moxml.new.parse(xml)
61
+
62
+ # All XPath features work
63
+ books = doc.xpath('//book[@price < 30]')
64
+ count = doc.xpath('count(//book)')
65
+ titles = doc.xpath('//book[position() < 3]/title')
66
+
67
+ # Namespace-aware queries
68
+ doc.xpath('//ns:element', 'ns' => 'http://example.org')
69
+
70
+ # Complex predicates and functions
71
+ doc.xpath('//book[author and price > 20]')
72
+ doc.xpath('//chapter[last()]')
73
+ ----
74
+
75
+ ==== Complete namespace support
76
+
77
+ Full namespace handling:
78
+
79
+ [source,ruby]
80
+ ----
81
+ # Create namespaced elements
82
+ element.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
83
+
84
+ # Query with namespaces
85
+ results = doc.xpath('//dc:creator',
86
+ 'dc' => 'http://purl.org/dc/elements/1.1/')
87
+
88
+ # Namespace inheritance works correctly
89
+ ----
90
+
91
+ ==== All node types
92
+
93
+ Complete support for:
94
+
95
+ * Elements with attributes
96
+ * Text nodes
97
+ * CDATA sections
98
+ * Comments
99
+ * Processing instructions
100
+ * DOCTYPE declarations (with limitations)
101
+ * XML declarations
102
+
103
+ ==== Performance
104
+
105
+ * **Parsing speed**: Very fast (native libxml2)
106
+ * **Serialization speed**: Very fast
107
+ * **Memory usage**: Excellent
108
+ * **XPath performance**: Excellent (native)
109
+
110
+ === Limitations
111
+
112
+ Minor limitations compared to other adapters:
113
+
114
+ **DOCTYPE handling:**
115
+
116
+ * DOCTYPE parsing works correctly
117
+ * DOCTYPE serialization is limited
118
+ * Round-trip preservation of DOCTYPE may not work perfectly
119
+
120
+ **Performance:**
121
+
122
+ * Serialization slightly slower than Ox in some cases
123
+ * Still very competitive with other adapters
124
+
125
+ [source,ruby]
126
+ ----
127
+ # DOCTYPE limitation example
128
+ xml_with_doctype = <<~XML
129
+ <!DOCTYPE root SYSTEM "test.dtd">
130
+ <root/>
131
+ XML
132
+
133
+ doc = Moxml.new.parse(xml_with_doctype)
134
+ # Parsing works fine
135
+
136
+ # But re-serialization may not preserve DOCTYPE perfectly
137
+ output = doc.to_xml
138
+ # DOCTYPE may be formatted differently or missing
139
+ ----
140
+
141
+ === Performance characteristics
142
+
143
+ Based on benchmarks:
144
+
145
+ [cols="2,2,3"]
146
+ |===
147
+ | Operation | Performance | Notes
148
+
149
+ | Parse medium XML
150
+ | ~120 ips
151
+ | Very fast native parsing
152
+
153
+ | Serialize medium XML
154
+ | ~1,200 ips
155
+ | Fast serialization
156
+
157
+ | XPath queries
158
+ | ~50,000+ ips
159
+ | Native libxml2 XPath
160
+
161
+ | Memory usage
162
+ | Excellent
163
+ | Efficient memory management
164
+ |===
165
+
166
+ === Best use cases
167
+
168
+ **Choose LibXML when:**
169
+
170
+ * You want an alternative to Nokogiri
171
+ * Native C performance is important
172
+ * Full XPath 1.0 support is required
173
+ * Namespace handling is critical
174
+ * You prefer direct libxml2 bindings over Nokogiri's wrapper around libxml2
175
+
176
+ **Avoid LibXML when:**
177
+
178
+ * Pure Ruby is required (use link:oga[Oga])
179
+ * DOCTYPE round-trip is essential
180
+ * You need a wider community/ecosystem (use link:nokogiri[Nokogiri])
181
+
182
+ === Example usage
183
+
184
+ ==== Basic operations
185
+
186
+ [source,ruby]
187
+ ----
188
+ require 'moxml'
189
+
190
+ # Configure LibXML adapter
191
+ context = Moxml.new
192
+ context.config.adapter = :libxml
193
+
194
+ xml = '<library><book id="1">Ruby Programming</book></library>'
195
+ doc = context.parse(xml)
196
+
197
+ # Query and modify
198
+ book = doc.at_xpath('//book[@id="1"]')
199
+ book.text = 'Advanced Ruby Programming'
200
+ book['edition'] = '2nd'
201
+
202
+ puts doc.to_xml(indent: 2)
203
+ ----
204
+
205
+ ==== Complex XPath
206
+
207
+ [source,ruby]
208
+ ----
209
+ # All XPath 1.0 features supported
210
+ expensive_books = doc.xpath('//book[price > 30]')
211
+ fiction_count = doc.xpath('count(//book[@category="fiction"])')
212
+ last_chapter = doc.at_xpath('//chapter[last()]')
213
+ ----
214
+
215
+ ==== Namespace operations
216
+
217
+ [source,ruby]
218
+ ----
219
+ xml = <<~XML
220
+ <library xmlns:dc="http://purl.org/dc/elements/1.1/">
221
+ <book>
222
+ <dc:title>Programming</dc:title>
223
+ <dc:creator>Smith</dc:creator>
224
+ </book>
225
+ </library>
226
+ XML
227
+
228
+ doc = Moxml.new.parse(xml)
229
+
230
+ # Query with namespaces
231
+ ns = { 'dc' => 'http://purl.org/dc/elements/1.1/' }
232
+ titles = doc.xpath('//dc:title', ns)
233
+ creators = doc.xpath('//dc:creator', ns)
234
+
235
+ puts "Title: #{titles.first.text}"
236
+ puts "Creator: #{creators.first.text}"
237
+ ----
238
+
239
+ === Comparison with Nokogiri
240
+
241
+ Both LibXML and Nokogiri use libxml2, but differ in their approach:
242
+
243
+ [cols="2,2,2"]
244
+ |===
245
+ | Aspect | LibXML | Nokogiri
246
+
247
+ | Underlying library
248
+ | libxml2 directly
249
+ | libxml2 via wrapper
250
+
251
+ | Community size
252
+ | Smaller
253
+ | Very large
254
+
255
+ | Performance
256
+ | Excellent
257
+ | Excellent
258
+
259
+ | Feature completeness
260
+ | Full (except DOCTYPE)
261
+ | Full
262
+
263
+ | Pure Ruby option
264
+ | No
265
+ | No
266
+
267
+ | Cross-platform
268
+ | Good
269
+ | Excellent
270
+
271
+ | Documentation
272
+ | Good
273
+ | Extensive
274
+ |===
275
+
276
+ === References
277
+
278
+ * link:https://github.com/xml4r/libxml-ruby[libxml-ruby on GitHub]
279
+ * link:https://github.com/GNOME/libxml2[libxml2 C library]
280
+ * link:https://libxml2.gitlab.io/[libxml2 documentation]
281
+
282
+ === See also
283
+
284
+ * link:../compatibility[Compatibility matrix] - Feature comparison
285
+ * link:nokogiri[Nokogiri adapter] - Similar performance
286
+ * link:../../guides/adapter-switching[Adapter switching guide]
@@ -0,0 +1,252 @@
1
+ ---
2
+ title: Nokogiri adapter
3
+ parent: Adapters
4
+ grand_parent: Overview
5
+ nav_order: 1
6
+ ---
7
+
8
+ == Nokogiri adapter
9
+
10
+ === Purpose
11
+
12
+ The Nokogiri adapter provides XML processing through the industry-standard
13
+ Nokogiri library, offering excellent performance and complete XPath 1.0
14
+ support.
15
+
16
+ === Overview
17
+
18
+ link:https://github.com/sparklemotion/nokogiri[Nokogiri] is the most widely
19
+ used XML library in the Ruby ecosystem. It wraps the performant libxml2 C
20
+ library, providing fast parsing, flexible querying, and reliable XML
21
+ processing.
22
+
23
+ === Installation
24
+
25
+ Add to your Gemfile:
26
+
27
+ [source,ruby]
28
+ ----
29
+ gem 'moxml'
30
+ gem 'nokogiri'
31
+ ----
32
+
33
+ Install:
34
+
35
+ [source,shell]
36
+ ----
37
+ bundle install
38
+ ----
39
+
40
+ === Configuration
41
+
42
+ Nokogiri is the default adapter and will be used automatically if installed:
43
+
44
+ [source,ruby]
45
+ ----
46
+ # Automatic selection (default)
47
+ context = Moxml.new
48
+ # Uses Nokogiri if available
49
+
50
+ # Explicit selection
51
+ context = Moxml.new
52
+ context.config.adapter = :nokogiri
53
+
54
+ # Global default
55
+ Moxml::Config.default_adapter = :nokogiri
56
+ ----
57
+
58
+ === Features
59
+
60
+ ==== Full XPath 1.0 support
61
+
62
+ Nokogiri provides a complete XPath 1.0 implementation:
63
+
64
+ [source,ruby]
65
+ ----
66
+ doc = Moxml.new.parse(xml)
67
+
68
+ # All XPath features work
69
+ books = doc.xpath('//book[@price < 30]')
70
+ count = doc.xpath('count(//book)')
71
+ titles = doc.xpath('//book[position() < 3]/title')
72
+
73
+ # Namespace-aware queries
74
+ doc.xpath('//ns:element', 'ns' => 'http://example.org')
75
+
76
+ # Complex predicates
77
+ doc.xpath('//book[@id and @isbn and price < 50]')
78
+
79
+ # All axes supported
80
+ doc.xpath('//chapter/following-sibling::*')
81
+ ----
82
+
83
+ ==== Complete namespace support
84
+
85
+ Full namespace handling including default namespaces and inheritance:
86
+
87
+ [source,ruby]
88
+ ----
89
+ # Create namespaced elements
90
+ element.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
91
+
92
+ # Query with namespaces
93
+ results = doc.xpath('//dc:creator',
94
+ 'dc' => 'http://purl.org/dc/elements/1.1/')
95
+
96
+ # Namespace inheritance works correctly
97
+ # Child elements inherit parent namespaces
98
+ ----
99
+
100
+ ==== All node types supported
101
+
102
+ Complete support for all XML node types:
103
+
104
+ * Elements with attributes
105
+ * Text nodes
106
+ * CDATA sections
107
+ * Comments
108
+ * Processing instructions
109
+ * DOCTYPE declarations
110
+ * XML declarations
111
+
112
+ ==== High performance
113
+
114
+ * **Parsing speed**: Fast (C library)
115
+ * **Serialization speed**: Fast
116
+ * **Memory usage**: Good
117
+ * **XPath performance**: Excellent (native libxml2)
118
+
119
+ === Limitations
120
+
121
+ Nokogiri has minimal limitations:
122
+
123
+ * Requires C extensions (not pure Ruby)
124
+ * Platform-specific compilation may be needed
125
+ * Slightly larger memory footprint than some alternatives
126
+
127
+ === Performance characteristics
128
+
129
+ Based on benchmarks:
130
+
131
+ [cols="2,2,3"]
132
+ |===
133
+ | Operation | Performance | Notes
134
+
135
+ | Parse medium XML
136
+ | ~76 ips
137
+ | Fast C library parsing
138
+
139
+ | Serialize medium XML
140
+ | ~13,900 ips
141
+ | Very fast serialization
142
+
143
+ | XPath queries
144
+ | ~64,958 ips
145
+ | Native libxml2 XPath engine
146
+
147
+ | Memory usage
148
+ | -0.1 MB delta
149
+ | Excellent memory efficiency
150
+ |===
151
+
152
+ === Best use cases
153
+
154
+ **Choose Nokogiri when:**
155
+
156
+ * You need industry-standard XML processing
157
+ * Large community support is important
158
+ * Full XPath 1.0 compliance is required
159
+ * Performance is important but not the absolute priority
160
+ * Cross-platform deployment is needed
161
+ * C extensions are acceptable
162
+
163
+ **Avoid Nokogiri when:**
164
+
165
+ * Pure Ruby is required (use link:oga[Oga])
166
+ * Absolute maximum speed is critical (use link:ox[Ox])
167
+ * C extension compilation is problematic (use link:oga[Oga] or
168
+ link:rexml[REXML])
169
+
170
+ === Example usage
171
+
172
+ ==== Basic document processing
173
+
174
+ [source,ruby]
175
+ ----
176
+ require 'moxml'
177
+
178
+ # Nokogiri is used by default
179
+ context = Moxml.new
180
+
181
+ xml = <<~XML
182
+ <library>
183
+ <book id="1">
184
+ <title>Ruby Programming</title>
185
+ <price>29.99</price>
186
+ </book>
187
+ </library>
188
+ XML
189
+
190
+ doc = context.parse(xml)
191
+
192
+ # Query efficiently
193
+ book = doc.at_xpath('//book[@id="1"]')
194
+ puts book.at_xpath('.//title').text # => "Ruby Programming"
195
+
196
+ # Modify
197
+ book.at_xpath('.//price').text = '24.99'
198
+
199
+ # Serialize
200
+ puts doc.to_xml(indent: 2)
201
+ ----
202
+
203
+ ==== Complex XPath queries
204
+
205
+ [source,ruby]
206
+ ----
207
+ # All XPath 1.0 features work
208
+ doc.xpath('//book[price < 30 and @category="fiction"]')
209
+ doc.xpath('//book[position() mod 2 = 0]')
210
+ doc.xpath('count(//book[author="Smith"])')
211
+ doc.xpath('//chapter[last()]/preceding-sibling::*')
212
+ ----
213
+
214
+ ==== Namespace handling
215
+
216
+ [source,ruby]
217
+ ----
218
+ xml = <<~XML
219
+ <library xmlns="http://example.org/library"
220
+ xmlns:dc="http://purl.org/dc/elements/1.1/">
221
+ <book>
222
+ <dc:title>Programming</dc:title>
223
+ </book>
224
+ </library>
225
+ XML
226
+
227
+ doc = Moxml.new.parse(xml)
228
+
229
+ # Define namespace mappings
230
+ ns = {
231
+ 'lib' => 'http://example.org/library',
232
+ 'dc' => 'http://purl.org/dc/elements/1.1/'
233
+ }
234
+
235
+ # Query with namespaces
236
+ books = doc.xpath('//lib:book', ns)
237
+ titles = doc.xpath('//dc:title', ns)
238
+
239
+ puts titles.first.text # => "Programming"
240
+ ----
241
+
242
+ === References
243
+
244
+ * link:https://nokogiri.org/[Nokogiri homepage]
245
+ * link:https://nokogiri.org/tutorials/[Nokogiri tutorials]
246
+ * link:https://github.com/sparklemotion/nokogiri[Nokogiri on GitHub]
247
+
248
+ === See also
249
+
250
+ * link:../compatibility[Compatibility matrix] - Feature comparison
251
+ * link:libxml[LibXML adapter] - Alternative with similar performance
252
+ * link:../../guides/adapter-switching[Adapter switching guide]