moxml 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +5 -0
  3. data/.github/workflows/dependent-tests.yml +20 -0
  4. data/.github/workflows/docs.yml +59 -0
  5. data/.github/workflows/rake.yml +10 -10
  6. data/.github/workflows/release.yml +5 -3
  7. data/.gitignore +37 -0
  8. data/.rubocop.yml +15 -7
  9. data/.rubocop_todo.yml +224 -43
  10. data/Gemfile +14 -9
  11. data/LICENSE.md +6 -2
  12. data/README.adoc +535 -373
  13. data/Rakefile +53 -0
  14. data/benchmarks/.gitignore +6 -0
  15. data/benchmarks/generate_report.rb +550 -0
  16. data/docs/Gemfile +13 -0
  17. data/docs/_config.yml +138 -0
  18. data/docs/_guides/advanced-features.adoc +87 -0
  19. data/docs/_guides/development-testing.adoc +165 -0
  20. data/docs/_guides/index.adoc +51 -0
  21. data/docs/_guides/modifying-xml.adoc +292 -0
  22. data/docs/_guides/parsing-xml.adoc +230 -0
  23. data/docs/_guides/sax-parsing.adoc +603 -0
  24. data/docs/_guides/working-with-documents.adoc +118 -0
  25. data/docs/_guides/xml-declaration.adoc +450 -0
  26. data/docs/_pages/adapter-compatibility.adoc +369 -0
  27. data/docs/_pages/adapters/headed-ox.adoc +237 -0
  28. data/docs/_pages/adapters/index.adoc +97 -0
  29. data/docs/_pages/adapters/libxml.adoc +285 -0
  30. data/docs/_pages/adapters/nokogiri.adoc +251 -0
  31. data/docs/_pages/adapters/oga.adoc +291 -0
  32. data/docs/_pages/adapters/ox.adoc +56 -0
  33. data/docs/_pages/adapters/rexml.adoc +292 -0
  34. data/docs/_pages/best-practices.adoc +429 -0
  35. data/docs/_pages/compatibility.adoc +467 -0
  36. data/docs/_pages/configuration.adoc +250 -0
  37. data/docs/_pages/error-handling.adoc +349 -0
  38. data/docs/_pages/headed-ox-limitations.adoc +574 -0
  39. data/docs/_pages/headed-ox.adoc +1025 -0
  40. data/docs/_pages/index.adoc +35 -0
  41. data/docs/_pages/installation.adoc +140 -0
  42. data/docs/_pages/node-api-reference.adoc +49 -0
  43. data/docs/_pages/performance.adoc +35 -0
  44. data/docs/_pages/quick-start.adoc +243 -0
  45. data/docs/_pages/thread-safety.adoc +28 -0
  46. data/docs/_references/document-api.adoc +407 -0
  47. data/docs/_references/index.adoc +48 -0
  48. data/docs/_tutorials/basic-usage.adoc +267 -0
  49. data/docs/_tutorials/builder-pattern.adoc +342 -0
  50. data/docs/_tutorials/index.adoc +33 -0
  51. data/docs/_tutorials/namespace-handling.adoc +324 -0
  52. data/docs/_tutorials/xpath-queries.adoc +358 -0
  53. data/docs/index.adoc +122 -0
  54. data/examples/README.md +124 -0
  55. data/examples/api_client/README.md +424 -0
  56. data/examples/api_client/api_client.rb +394 -0
  57. data/examples/api_client/example_response.xml +48 -0
  58. data/examples/headed_ox_example/README.md +90 -0
  59. data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
  60. data/examples/rss_parser/README.md +194 -0
  61. data/examples/rss_parser/example_feed.xml +93 -0
  62. data/examples/rss_parser/rss_parser.rb +189 -0
  63. data/examples/sax_parsing/README.md +50 -0
  64. data/examples/sax_parsing/data_extractor.rb +75 -0
  65. data/examples/sax_parsing/example.xml +21 -0
  66. data/examples/sax_parsing/large_file.rb +78 -0
  67. data/examples/sax_parsing/simple_parser.rb +55 -0
  68. data/examples/web_scraper/README.md +352 -0
  69. data/examples/web_scraper/example_page.html +201 -0
  70. data/examples/web_scraper/web_scraper.rb +312 -0
  71. data/lib/moxml/adapter/base.rb +107 -28
  72. data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
  73. data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
  74. data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
  75. data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
  76. data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
  77. data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
  78. data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
  79. data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
  80. data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
  81. data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
  82. data/lib/moxml/adapter/headed_ox.rb +161 -0
  83. data/lib/moxml/adapter/libxml.rb +1564 -0
  84. data/lib/moxml/adapter/nokogiri.rb +156 -9
  85. data/lib/moxml/adapter/oga.rb +190 -15
  86. data/lib/moxml/adapter/ox.rb +322 -28
  87. data/lib/moxml/adapter/rexml.rb +157 -28
  88. data/lib/moxml/adapter.rb +21 -4
  89. data/lib/moxml/attribute.rb +6 -0
  90. data/lib/moxml/builder.rb +40 -4
  91. data/lib/moxml/config.rb +8 -3
  92. data/lib/moxml/context.rb +57 -2
  93. data/lib/moxml/declaration.rb +9 -0
  94. data/lib/moxml/doctype.rb +13 -1
  95. data/lib/moxml/document.rb +53 -6
  96. data/lib/moxml/document_builder.rb +34 -5
  97. data/lib/moxml/element.rb +71 -2
  98. data/lib/moxml/error.rb +175 -6
  99. data/lib/moxml/node.rb +155 -4
  100. data/lib/moxml/node_set.rb +34 -0
  101. data/lib/moxml/sax/block_handler.rb +194 -0
  102. data/lib/moxml/sax/element_handler.rb +124 -0
  103. data/lib/moxml/sax/handler.rb +113 -0
  104. data/lib/moxml/sax.rb +31 -0
  105. data/lib/moxml/version.rb +1 -1
  106. data/lib/moxml/xml_utils/encoder.rb +4 -4
  107. data/lib/moxml/xml_utils.rb +7 -4
  108. data/lib/moxml/xpath/ast/node.rb +159 -0
  109. data/lib/moxml/xpath/cache.rb +91 -0
  110. data/lib/moxml/xpath/compiler.rb +1770 -0
  111. data/lib/moxml/xpath/context.rb +26 -0
  112. data/lib/moxml/xpath/conversion.rb +124 -0
  113. data/lib/moxml/xpath/engine.rb +52 -0
  114. data/lib/moxml/xpath/errors.rb +101 -0
  115. data/lib/moxml/xpath/lexer.rb +304 -0
  116. data/lib/moxml/xpath/parser.rb +485 -0
  117. data/lib/moxml/xpath/ruby/generator.rb +269 -0
  118. data/lib/moxml/xpath/ruby/node.rb +193 -0
  119. data/lib/moxml/xpath.rb +37 -0
  120. data/lib/moxml.rb +5 -2
  121. data/moxml.gemspec +3 -1
  122. data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
  123. data/spec/consistency/README.md +77 -0
  124. data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
  125. data/spec/examples/README.md +75 -0
  126. data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
  127. data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
  128. data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
  129. data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
  130. data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
  131. data/spec/integration/README.md +71 -0
  132. data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
  133. data/spec/integration/headed_ox_integration_spec.rb +326 -0
  134. data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
  135. data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
  136. data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
  137. data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
  138. data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
  139. data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
  140. data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
  141. data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
  142. data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
  143. data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
  144. data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
  145. data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
  146. data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
  147. data/spec/moxml/README.md +41 -0
  148. data/spec/moxml/adapter/.gitkeep +0 -0
  149. data/spec/moxml/adapter/README.md +61 -0
  150. data/spec/moxml/adapter/base_spec.rb +27 -0
  151. data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
  152. data/spec/moxml/adapter/libxml_spec.rb +14 -0
  153. data/spec/moxml/adapter/ox_spec.rb +9 -8
  154. data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
  155. data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
  156. data/spec/moxml/adapter_spec.rb +16 -0
  157. data/spec/moxml/attribute_spec.rb +30 -0
  158. data/spec/moxml/builder_spec.rb +33 -0
  159. data/spec/moxml/cdata_spec.rb +31 -0
  160. data/spec/moxml/comment_spec.rb +31 -0
  161. data/spec/moxml/config_spec.rb +3 -3
  162. data/spec/moxml/context_spec.rb +28 -0
  163. data/spec/moxml/declaration_preservation_spec.rb +217 -0
  164. data/spec/moxml/declaration_spec.rb +36 -0
  165. data/spec/moxml/doctype_spec.rb +33 -0
  166. data/spec/moxml/document_builder_spec.rb +30 -0
  167. data/spec/moxml/document_spec.rb +105 -0
  168. data/spec/moxml/element_spec.rb +143 -0
  169. data/spec/moxml/error_spec.rb +266 -22
  170. data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
  171. data/spec/moxml/namespace_spec.rb +32 -0
  172. data/spec/moxml/node_set_spec.rb +39 -0
  173. data/spec/moxml/node_spec.rb +37 -0
  174. data/spec/moxml/processing_instruction_spec.rb +34 -0
  175. data/spec/moxml/sax_spec.rb +1067 -0
  176. data/spec/moxml/text_spec.rb +31 -0
  177. data/spec/moxml/version_spec.rb +14 -0
  178. data/spec/moxml/xml_utils/.gitkeep +0 -0
  179. data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
  180. data/spec/moxml/xml_utils_spec.rb +49 -0
  181. data/spec/moxml/xpath/ast/node_spec.rb +83 -0
  182. data/spec/moxml/xpath/axes_spec.rb +296 -0
  183. data/spec/moxml/xpath/cache_spec.rb +358 -0
  184. data/spec/moxml/xpath/compiler_spec.rb +406 -0
  185. data/spec/moxml/xpath/context_spec.rb +210 -0
  186. data/spec/moxml/xpath/conversion_spec.rb +365 -0
  187. data/spec/moxml/xpath/fixtures/sample.xml +25 -0
  188. data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
  189. data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
  190. data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
  191. data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
  192. data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
  193. data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
  194. data/spec/moxml/xpath/lexer_spec.rb +488 -0
  195. data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
  196. data/spec/moxml/xpath/parser_spec.rb +364 -0
  197. data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
  198. data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
  199. data/spec/moxml/xpath_capabilities_spec.rb +199 -0
  200. data/spec/moxml/xpath_spec.rb +77 -0
  201. data/spec/performance/README.md +83 -0
  202. data/spec/performance/benchmark_spec.rb +64 -0
  203. data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
  204. data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
  205. data/spec/performance/xpath_benchmark_spec.rb +259 -0
  206. data/spec/spec_helper.rb +58 -1
  207. data/spec/support/xml_matchers.rb +1 -1
  208. metadata +178 -34
  209. data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
  210. /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
  211. /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
  212. /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
  213. /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
  214. /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
  215. /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
@@ -0,0 +1,285 @@
1
+ ---
2
+ title: LibXML
3
+ parent: Adapters
4
+ nav_order: 2
5
+ ---
6
+
7
+ == LibXML adapter
8
+
9
+ === Purpose
10
+
11
+ The LibXML adapter provides XML processing through the libxml-ruby library,
12
+ offering excellent performance through native libxml2 bindings with full
13
+ XPath 1.0 support.
14
+
15
+ === Overview
16
+
17
+ link:https://github.com/xml4r/libxml-ruby[libxml-ruby] provides Ruby
18
+ bindings to the libxml2 C library, offering similar performance
19
+ characteristics to Nokogiri but as an alternative implementation. It's ideal
20
+ when you need native performance with full XML features.
21
+
22
+ === Installation
23
+
24
+ Add to your Gemfile:
25
+
26
+ [source,ruby]
27
+ ----
28
+ gem 'moxml'
29
+ gem 'libxml-ruby'
30
+ ----
31
+
32
+ Install:
33
+
34
+ [source,shell]
35
+ ----
36
+ bundle install
37
+ ----
38
+
39
+ === Configuration
40
+
41
+ [source,ruby]
42
+ ----
43
+ # Explicit selection
44
+ context = Moxml.new
45
+ context.config.adapter = :libxml
46
+
47
+ # Global default
48
+ Moxml::Config.default_adapter = :libxml
49
+ ----
50
+
51
+ === Features
52
+
53
+ ==== Full XPath 1.0 support
54
+
55
+ Complete XPath 1.0 implementation through libxml2:
56
+
57
+ [source,ruby]
58
+ ----
59
+ doc = Moxml.new.parse(xml)
60
+
61
+ # All XPath features work
62
+ books = doc.xpath('//book[@price < 30]')
63
+ count = doc.xpath('count(//book)')
64
+ titles = doc.xpath('//book[position() < 3]/title')
65
+
66
+ # Namespace-aware queries
67
+ doc.xpath('//ns:element', 'ns' => 'http://example.org')
68
+
69
+ # Complex predicates and functions
70
+ doc.xpath('//book[author and price > 20]')
71
+ doc.xpath('//chapter[last()]')
72
+ ----
73
+
74
+ ==== Complete namespace support
75
+
76
+ Full namespace handling:
77
+
78
+ [source,ruby]
79
+ ----
80
+ # Create namespaced elements
81
+ element.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
82
+
83
+ # Query with namespaces
84
+ results = doc.xpath('//dc:creator',
85
+ 'dc' => 'http://purl.org/dc/elements/1.1/')
86
+
87
+ # Namespace inheritance works correctly
88
+ ----
89
+
90
+ ==== All node types
91
+
92
+ Complete support for:
93
+
94
+ * Elements with attributes
95
+ * Text nodes
96
+ * CDATA sections
97
+ * Comments
98
+ * Processing instructions
99
+ * DOCTYPE declarations (with limitations)
100
+ * XML declarations
101
+
102
+ ==== Performance
103
+
104
+ * **Parsing speed**: Very fast (native libxml2)
105
+ * **Serialization speed**: Very fast
106
+ * **Memory usage**: Excellent
107
+ * **XPath performance**: Excellent (native)
108
+
109
+ === Limitations
110
+
111
+ Minor limitations compared to other adapters:
112
+
113
+ **DOCTYPE handling:**
114
+
115
+ * DOCTYPE parsing works correctly
116
+ * DOCTYPE serialization is limited
117
+ * Round-trip preservation of DOCTYPE may not work perfectly
118
+
119
+ **Performance:**
120
+
121
+ * Serialization slightly slower than Ox in some cases
122
+ * Still very competitive with other adapters
123
+
124
+ [source,ruby]
125
+ ----
126
+ # DOCTYPE limitation example
127
+ xml_with_doctype = <<~XML
128
+ <!DOCTYPE root SYSTEM "test.dtd">
129
+ <root/>
130
+ XML
131
+
132
+ doc = Moxml.new.parse(xml_with_doctype)
133
+ # Parsing works fine
134
+
135
+ # But re-serialization may not preserve DOCTYPE perfectly
136
+ output = doc.to_xml
137
+ # DOCTYPE may be formatted differently or missing
138
+ ----
139
+
140
+ === Performance characteristics
141
+
142
+ Based on benchmarks:
143
+
144
+ [cols="2,2,3"]
145
+ |===
146
+ | Operation | Performance | Notes
147
+
148
+ | Parse medium XML
149
+ | ~120 ips
150
+ | Very fast native parsing
151
+
152
+ | Serialize medium XML
153
+ | ~1,200 ips
154
+ | Fast serialization
155
+
156
+ | XPath queries
157
+ | ~50,000+ ips
158
+ | Native libxml2 XPath
159
+
160
+ | Memory usage
161
+ | Excellent
162
+ | Efficient memory management
163
+ |===
164
+
165
+ === Best use cases
166
+
167
+ **Choose LibXML when:**
168
+
169
+ * You want an alternative to Nokogiri
170
+ * Native C performance is important
171
+ * Full XPath 1.0 support is required
172
+ * Namespace handling is critical
173
+ * You prefer direct libxml2 bindings over Nokogiri's libxml2 wrapper
174
+
175
+ **Avoid LibXML when:**
176
+
177
+ * Pure Ruby is required (use link:oga[Oga])
178
+ * DOCTYPE round-trip is essential
179
+ * You need a wider community/ecosystem (use link:nokogiri[Nokogiri])
180
+
181
+ === Example usage
182
+
183
+ ==== Basic operations
184
+
185
+ [source,ruby]
186
+ ----
187
+ require 'moxml'
188
+
189
+ # Configure LibXML adapter
190
+ context = Moxml.new
191
+ context.config.adapter = :libxml
192
+
193
+ xml = '<library><book id="1">Ruby Programming</book></library>'
194
+ doc = context.parse(xml)
195
+
196
+ # Query and modify
197
+ book = doc.at_xpath('//book[@id="1"]')
198
+ book.text = 'Advanced Ruby Programming'
199
+ book['edition'] = '2nd'
200
+
201
+ puts doc.to_xml(indent: 2)
202
+ ----
203
+
204
+ ==== Complex XPath
205
+
206
+ [source,ruby]
207
+ ----
208
+ # All XPath 1.0 features supported
209
+ expensive_books = doc.xpath('//book[price > 30]')
210
+ fiction_count = doc.xpath('count(//book[@category="fiction"])')
211
+ last_chapter = doc.at_xpath('//chapter[last()]')
212
+ ----
213
+
214
+ ==== Namespace operations
215
+
216
+ [source,ruby]
217
+ ----
218
+ xml = <<~XML
219
+ <library xmlns:dc="http://purl.org/dc/elements/1.1/">
220
+ <book>
221
+ <dc:title>Programming</dc:title>
222
+ <dc:creator>Smith</dc:creator>
223
+ </book>
224
+ </library>
225
+ XML
226
+
227
+ doc = Moxml.new.parse(xml)
228
+
229
+ # Query with namespaces
230
+ ns = { 'dc' => 'http://purl.org/dc/elements/1.1/' }
231
+ titles = doc.xpath('//dc:title', ns)
232
+ creators = doc.xpath('//dc:creator', ns)
233
+
234
+ puts "Title: #{titles.first.text}"
235
+ puts "Creator: #{creators.first.text}"
236
+ ----
237
+
238
+ === Comparison with Nokogiri
239
+
240
+ Both LibXML and Nokogiri use libxml2, but differ in their approach:
241
+
242
+ [cols="2,2,2"]
243
+ |===
244
+ | Aspect | LibXML | Nokogiri
245
+
246
+ | Underlying library
247
+ | libxml2 directly
248
+ | libxml2 via wrapper
249
+
250
+ | Community size
251
+ | Smaller
252
+ | Very large
253
+
254
+ | Performance
255
+ | Excellent
256
+ | Excellent
257
+
258
+ | Feature completeness
259
+ | Full (except DOCTYPE)
260
+ | Full
261
+
262
+ | Pure Ruby option
263
+ | No
264
+ | No
265
+
266
+ | Cross-platform
267
+ | Good
268
+ | Excellent
269
+
270
+ | Documentation
271
+ | Good
272
+ | Extensive
273
+ |===
274
+
275
+ === References
276
+
277
+ * link:https://github.com/xml4r/libxml-ruby[libxml-ruby on GitHub]
278
+ * link:https://github.com/GNOME/libxml2[libxml2 C library]
279
+ * link:https://libxml2.gitlab.io/[libxml2 documentation]
280
+
281
+ === See also
282
+
283
+ * link:../compatibility[Compatibility matrix] - Feature comparison
284
+ * link:nokogiri[Nokogiri adapter] - Similar performance
285
+ * link:../../guides/adapter-switching[Adapter switching guide]
@@ -0,0 +1,251 @@
1
+ ---
2
+ title: Nokogiri
3
+ parent: Adapters
4
+ nav_order: 1
5
+ ---
6
+
7
+ == Nokogiri adapter
8
+
9
+ === Purpose
10
+
11
+ The Nokogiri adapter provides XML processing through the industry-standard
12
+ Nokogiri library, offering excellent performance and complete XPath 1.0
13
+ support.
14
+
15
+ === Overview
16
+
17
+ link:https://github.com/sparklemotion/nokogiri[Nokogiri] is the most widely
18
+ used XML library in the Ruby ecosystem. It wraps the performant libxml2 C
19
+ library, providing fast parsing, flexible querying, and reliable XML
20
+ processing.
21
+
22
+ === Installation
23
+
24
+ Add to your Gemfile:
25
+
26
+ [source,ruby]
27
+ ----
28
+ gem 'moxml'
29
+ gem 'nokogiri'
30
+ ----
31
+
32
+ Install:
33
+
34
+ [source,shell]
35
+ ----
36
+ bundle install
37
+ ----
38
+
39
+ === Configuration
40
+
41
+ Nokogiri is the default adapter and will be used automatically if installed:
42
+
43
+ [source,ruby]
44
+ ----
45
+ # Automatic selection (default)
46
+ context = Moxml.new
47
+ # Uses Nokogiri if available
48
+
49
+ # Explicit selection
50
+ context = Moxml.new
51
+ context.config.adapter = :nokogiri
52
+
53
+ # Global default
54
+ Moxml::Config.default_adapter = :nokogiri
55
+ ----
56
+
57
+ === Features
58
+
59
+ ==== Full XPath 1.0 support
60
+
61
+ Nokogiri provides a complete XPath 1.0 implementation:
62
+
63
+ [source,ruby]
64
+ ----
65
+ doc = Moxml.new.parse(xml)
66
+
67
+ # All XPath features work
68
+ books = doc.xpath('//book[@price < 30]')
69
+ count = doc.xpath('count(//book)')
70
+ titles = doc.xpath('//book[position() < 3]/title')
71
+
72
+ # Namespace-aware queries
73
+ doc.xpath('//ns:element', 'ns' => 'http://example.org')
74
+
75
+ # Complex predicates
76
+ doc.xpath('//book[@id and @isbn and price < 50]')
77
+
78
+ # All axes supported
79
+ doc.xpath('//chapter/following-sibling::*')
80
+ ----
81
+
82
+ ==== Complete namespace support
83
+
84
+ Full namespace handling including default namespaces and inheritance:
85
+
86
+ [source,ruby]
87
+ ----
88
+ # Create namespaced elements
89
+ element.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
90
+
91
+ # Query with namespaces
92
+ results = doc.xpath('//dc:creator',
93
+ 'dc' => 'http://purl.org/dc/elements/1.1/')
94
+
95
+ # Namespace inheritance works correctly
96
+ # Child elements inherit parent namespaces
97
+ ----
98
+
99
+ ==== All node types supported
100
+
101
+ Complete support for all XML node types:
102
+
103
+ * Elements with attributes
104
+ * Text nodes
105
+ * CDATA sections
106
+ * Comments
107
+ * Processing instructions
108
+ * DOCTYPE declarations
109
+ * XML declarations
110
+
111
+ ==== High performance
112
+
113
+ * **Parsing speed**: Fast (C library)
114
+ * **Serialization speed**: Fast
115
+ * **Memory usage**: Good
116
+ * **XPath performance**: Excellent (native libxml2)
117
+
118
+ === Limitations
119
+
120
+ Nokogiri has minimal limitations:
121
+
122
+ * Requires C extensions (not pure Ruby)
123
+ * Platform-specific compilation may be needed
124
+ * Slightly larger memory footprint than some alternatives
125
+
126
+ === Performance characteristics
127
+
128
+ Based on benchmarks:
129
+
130
+ [cols="2,2,3"]
131
+ |===
132
+ | Operation | Performance | Notes
133
+
134
+ | Parse medium XML
135
+ | ~76 ips
136
+ | Fast C library parsing
137
+
138
+ | Serialize medium XML
139
+ | ~13,900 ips
140
+ | Very fast serialization
141
+
142
+ | XPath queries
143
+ | ~64,958 ips
144
+ | Native libxml2 XPath engine
145
+
146
+ | Memory usage
147
+ | -0.1 MB delta
148
+ | Excellent memory efficiency
149
+ |===
150
+
151
+ === Best use cases
152
+
153
+ **Choose Nokogiri when:**
154
+
155
+ * You need industry-standard XML processing
156
+ * Large community support is important
157
+ * Full XPath 1.0 compliance is required
158
+ * Performance is important but not the absolute priority
159
+ * Cross-platform deployment is needed
160
+ * C extensions are acceptable
161
+
162
+ **Avoid Nokogiri when:**
163
+
164
+ * Pure Ruby is required (use link:oga[Oga])
165
+ * Absolute maximum speed is critical (use link:ox[Ox])
166
+ * C extension compilation is problematic (use link:oga[Oga] or
167
+ link:rexml[REXML])
168
+
169
+ === Example usage
170
+
171
+ ==== Basic document processing
172
+
173
+ [source,ruby]
174
+ ----
175
+ require 'moxml'
176
+
177
+ # Nokogiri is used by default
178
+ context = Moxml.new
179
+
180
+ xml = <<~XML
181
+ <library>
182
+ <book id="1">
183
+ <title>Ruby Programming</title>
184
+ <price>29.99</price>
185
+ </book>
186
+ </library>
187
+ XML
188
+
189
+ doc = context.parse(xml)
190
+
191
+ # Query efficiently
192
+ book = doc.at_xpath('//book[@id="1"]')
193
+ puts book.at_xpath('.//title').text # => "Ruby Programming"
194
+
195
+ # Modify
196
+ book.at_xpath('.//price').text = '24.99'
197
+
198
+ # Serialize
199
+ puts doc.to_xml(indent: 2)
200
+ ----
201
+
202
+ ==== Complex XPath queries
203
+
204
+ [source,ruby]
205
+ ----
206
+ # All XPath 1.0 features work
207
+ doc.xpath('//book[price < 30 and @category="fiction"]')
208
+ doc.xpath('//book[position() mod 2 = 0]')
209
+ doc.xpath('count(//book[author="Smith"])')
210
+ doc.xpath('//chapter[last()]/preceding-sibling::*')
211
+ ----
212
+
213
+ ==== Namespace handling
214
+
215
+ [source,ruby]
216
+ ----
217
+ xml = <<~XML
218
+ <library xmlns="http://example.org/library"
219
+ xmlns:dc="http://purl.org/dc/elements/1.1/">
220
+ <book>
221
+ <dc:title>Programming</dc:title>
222
+ </book>
223
+ </library>
224
+ XML
225
+
226
+ doc = Moxml.new.parse(xml)
227
+
228
+ # Define namespace mappings
229
+ ns = {
230
+ 'lib' => 'http://example.org/library',
231
+ 'dc' => 'http://purl.org/dc/elements/1.1/'
232
+ }
233
+
234
+ # Query with namespaces
235
+ books = doc.xpath('//lib:book', ns)
236
+ titles = doc.xpath('//dc:title', ns)
237
+
238
+ puts titles.first.text # => "Programming"
239
+ ----
240
+
241
+ === References
242
+
243
+ * link:https://nokogiri.org/[Nokogiri homepage]
244
+ * link:https://nokogiri.org/tutorials/[Nokogiri tutorials]
245
+ * link:https://github.com/sparklemotion/nokogiri[Nokogiri on GitHub]
246
+
247
+ === See also
248
+
249
+ * link:../compatibility[Compatibility matrix] - Feature comparison
250
+ * link:libxml[LibXML adapter] - Similar performance alternative
251
+ * link:../../guides/adapter-switching[Adapter switching guide]