moxml 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +10 -10
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +224 -43
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +51 -0
- data/docs/_guides/modifying-xml.adoc +292 -0
- data/docs/_guides/parsing-xml.adoc +230 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_guides/xml-declaration.adoc +450 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +97 -0
- data/docs/_pages/adapters/libxml.adoc +285 -0
- data/docs/_pages/adapters/nokogiri.adoc +251 -0
- data/docs/_pages/adapters/oga.adoc +291 -0
- data/docs/_pages/adapters/ox.adoc +56 -0
- data/docs/_pages/adapters/rexml.adoc +292 -0
- data/docs/_pages/best-practices.adoc +429 -0
- data/docs/_pages/compatibility.adoc +467 -0
- data/docs/_pages/configuration.adoc +250 -0
- data/docs/_pages/error-handling.adoc +349 -0
- data/docs/_pages/headed-ox-limitations.adoc +574 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +140 -0
- data/docs/_pages/node-api-reference.adoc +49 -0
- data/docs/_pages/performance.adoc +35 -0
- data/docs/_pages/quick-start.adoc +243 -0
- data/docs/_pages/thread-safety.adoc +28 -0
- data/docs/_references/document-api.adoc +407 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +267 -0
- data/docs/_tutorials/builder-pattern.adoc +342 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +324 -0
- data/docs/_tutorials/xpath-queries.adoc +358 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1564 -0
- data/lib/moxml/adapter/nokogiri.rb +156 -9
- data/lib/moxml/adapter/oga.rb +190 -15
- data/lib/moxml/adapter/ox.rb +322 -28
- data/lib/moxml/adapter/rexml.rb +157 -28
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +57 -2
- data/lib/moxml/declaration.rb +9 -0
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +53 -6
- data/lib/moxml/document_builder.rb +34 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +155 -4
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1770 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_preservation_spec.rb +217 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +178 -34
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: LibXML
|
|
3
|
+
parent: Adapters
|
|
4
|
+
nav_order: 2
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
== LibXML adapter
|
|
8
|
+
|
|
9
|
+
=== Purpose
|
|
10
|
+
|
|
11
|
+
The LibXML adapter provides XML processing through the libxml-ruby library,
|
|
12
|
+
offering excellent performance through native libxml2 bindings with full
|
|
13
|
+
XPath 1.0 support.
|
|
14
|
+
|
|
15
|
+
=== Overview
|
|
16
|
+
|
|
17
|
+
link:https://github.com/xml4r/libxml-ruby[libxml-ruby] provides Ruby
|
|
18
|
+
bindings to the libxml2 C library, offering similar performance
|
|
19
|
+
characteristics to Nokogiri but as an alternative implementation. It's ideal
|
|
20
|
+
when you need native performance with full XML features.
|
|
21
|
+
|
|
22
|
+
=== Installation
|
|
23
|
+
|
|
24
|
+
Add to your Gemfile:
|
|
25
|
+
|
|
26
|
+
[source,ruby]
|
|
27
|
+
----
|
|
28
|
+
gem 'moxml'
|
|
29
|
+
gem 'libxml-ruby'
|
|
30
|
+
----
|
|
31
|
+
|
|
32
|
+
Install:
|
|
33
|
+
|
|
34
|
+
[source,shell]
|
|
35
|
+
----
|
|
36
|
+
bundle install
|
|
37
|
+
----
|
|
38
|
+
|
|
39
|
+
=== Configuration
|
|
40
|
+
|
|
41
|
+
[source,ruby]
|
|
42
|
+
----
|
|
43
|
+
# Explicit selection
|
|
44
|
+
context = Moxml.new
|
|
45
|
+
context.config.adapter = :libxml
|
|
46
|
+
|
|
47
|
+
# Global default
|
|
48
|
+
Moxml::Config.default_adapter = :libxml
|
|
49
|
+
----
|
|
50
|
+
|
|
51
|
+
=== Features
|
|
52
|
+
|
|
53
|
+
==== Full XPath 1.0 support
|
|
54
|
+
|
|
55
|
+
Complete XPath 1.0 implementation through libxml2:
|
|
56
|
+
|
|
57
|
+
[source,ruby]
|
|
58
|
+
----
|
|
59
|
+
doc = Moxml.new.parse(xml)
|
|
60
|
+
|
|
61
|
+
# All XPath features work
|
|
62
|
+
books = doc.xpath('//book[@price < 30]')
|
|
63
|
+
count = doc.xpath('count(//book)')
|
|
64
|
+
titles = doc.xpath('//book[position() < 3]/title')
|
|
65
|
+
|
|
66
|
+
# Namespace-aware queries
|
|
67
|
+
doc.xpath('//ns:element', 'ns' => 'http://example.org')
|
|
68
|
+
|
|
69
|
+
# Complex predicates and functions
|
|
70
|
+
doc.xpath('//book[author and price > 20]')
|
|
71
|
+
doc.xpath('//chapter[last()]')
|
|
72
|
+
----
|
|
73
|
+
|
|
74
|
+
==== Complete namespace support
|
|
75
|
+
|
|
76
|
+
Full namespace handling:
|
|
77
|
+
|
|
78
|
+
[source,ruby]
|
|
79
|
+
----
|
|
80
|
+
# Create namespaced elements
|
|
81
|
+
element.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
|
|
82
|
+
|
|
83
|
+
# Query with namespaces
|
|
84
|
+
results = doc.xpath('//dc:creator',
|
|
85
|
+
'dc' => 'http://purl.org/dc/elements/1.1/')
|
|
86
|
+
|
|
87
|
+
# Namespace inheritance works correctly
|
|
88
|
+
----
|
|
89
|
+
|
|
90
|
+
==== All node types
|
|
91
|
+
|
|
92
|
+
Complete support for:
|
|
93
|
+
|
|
94
|
+
* Elements with attributes
|
|
95
|
+
* Text nodes
|
|
96
|
+
* CDATA sections
|
|
97
|
+
* Comments
|
|
98
|
+
* Processing instructions
|
|
99
|
+
* DOCTYPE declarations (with limitations)
|
|
100
|
+
* XML declarations
|
|
101
|
+
|
|
102
|
+
==== Performance
|
|
103
|
+
|
|
104
|
+
* **Parsing speed**: Very fast (native libxml2)
|
|
105
|
+
* **Serialization speed**: Very fast
|
|
106
|
+
* **Memory usage**: Excellent
|
|
107
|
+
* **XPath performance**: Excellent (native)
|
|
108
|
+
|
|
109
|
+
=== Limitations
|
|
110
|
+
|
|
111
|
+
Minor limitations compared to other adapters:
|
|
112
|
+
|
|
113
|
+
**DOCTYPE handling:**
|
|
114
|
+
|
|
115
|
+
* DOCTYPE parsing works correctly
|
|
116
|
+
* DOCTYPE serialization is limited
|
|
117
|
+
* Round-trip preservation of DOCTYPE may not work perfectly
|
|
118
|
+
|
|
119
|
+
**Performance:**
|
|
120
|
+
|
|
121
|
+
* Serialization slightly slower than Ox in some cases
|
|
122
|
+
* Still very competitive with other adapters
|
|
123
|
+
|
|
124
|
+
[source,ruby]
|
|
125
|
+
----
|
|
126
|
+
# DOCTYPE limitation example
|
|
127
|
+
xml_with_doctype = <<~XML
|
|
128
|
+
<!DOCTYPE root SYSTEM "test.dtd">
|
|
129
|
+
<root/>
|
|
130
|
+
XML
|
|
131
|
+
|
|
132
|
+
doc = Moxml.new.parse(xml_with_doctype)
|
|
133
|
+
# Parsing works fine
|
|
134
|
+
|
|
135
|
+
# But re-serialization may not preserve DOCTYPE perfectly
|
|
136
|
+
output = doc.to_xml
|
|
137
|
+
# DOCTYPE may be formatted differently or missing
|
|
138
|
+
----
|
|
139
|
+
|
|
140
|
+
=== Performance characteristics
|
|
141
|
+
|
|
142
|
+
Based on benchmarks:
|
|
143
|
+
|
|
144
|
+
[cols="2,2,3"]
|
|
145
|
+
|===
|
|
146
|
+
| Operation | Performance | Notes
|
|
147
|
+
|
|
148
|
+
| Parse medium XML
|
|
149
|
+
| ~120 ips
|
|
150
|
+
| Very fast native parsing
|
|
151
|
+
|
|
152
|
+
| Serialize medium XML
|
|
153
|
+
| ~1,200 ips
|
|
154
|
+
| Fast serialization
|
|
155
|
+
|
|
156
|
+
| XPath queries
|
|
157
|
+
| ~50,000+ ips
|
|
158
|
+
| Native libxml2 XPath
|
|
159
|
+
|
|
160
|
+
| Memory usage
|
|
161
|
+
| Excellent
|
|
162
|
+
| Efficient memory management
|
|
163
|
+
|===
|
|
164
|
+
|
|
165
|
+
=== Best use cases
|
|
166
|
+
|
|
167
|
+
**Choose LibXML when:**
|
|
168
|
+
|
|
169
|
+
* You want an alternative to Nokogiri
|
|
170
|
+
* Native C performance is important
|
|
171
|
+
* Full XPath 1.0 support is required
|
|
172
|
+
* Namespace handling is critical
|
|
173
|
+
* You prefer direct libxml2 bindings (libxml-ruby) over Nokogiri's libxml2 wrapper
|
|
174
|
+
|
|
175
|
+
**Avoid LibXML when:**
|
|
176
|
+
|
|
177
|
+
* Pure Ruby is required (use link:oga[Oga])
|
|
178
|
+
* DOCTYPE round-trip is essential
|
|
179
|
+
* You need a wider community/ecosystem (use link:nokogiri[Nokogiri])
|
|
180
|
+
|
|
181
|
+
=== Example usage
|
|
182
|
+
|
|
183
|
+
==== Basic operations
|
|
184
|
+
|
|
185
|
+
[source,ruby]
|
|
186
|
+
----
|
|
187
|
+
require 'moxml'
|
|
188
|
+
|
|
189
|
+
# Configure LibXML adapter
|
|
190
|
+
context = Moxml.new
|
|
191
|
+
context.config.adapter = :libxml
|
|
192
|
+
|
|
193
|
+
xml = '<library><book id="1">Ruby Programming</book></library>'
|
|
194
|
+
doc = context.parse(xml)
|
|
195
|
+
|
|
196
|
+
# Query and modify
|
|
197
|
+
book = doc.at_xpath('//book[@id="1"]')
|
|
198
|
+
book.text = 'Advanced Ruby Programming'
|
|
199
|
+
book['edition'] = '2nd'
|
|
200
|
+
|
|
201
|
+
puts doc.to_xml(indent: 2)
|
|
202
|
+
----
|
|
203
|
+
|
|
204
|
+
==== Complex XPath
|
|
205
|
+
|
|
206
|
+
[source,ruby]
|
|
207
|
+
----
|
|
208
|
+
# All XPath 1.0 features supported
|
|
209
|
+
expensive_books = doc.xpath('//book[price > 30]')
|
|
210
|
+
fiction_count = doc.xpath('count(//book[@category="fiction"])')
|
|
211
|
+
last_chapter = doc.at_xpath('//chapter[last()]')
|
|
212
|
+
----
|
|
213
|
+
|
|
214
|
+
==== Namespace operations
|
|
215
|
+
|
|
216
|
+
[source,ruby]
|
|
217
|
+
----
|
|
218
|
+
xml = <<~XML
|
|
219
|
+
<library xmlns:dc="http://purl.org/dc/elements/1.1/">
|
|
220
|
+
<book>
|
|
221
|
+
<dc:title>Programming</dc:title>
|
|
222
|
+
<dc:creator>Smith</dc:creator>
|
|
223
|
+
</book>
|
|
224
|
+
</library>
|
|
225
|
+
XML
|
|
226
|
+
|
|
227
|
+
doc = Moxml.new.parse(xml)
|
|
228
|
+
|
|
229
|
+
# Query with namespaces
|
|
230
|
+
ns = { 'dc' => 'http://purl.org/dc/elements/1.1/' }
|
|
231
|
+
titles = doc.xpath('//dc:title', ns)
|
|
232
|
+
creators = doc.xpath('//dc:creator', ns)
|
|
233
|
+
|
|
234
|
+
puts "Title: #{titles.first.text}"
|
|
235
|
+
puts "Creator: #{creators.first.text}"
|
|
236
|
+
----
|
|
237
|
+
|
|
238
|
+
=== Comparison with Nokogiri
|
|
239
|
+
|
|
240
|
+
Both LibXML and Nokogiri use libxml2, but differ in their approach:
|
|
241
|
+
|
|
242
|
+
[cols="2,2,2"]
|
|
243
|
+
|===
|
|
244
|
+
| Aspect | LibXML | Nokogiri
|
|
245
|
+
|
|
246
|
+
| Underlying library
|
|
247
|
+
| libxml2 directly
|
|
248
|
+
| libxml2 via wrapper
|
|
249
|
+
|
|
250
|
+
| Community size
|
|
251
|
+
| Smaller
|
|
252
|
+
| Very large
|
|
253
|
+
|
|
254
|
+
| Performance
|
|
255
|
+
| Excellent
|
|
256
|
+
| Excellent
|
|
257
|
+
|
|
258
|
+
| Feature completeness
|
|
259
|
+
| Full (limited DOCTYPE serialization)
|
|
260
|
+
| Full
|
|
261
|
+
|
|
262
|
+
| Pure Ruby option
|
|
263
|
+
| No
|
|
264
|
+
| No
|
|
265
|
+
|
|
266
|
+
| Cross-platform
|
|
267
|
+
| Good
|
|
268
|
+
| Excellent
|
|
269
|
+
|
|
270
|
+
| Documentation
|
|
271
|
+
| Good
|
|
272
|
+
| Extensive
|
|
273
|
+
|===
|
|
274
|
+
|
|
275
|
+
=== References
|
|
276
|
+
|
|
277
|
+
* link:https://github.com/xml4r/libxml-ruby[libxml-ruby on GitHub]
|
|
278
|
+
* link:https://github.com/GNOME/libxml2[libxml2 C library]
|
|
279
|
+
* link:https://libxml2.gitlab.io/[libxml2 documentation]
|
|
280
|
+
|
|
281
|
+
=== See also
|
|
282
|
+
|
|
283
|
+
* link:../compatibility[Compatibility matrix] - Feature comparison
|
|
284
|
+
* link:nokogiri[Nokogiri adapter] - Similar performance
|
|
285
|
+
* link:../../guides/adapter-switching[Adapter switching guide]
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Nokogiri
|
|
3
|
+
parent: Adapters
|
|
4
|
+
nav_order: 1
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
== Nokogiri adapter
|
|
8
|
+
|
|
9
|
+
=== Purpose
|
|
10
|
+
|
|
11
|
+
The Nokogiri adapter provides XML processing through the industry-standard
|
|
12
|
+
Nokogiri library, offering excellent performance and complete XPath 1.0
|
|
13
|
+
support.
|
|
14
|
+
|
|
15
|
+
=== Overview
|
|
16
|
+
|
|
17
|
+
link:https://github.com/sparklemotion/nokogiri[Nokogiri] is the most widely
|
|
18
|
+
used XML library in the Ruby ecosystem. It wraps the performant libxml2 C
|
|
19
|
+
library, providing fast parsing, flexible querying, and reliable XML
|
|
20
|
+
processing.
|
|
21
|
+
|
|
22
|
+
=== Installation
|
|
23
|
+
|
|
24
|
+
Add to your Gemfile:
|
|
25
|
+
|
|
26
|
+
[source,ruby]
|
|
27
|
+
----
|
|
28
|
+
gem 'moxml'
|
|
29
|
+
gem 'nokogiri'
|
|
30
|
+
----
|
|
31
|
+
|
|
32
|
+
Install:
|
|
33
|
+
|
|
34
|
+
[source,shell]
|
|
35
|
+
----
|
|
36
|
+
bundle install
|
|
37
|
+
----
|
|
38
|
+
|
|
39
|
+
=== Configuration
|
|
40
|
+
|
|
41
|
+
Nokogiri is the default adapter and will be used automatically if installed:
|
|
42
|
+
|
|
43
|
+
[source,ruby]
|
|
44
|
+
----
|
|
45
|
+
# Automatic selection (default)
|
|
46
|
+
context = Moxml.new
|
|
47
|
+
# Uses Nokogiri if available
|
|
48
|
+
|
|
49
|
+
# Explicit selection
|
|
50
|
+
context = Moxml.new
|
|
51
|
+
context.config.adapter = :nokogiri
|
|
52
|
+
|
|
53
|
+
# Global default
|
|
54
|
+
Moxml::Config.default_adapter = :nokogiri
|
|
55
|
+
----
|
|
56
|
+
|
|
57
|
+
=== Features
|
|
58
|
+
|
|
59
|
+
==== Full XPath 1.0 support
|
|
60
|
+
|
|
61
|
+
Nokogiri provides a complete XPath 1.0 implementation:
|
|
62
|
+
|
|
63
|
+
[source,ruby]
|
|
64
|
+
----
|
|
65
|
+
doc = Moxml.new.parse(xml)
|
|
66
|
+
|
|
67
|
+
# All XPath features work
|
|
68
|
+
books = doc.xpath('//book[@price < 30]')
|
|
69
|
+
count = doc.xpath('count(//book)')
|
|
70
|
+
titles = doc.xpath('//book[position() < 3]/title')
|
|
71
|
+
|
|
72
|
+
# Namespace-aware queries
|
|
73
|
+
doc.xpath('//ns:element', 'ns' => 'http://example.org')
|
|
74
|
+
|
|
75
|
+
# Complex predicates
|
|
76
|
+
doc.xpath('//book[@id and @isbn and price < 50]')
|
|
77
|
+
|
|
78
|
+
# All axes supported
|
|
79
|
+
doc.xpath('//chapter/following-sibling::*')
|
|
80
|
+
----
|
|
81
|
+
|
|
82
|
+
==== Complete namespace support
|
|
83
|
+
|
|
84
|
+
Full namespace handling including default namespaces and inheritance:
|
|
85
|
+
|
|
86
|
+
[source,ruby]
|
|
87
|
+
----
|
|
88
|
+
# Create namespaced elements
|
|
89
|
+
element.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
|
|
90
|
+
|
|
91
|
+
# Query with namespaces
|
|
92
|
+
results = doc.xpath('//dc:creator',
|
|
93
|
+
'dc' => 'http://purl.org/dc/elements/1.1/')
|
|
94
|
+
|
|
95
|
+
# Namespace inheritance works correctly
|
|
96
|
+
# Child elements inherit parent namespaces
|
|
97
|
+
----
|
|
98
|
+
|
|
99
|
+
==== All node types supported
|
|
100
|
+
|
|
101
|
+
Complete support for all XML node types:
|
|
102
|
+
|
|
103
|
+
* Elements with attributes
|
|
104
|
+
* Text nodes
|
|
105
|
+
* CDATA sections
|
|
106
|
+
* Comments
|
|
107
|
+
* Processing instructions
|
|
108
|
+
* DOCTYPE declarations
|
|
109
|
+
* XML declarations
|
|
110
|
+
|
|
111
|
+
==== High performance
|
|
112
|
+
|
|
113
|
+
* **Parsing speed**: Fast (C library)
|
|
114
|
+
* **Serialization speed**: Fast
|
|
115
|
+
* **Memory usage**: Good
|
|
116
|
+
* **XPath performance**: Excellent (native libxml2)
|
|
117
|
+
|
|
118
|
+
=== Limitations
|
|
119
|
+
|
|
120
|
+
Nokogiri has minimal limitations:
|
|
121
|
+
|
|
122
|
+
* Requires C extensions (not pure Ruby)
|
|
123
|
+
* Platform-specific compilation may be needed
|
|
124
|
+
* Slightly larger memory footprint than some alternatives
|
|
125
|
+
|
|
126
|
+
=== Performance characteristics
|
|
127
|
+
|
|
128
|
+
Based on benchmarks:
|
|
129
|
+
|
|
130
|
+
[cols="2,2,3"]
|
|
131
|
+
|===
|
|
132
|
+
| Operation | Performance | Notes
|
|
133
|
+
|
|
134
|
+
| Parse medium XML
|
|
135
|
+
| ~76 ips
|
|
136
|
+
| Fast C library parsing
|
|
137
|
+
|
|
138
|
+
| Serialize medium XML
|
|
139
|
+
| ~13,900 ips
|
|
140
|
+
| Very fast serialization
|
|
141
|
+
|
|
142
|
+
| XPath queries
|
|
143
|
+
| ~64,958 ips
|
|
144
|
+
| Native libxml2 XPath engine
|
|
145
|
+
|
|
146
|
+
| Memory usage
|
|
147
|
+
| -0.1 MB delta
|
|
148
|
+
| Excellent memory efficiency
|
|
149
|
+
|===
|
|
150
|
+
|
|
151
|
+
=== Best use cases
|
|
152
|
+
|
|
153
|
+
**Choose Nokogiri when:**
|
|
154
|
+
|
|
155
|
+
* You need industry-standard XML processing
|
|
156
|
+
* Large community support is important
|
|
157
|
+
* Full XPath 1.0 compliance is required
|
|
158
|
+
* Performance is important but not the absolute priority
|
|
159
|
+
* Cross-platform deployment is needed
|
|
160
|
+
* C extensions are acceptable
|
|
161
|
+
|
|
162
|
+
**Avoid Nokogiri when:**
|
|
163
|
+
|
|
164
|
+
* Pure Ruby is required (use link:oga[Oga])
|
|
165
|
+
* Absolute maximum speed is critical (use link:ox[Ox])
|
|
166
|
+
* C extension compilation is problematic (use link:oga[Oga] or
|
|
167
|
+
link:rexml[REXML])
|
|
168
|
+
|
|
169
|
+
=== Example usage
|
|
170
|
+
|
|
171
|
+
==== Basic document processing
|
|
172
|
+
|
|
173
|
+
[source,ruby]
|
|
174
|
+
----
|
|
175
|
+
require 'moxml'
|
|
176
|
+
|
|
177
|
+
# Nokogiri is used by default
|
|
178
|
+
context = Moxml.new
|
|
179
|
+
|
|
180
|
+
xml = <<~XML
|
|
181
|
+
<library>
|
|
182
|
+
<book id="1">
|
|
183
|
+
<title>Ruby Programming</title>
|
|
184
|
+
<price>29.99</price>
|
|
185
|
+
</book>
|
|
186
|
+
</library>
|
|
187
|
+
XML
|
|
188
|
+
|
|
189
|
+
doc = context.parse(xml)
|
|
190
|
+
|
|
191
|
+
# Query efficiently
|
|
192
|
+
book = doc.at_xpath('//book[@id="1"]')
|
|
193
|
+
puts book.at_xpath('.//title').text # => "Ruby Programming"
|
|
194
|
+
|
|
195
|
+
# Modify
|
|
196
|
+
book.at_xpath('.//price').text = '24.99'
|
|
197
|
+
|
|
198
|
+
# Serialize
|
|
199
|
+
puts doc.to_xml(indent: 2)
|
|
200
|
+
----
|
|
201
|
+
|
|
202
|
+
==== Complex XPath queries
|
|
203
|
+
|
|
204
|
+
[source,ruby]
|
|
205
|
+
----
|
|
206
|
+
# All XPath 1.0 features work
|
|
207
|
+
doc.xpath('//book[price < 30 and @category="fiction"]')
|
|
208
|
+
doc.xpath('//book[position() mod 2 = 0]')
|
|
209
|
+
doc.xpath('count(//book[author="Smith"])')
|
|
210
|
+
doc.xpath('//chapter[last()]/preceding-sibling::*')
|
|
211
|
+
----
|
|
212
|
+
|
|
213
|
+
==== Namespace handling
|
|
214
|
+
|
|
215
|
+
[source,ruby]
|
|
216
|
+
----
|
|
217
|
+
xml = <<~XML
|
|
218
|
+
<library xmlns="http://example.org/library"
|
|
219
|
+
xmlns:dc="http://purl.org/dc/elements/1.1/">
|
|
220
|
+
<book>
|
|
221
|
+
<dc:title>Programming</dc:title>
|
|
222
|
+
</book>
|
|
223
|
+
</library>
|
|
224
|
+
XML
|
|
225
|
+
|
|
226
|
+
doc = Moxml.new.parse(xml)
|
|
227
|
+
|
|
228
|
+
# Define namespace mappings
|
|
229
|
+
ns = {
|
|
230
|
+
'lib' => 'http://example.org/library',
|
|
231
|
+
'dc' => 'http://purl.org/dc/elements/1.1/'
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
# Query with namespaces
|
|
235
|
+
books = doc.xpath('//lib:book', ns)
|
|
236
|
+
titles = doc.xpath('//dc:title', ns)
|
|
237
|
+
|
|
238
|
+
puts titles.first.text # => "Programming"
|
|
239
|
+
----
|
|
240
|
+
|
|
241
|
+
=== References
|
|
242
|
+
|
|
243
|
+
* link:https://nokogiri.org/[Nokogiri homepage]
|
|
244
|
+
* link:https://nokogiri.org/tutorials/[Nokogiri tutorials]
|
|
245
|
+
* link:https://github.com/sparklemotion/nokogiri[Nokogiri on GitHub]
|
|
246
|
+
|
|
247
|
+
=== See also
|
|
248
|
+
|
|
249
|
+
* link:../compatibility[Compatibility matrix] - Feature comparison
|
|
250
|
+
* link:libxml[LibXML adapter] - Similar performance alternative
|
|
251
|
+
* link:../../guides/adapter-switching[Adapter switching guide]
|