moxml 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +10 -10
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +224 -43
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +51 -0
- data/docs/_guides/modifying-xml.adoc +292 -0
- data/docs/_guides/parsing-xml.adoc +230 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_guides/xml-declaration.adoc +450 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +97 -0
- data/docs/_pages/adapters/libxml.adoc +285 -0
- data/docs/_pages/adapters/nokogiri.adoc +251 -0
- data/docs/_pages/adapters/oga.adoc +291 -0
- data/docs/_pages/adapters/ox.adoc +56 -0
- data/docs/_pages/adapters/rexml.adoc +292 -0
- data/docs/_pages/best-practices.adoc +429 -0
- data/docs/_pages/compatibility.adoc +467 -0
- data/docs/_pages/configuration.adoc +250 -0
- data/docs/_pages/error-handling.adoc +349 -0
- data/docs/_pages/headed-ox-limitations.adoc +574 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +140 -0
- data/docs/_pages/node-api-reference.adoc +49 -0
- data/docs/_pages/performance.adoc +35 -0
- data/docs/_pages/quick-start.adoc +243 -0
- data/docs/_pages/thread-safety.adoc +28 -0
- data/docs/_references/document-api.adoc +407 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +267 -0
- data/docs/_tutorials/builder-pattern.adoc +342 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +324 -0
- data/docs/_tutorials/xpath-queries.adoc +358 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1564 -0
- data/lib/moxml/adapter/nokogiri.rb +156 -9
- data/lib/moxml/adapter/oga.rb +190 -15
- data/lib/moxml/adapter/ox.rb +322 -28
- data/lib/moxml/adapter/rexml.rb +157 -28
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +57 -2
- data/lib/moxml/declaration.rb +9 -0
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +53 -6
- data/lib/moxml/document_builder.rb +34 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +155 -4
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1770 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_preservation_spec.rb +217 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +178 -34
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Oga
|
|
3
|
+
parent: Adapters
|
|
4
|
+
nav_order: 3
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
== Oga adapter
|
|
8
|
+
|
|
9
|
+
=== Purpose
|
|
10
|
+
|
|
11
|
+
The Oga adapter provides pure Ruby XML processing with full XPath 1.0
|
|
12
|
+
support, making it ideal for environments where C extensions are not allowed
|
|
13
|
+
or desired.
|
|
14
|
+
|
|
15
|
+
=== Overview
|
|
16
|
+
|
|
17
|
+
link:https://github.com/yorickpeterse/oga[Oga] is a pure Ruby XML parser that
|
|
18
|
+
requires no C extensions. It's perfect for JRuby, TruffleRuby, Opal, or any
|
|
19
|
+
environment where native extensions are problematic.
|
|
20
|
+
|
|
21
|
+
=== Installation
|
|
22
|
+
|
|
23
|
+
Add to your Gemfile:
|
|
24
|
+
|
|
25
|
+
[source,ruby]
|
|
26
|
+
----
|
|
27
|
+
gem 'moxml'
|
|
28
|
+
gem 'oga'
|
|
29
|
+
----
|
|
30
|
+
|
|
31
|
+
Install:
|
|
32
|
+
|
|
33
|
+
[source,shell]
|
|
34
|
+
----
|
|
35
|
+
bundle install
|
|
36
|
+
----
|
|
37
|
+
|
|
38
|
+
=== Configuration
|
|
39
|
+
|
|
40
|
+
[source,ruby]
|
|
41
|
+
----
|
|
42
|
+
# Explicit selection
|
|
43
|
+
context = Moxml.new
|
|
44
|
+
context.config.adapter = :oga
|
|
45
|
+
|
|
46
|
+
# Global default
|
|
47
|
+
Moxml::Config.default_adapter = :oga
|
|
48
|
+
----
|
|
49
|
+
|
|
50
|
+
=== Features
|
|
51
|
+
|
|
52
|
+
==== Full XPath 1.0 support
|
|
53
|
+
|
|
54
|
+
Pure Ruby implementation of XPath 1.0:
|
|
55
|
+
|
|
56
|
+
[source,ruby]
|
|
57
|
+
----
|
|
58
|
+
doc = Moxml.new.parse(xml)
|
|
59
|
+
|
|
60
|
+
# All XPath features work
|
|
61
|
+
books = doc.xpath('//book[@price < 30]')
|
|
62
|
+
count = doc.xpath('count(//book)')
|
|
63
|
+
titles = doc.xpath('//book[position() < 3]/title')
|
|
64
|
+
|
|
65
|
+
# Namespace-aware queries
|
|
66
|
+
doc.xpath('//ns:element', 'ns' => 'http://example.org')
|
|
67
|
+
|
|
68
|
+
# Functions and predicates
|
|
69
|
+
doc.xpath('//book[contains(title, "Ruby")]')
|
|
70
|
+
----
|
|
71
|
+
|
|
72
|
+
==== Complete namespace support
|
|
73
|
+
|
|
74
|
+
Full namespace handling in pure Ruby:
|
|
75
|
+
|
|
76
|
+
[source,ruby]
|
|
77
|
+
----
|
|
78
|
+
# Create namespaced elements
|
|
79
|
+
element.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
|
|
80
|
+
|
|
81
|
+
# Query with namespaces
|
|
82
|
+
results = doc.xpath('//dc:creator',
|
|
83
|
+
'dc' => 'http://purl.org/dc/elements/1.1/')
|
|
84
|
+
----
|
|
85
|
+
|
|
86
|
+
==== All node types supported
|
|
87
|
+
|
|
88
|
+
Complete support for all XML node types:
|
|
89
|
+
|
|
90
|
+
* Elements with attributes
|
|
91
|
+
* Text nodes
|
|
92
|
+
* CDATA sections
|
|
93
|
+
* Comments
|
|
94
|
+
* Processing instructions
|
|
95
|
+
* DOCTYPE declarations
|
|
96
|
+
* XML declarations
|
|
97
|
+
|
|
98
|
+
==== Pure Ruby benefits
|
|
99
|
+
|
|
100
|
+
* No C extension compilation required
|
|
101
|
+
* Works on JRuby and TruffleRuby
|
|
102
|
+
* Compatible with Opal for browser environments
|
|
103
|
+
* Easier debugging and introspection
|
|
104
|
+
* No platform-specific build issues
|
|
105
|
+
|
|
106
|
+
=== Limitations
|
|
107
|
+
|
|
108
|
+
Minor limitations compared to native implementations:
|
|
109
|
+
|
|
110
|
+
* Slower parsing than C-based libraries
|
|
111
|
+
* Higher memory usage than Ox
|
|
112
|
+
* Some advanced XPath 1.0 functions may behave differently in edge cases
|
|
113
|
+
|
|
114
|
+
NOTE: Oga's pure Ruby implementation makes it slower than C-based libraries,
|
|
115
|
+
but it provides excellent functionality and is faster than REXML.
|
|
116
|
+
|
|
117
|
+
=== Performance characteristics
|
|
118
|
+
|
|
119
|
+
Based on benchmarks:
|
|
120
|
+
|
|
121
|
+
[cols="2,2,3"]
|
|
122
|
+
|===
|
|
123
|
+
| Operation | Performance | Notes
|
|
124
|
+
|
|
125
|
+
| Parse medium XML
|
|
126
|
+
| ~30-50 ips
|
|
127
|
+
| Pure Ruby parsing
|
|
128
|
+
|
|
129
|
+
| Serialize medium XML
|
|
130
|
+
| ~2,000+ ips
|
|
131
|
+
| Efficient serialization
|
|
132
|
+
|
|
133
|
+
| XPath queries
|
|
134
|
+
| ~20,000+ ips
|
|
135
|
+
| Pure Ruby XPath engine
|
|
136
|
+
|
|
137
|
+
| Memory usage
|
|
138
|
+
| Medium
|
|
139
|
+
| More than C libraries, less than REXML
|
|
140
|
+
|===
|
|
141
|
+
|
|
142
|
+
=== Best use cases
|
|
143
|
+
|
|
144
|
+
**Choose Oga when:**
|
|
145
|
+
|
|
146
|
+
* Pure Ruby environment is required (JRuby, TruffleRuby)
|
|
147
|
+
* C extensions cannot be compiled or used
|
|
148
|
+
* Browser deployment via Opal is needed
|
|
149
|
+
* Full XPath 1.0 support is required without C dependencies
|
|
150
|
+
* Easier debugging of XML processing is desired
|
|
151
|
+
|
|
152
|
+
**Avoid Oga when:**
|
|
153
|
+
|
|
154
|
+
* Maximum parsing performance is critical (use link:ox[Ox] or
|
|
155
|
+
link:nokogiri[Nokogiri])
|
|
156
|
+
* Memory usage must be minimized (use link:ox[Ox])
|
|
157
|
+
* Only the standard library may be used (use link:rexml[REXML])
|
|
158
|
+
|
|
159
|
+
=== Example usage
|
|
160
|
+
|
|
161
|
+
==== Basic document processing
|
|
162
|
+
|
|
163
|
+
[source,ruby]
|
|
164
|
+
----
|
|
165
|
+
require 'moxml'
|
|
166
|
+
|
|
167
|
+
# Configure Oga adapter
|
|
168
|
+
context = Moxml.new
|
|
169
|
+
context.config.adapter = :oga
|
|
170
|
+
|
|
171
|
+
xml = <<~XML
|
|
172
|
+
<library>
|
|
173
|
+
<book id="1">
|
|
174
|
+
<title>Ruby Programming</title>
|
|
175
|
+
<price>29.99</price>
|
|
176
|
+
</book>
|
|
177
|
+
</library>
|
|
178
|
+
XML
|
|
179
|
+
|
|
180
|
+
doc = context.parse(xml)
|
|
181
|
+
|
|
182
|
+
# Query with XPath
|
|
183
|
+
book = doc.at_xpath('//book[@id="1"]')
|
|
184
|
+
puts book.at_xpath('.//title').text # => "Ruby Programming"
|
|
185
|
+
|
|
186
|
+
# Modify
|
|
187
|
+
book.at_xpath('.//price').text = '24.99'
|
|
188
|
+
|
|
189
|
+
# Serialize
|
|
190
|
+
puts doc.to_xml(indent: 2)
|
|
191
|
+
----
|
|
192
|
+
|
|
193
|
+
==== XPath queries
|
|
194
|
+
|
|
195
|
+
[source,ruby]
|
|
196
|
+
----
|
|
197
|
+
# Complex XPath expressions work
|
|
198
|
+
cheap_books = doc.xpath('//book[price < 25]')
|
|
199
|
+
fiction = doc.xpath('//book[@category="fiction"]')
|
|
200
|
+
last_book = doc.at_xpath('//book[last()]')
|
|
201
|
+
|
|
202
|
+
# Functions
|
|
203
|
+
book_count = doc.xpath('count(//book)')
|
|
204
|
+
has_isbn = doc.xpath('//book[string-length(@isbn) = 13]')
|
|
205
|
+
----
|
|
206
|
+
|
|
207
|
+
==== Namespace handling
|
|
208
|
+
|
|
209
|
+
[source,ruby]
|
|
210
|
+
----
|
|
211
|
+
xml = <<~XML
|
|
212
|
+
<library xmlns:dc="http://purl.org/dc/elements/1.1/">
|
|
213
|
+
<book>
|
|
214
|
+
<dc:title>Programming</dc:title>
|
|
215
|
+
<dc:creator>Smith</dc:creator>
|
|
216
|
+
</book>
|
|
217
|
+
</library>
|
|
218
|
+
XML
|
|
219
|
+
|
|
220
|
+
doc = Moxml.new.parse(xml)
|
|
221
|
+
|
|
222
|
+
# Query with namespaces
|
|
223
|
+
ns = { 'dc' => 'http://purl.org/dc/elements/1.1/' }
|
|
224
|
+
titles = doc.xpath('//dc:title', ns)
|
|
225
|
+
|
|
226
|
+
puts titles.first.text # => "Programming"
|
|
227
|
+
----
|
|
228
|
+
|
|
229
|
+
=== JRuby and TruffleRuby support
|
|
230
|
+
|
|
231
|
+
Oga is particularly valuable for alternative Ruby implementations:
|
|
232
|
+
|
|
233
|
+
[source,ruby]
|
|
234
|
+
----
|
|
235
|
+
# Works identically on JRuby
|
|
236
|
+
# jruby -S gem install moxml oga
|
|
237
|
+
# jruby script.rb
|
|
238
|
+
|
|
239
|
+
# Works on TruffleRuby
|
|
240
|
+
# Same code, no modifications needed
|
|
241
|
+
|
|
242
|
+
require 'moxml'
|
|
243
|
+
context = Moxml.new
|
|
244
|
+
context.config.adapter = :oga
|
|
245
|
+
|
|
246
|
+
# Full functionality on all Ruby implementations
|
|
247
|
+
doc = context.parse(xml)
|
|
248
|
+
results = doc.xpath('//book[@id="1"]')
|
|
249
|
+
----
|
|
250
|
+
|
|
251
|
+
=== Comparison with other pure Ruby options
|
|
252
|
+
|
|
253
|
+
[cols="2,2,2"]
|
|
254
|
+
|===
|
|
255
|
+
| Aspect | Oga | REXML
|
|
256
|
+
|
|
257
|
+
| XPath support
|
|
258
|
+
| Full XPath 1.0
|
|
259
|
+
| Limited
|
|
260
|
+
|
|
261
|
+
| Performance
|
|
262
|
+
| Fast for pure Ruby
|
|
263
|
+
| Medium
|
|
264
|
+
|
|
265
|
+
| Memory usage
|
|
266
|
+
| Medium
|
|
267
|
+
| Medium
|
|
268
|
+
|
|
269
|
+
| Namespace XPath
|
|
270
|
+
| Full support
|
|
271
|
+
| Not supported
|
|
272
|
+
|
|
273
|
+
| Standard library
|
|
274
|
+
| No (external gem)
|
|
275
|
+
| Yes (built-in)
|
|
276
|
+
|
|
277
|
+
| Maintenance
|
|
278
|
+
| Active
|
|
279
|
+
| Active
|
|
280
|
+
|===
|
|
281
|
+
|
|
282
|
+
=== References
|
|
283
|
+
|
|
284
|
+
* link:https://github.com/yorickpeterse/oga[Oga on GitHub]
|
|
285
|
+
* link:https://github.com/yorickpeterse/oga/tree/master/doc[Oga documentation]
|
|
286
|
+
|
|
287
|
+
=== See also
|
|
288
|
+
|
|
289
|
+
* link:../compatibility[Compatibility matrix] - Feature comparison
|
|
290
|
+
* link:rexml[REXML adapter] - Alternative pure Ruby option
|
|
291
|
+
* link:../../guides/adapter-switching[Adapter switching guide]
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Ox
|
|
3
|
+
parent: Adapters
|
|
4
|
+
nav_order: 5
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
== Ox adapter
|
|
8
|
+
|
|
9
|
+
=== Overview
|
|
10
|
+
|
|
11
|
+
Ox is the fastest XML parser available for Ruby, providing excellent performance for simple to moderately complex XML documents.
|
|
12
|
+
|
|
13
|
+
**Best for:**
|
|
14
|
+
|
|
15
|
+
* Maximum parsing speed
|
|
16
|
+
* Simple document structures
|
|
17
|
+
* Memory-constrained environments
|
|
18
|
+
* When XPath usage is minimal
|
|
19
|
+
|
|
20
|
+
== Specific adapter limitations
|
|
21
|
+
|
|
22
|
+
=== Ox adapter
|
|
23
|
+
|
|
24
|
+
==== XPath limitations
|
|
25
|
+
|
|
26
|
+
The Ox adapter uses a custom "XPath-to-locate" translation engine.
|
|
27
|
+
|
|
28
|
+
The following XPath features are NOT supported:
|
|
29
|
+
|
|
30
|
+
* Attribute value predicates: `//book[@id='123']` ❌
|
|
31
|
+
* Logical operators: `//book[@id and @title]` ❌
|
|
32
|
+
* Position predicates: `//book[1]`, `//book[last()]` ❌
|
|
33
|
+
* Text predicates: `//book[text()='Title']` ❌
|
|
34
|
+
* Namespace queries: `//ns:element` ❌
|
|
35
|
+
* Parent axis: `//child/..` ❌
|
|
36
|
+
* Sibling axes: `following-sibling::*` ❌
|
|
37
|
+
* XPath functions: `count()`, `concat()`, etc. ❌
|
|
38
|
+
|
|
39
|
+
*Workaround:* Use Ruby `Enumerable` methods after basic queries:
|
|
40
|
+
|
|
41
|
+
[source,ruby]
|
|
42
|
+
----
|
|
43
|
+
# Instead of: doc.xpath("//book[@id='123']")
|
|
44
|
+
# Use:
|
|
45
|
+
doc.xpath("//book").find { |book| book["id"] == "123" }
|
|
46
|
+
----
|
|
47
|
+
|
|
48
|
+
IMPORTANT: For complete XPath 1.0 support with zero limitations today, use the
|
|
49
|
+
Nokogiri or Oga adapters.
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
See also:
|
|
54
|
+
|
|
55
|
+
* link:headed-ox[HeadedOx adapter] - Ox with full XPath support
|
|
56
|
+
* link:../compatibility[Adapter compatibility matrix]
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: REXML
|
|
3
|
+
parent: Adapters
|
|
4
|
+
nav_order: 4
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
== REXML adapter
|
|
8
|
+
|
|
9
|
+
=== Purpose
|
|
10
|
+
|
|
11
|
+
The REXML adapter provides XML processing through Ruby's standard library,
|
|
12
|
+
offering maximum portability without requiring any external gems.
|
|
13
|
+
|
|
14
|
+
=== Overview
|
|
15
|
+
|
|
16
|
+
link:https://github.com/ruby/rexml[REXML] is Ruby's built-in XML parser,
|
|
17
|
+
distributed as part of the standard library. It requires no external
|
|
18
|
+
dependencies and works on all Ruby installations.
|
|
19
|
+
|
|
20
|
+
=== Installation
|
|
21
|
+
|
|
22
|
+
REXML is included with Ruby, so only Moxml needs to be installed:
|
|
23
|
+
|
|
24
|
+
[source,ruby]
|
|
25
|
+
----
|
|
26
|
+
gem 'moxml'
|
|
27
|
+
# No additional gems needed - REXML is in Ruby stdlib
|
|
28
|
+
----
|
|
29
|
+
|
|
30
|
+
Install:
|
|
31
|
+
|
|
32
|
+
[source,shell]
|
|
33
|
+
----
|
|
34
|
+
bundle install
|
|
35
|
+
----
|
|
36
|
+
|
|
37
|
+
=== Configuration
|
|
38
|
+
|
|
39
|
+
[source,ruby]
|
|
40
|
+
----
|
|
41
|
+
# Explicit selection
|
|
42
|
+
context = Moxml.new
|
|
43
|
+
context.config.adapter = :rexml
|
|
44
|
+
|
|
45
|
+
# Global default
|
|
46
|
+
Moxml::Config.default_adapter = :rexml
|
|
47
|
+
----
|
|
48
|
+
|
|
49
|
+
=== Features
|
|
50
|
+
|
|
51
|
+
==== Basic XPath support
|
|
52
|
+
|
|
53
|
+
REXML provides basic XPath querying:
|
|
54
|
+
|
|
55
|
+
[source,ruby]
|
|
56
|
+
----
|
|
57
|
+
doc = Moxml.new.parse(xml)
|
|
58
|
+
|
|
59
|
+
# Basic paths work
|
|
60
|
+
books = doc.xpath('//book')
|
|
61
|
+
first_book = doc.xpath('/library/book[1]')
|
|
62
|
+
|
|
63
|
+
# Attribute predicates work
|
|
64
|
+
has_id = doc.xpath('//book[@id]')
|
|
65
|
+
specific_book = doc.xpath('//book[@id="1"]')
|
|
66
|
+
|
|
67
|
+
# Position predicates work
|
|
68
|
+
first_three = doc.xpath('//book[position() < 4]')
|
|
69
|
+
----
|
|
70
|
+
|
|
71
|
+
==== Limited namespace support
|
|
72
|
+
|
|
73
|
+
REXML can parse and preserve namespaces but cannot use them in XPath queries:
|
|
74
|
+
|
|
75
|
+
[source,ruby]
|
|
76
|
+
----
|
|
77
|
+
xml = '<library xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:title>Book</dc:title></library>'
|
|
78
|
+
|
|
79
|
+
doc = Moxml.new.parse(xml)
|
|
80
|
+
|
|
81
|
+
# Namespace metadata is preserved
|
|
82
|
+
element = doc.root.children.first
|
|
83
|
+
puts element.namespace # Works - returns namespace URI
|
|
84
|
+
|
|
85
|
+
# But namespace-aware XPath does NOT work
|
|
86
|
+
doc.xpath('//dc:title', 'dc' => 'http://purl.org/dc/elements/1.1/')
|
|
87
|
+
# => Returns empty, cannot resolve namespace prefix
|
|
88
|
+
|
|
89
|
+
# Workaround: use element name without namespace
|
|
90
|
+
doc.xpath('//title') # Works - finds the element
|
|
91
|
+
----
|
|
92
|
+
|
|
93
|
+
==== All basic node types
|
|
94
|
+
|
|
95
|
+
Support for standard XML node types:
|
|
96
|
+
|
|
97
|
+
* Elements with attributes
|
|
98
|
+
* Text nodes
|
|
99
|
+
* CDATA sections
|
|
100
|
+
* Comments
|
|
101
|
+
* Processing instructions
|
|
102
|
+
* DOCTYPE declarations
|
|
103
|
+
* XML declarations
|
|
104
|
+
|
|
105
|
+
==== Standard library advantage
|
|
106
|
+
|
|
107
|
+
* Always available - no gem dependencies
|
|
108
|
+
* Maximum portability across Ruby versions
|
|
109
|
+
* Simple deployment - no compilation needed
|
|
110
|
+
* Guaranteed compatibility
|
|
111
|
+
|
|
112
|
+
=== Limitations
|
|
113
|
+
|
|
114
|
+
REXML has several limitations compared to other adapters:
|
|
115
|
+
|
|
116
|
+
**XPath limitations:**
|
|
117
|
+
|
|
118
|
+
* No namespace-aware XPath queries (see example above)
|
|
119
|
+
* Limited function support
|
|
120
|
+
* Some axes not supported
|
|
121
|
+
* Complex predicates may not work
|
|
122
|
+
|
|
123
|
+
**Performance:**
|
|
124
|
+
|
|
125
|
+
* Slower parsing than C-based libraries
|
|
126
|
+
* Medium serialization speed
|
|
127
|
+
* Higher memory usage than Ox
|
|
128
|
+
|
|
129
|
+
**Namespace XPath workaround:**
|
|
130
|
+
|
|
131
|
+
[source,ruby]
|
|
132
|
+
----
|
|
133
|
+
# Instead of:
|
|
134
|
+
doc.xpath('//ns:element', 'ns' => 'http://example.org')
|
|
135
|
+
|
|
136
|
+
# Use element name matching:
|
|
137
|
+
elements = doc.xpath('//element')
|
|
138
|
+
|
|
139
|
+
# Then filter in Ruby:
|
|
140
|
+
elements.select { |e| e.namespace == 'http://example.org' }
|
|
141
|
+
----
|
|
142
|
+
|
|
143
|
+
=== Performance characteristics
|
|
144
|
+
|
|
145
|
+
Based on benchmarks:
|
|
146
|
+
|
|
147
|
+
[cols="2,2,3"]
|
|
148
|
+
|===
|
|
149
|
+
| Operation | Performance | Notes
|
|
150
|
+
|
|
151
|
+
| Parse medium XML
|
|
152
|
+
| ~10-20 ips
|
|
153
|
+
| Pure Ruby parsing
|
|
154
|
+
|
|
155
|
+
| Serialize medium XML
|
|
156
|
+
| ~500-1,000 ips
|
|
157
|
+
| Medium speed
|
|
158
|
+
|
|
159
|
+
| XPath queries
|
|
160
|
+
| ~5,000-10,000 ips
|
|
161
|
+
| Limited XPath
|
|
162
|
+
|
|
163
|
+
| Memory usage
|
|
164
|
+
| Medium
|
|
165
|
+
| Pure Ruby overhead
|
|
166
|
+
|===
|
|
167
|
+
|
|
168
|
+
=== Best use cases
|
|
169
|
+
|
|
170
|
+
**Choose REXML when:**
|
|
171
|
+
|
|
172
|
+
* No external gems can be used (standard library only)
|
|
173
|
+
* Maximum portability is required
|
|
174
|
+
* Documents are small to medium in size
|
|
175
|
+
* Deployment simplicity is critical
|
|
176
|
+
* C extensions cannot be compiled
|
|
177
|
+
* Basic XPath without namespaces is sufficient
|
|
178
|
+
|
|
179
|
+
**Avoid REXML when:**
|
|
180
|
+
|
|
181
|
+
* Namespace-aware XPath is required (use link:oga[Oga],
|
|
182
|
+
link:nokogiri[Nokogiri], or link:libxml[LibXML])
|
|
183
|
+
* High performance is needed (use link:ox[Ox] or link:nokogiri[Nokogiri])
|
|
184
|
+
* Complex XPath expressions are needed (use link:nokogiri[Nokogiri])
|
|
185
|
+
|
|
186
|
+
=== Example usage
|
|
187
|
+
|
|
188
|
+
==== Basic operations
|
|
189
|
+
|
|
190
|
+
[source,ruby]
|
|
191
|
+
----
|
|
192
|
+
require 'moxml'
|
|
193
|
+
|
|
194
|
+
# Configure REXML adapter
|
|
195
|
+
context = Moxml.new
|
|
196
|
+
context.config.adapter = :rexml
|
|
197
|
+
|
|
198
|
+
xml = '<library><book id="1">Ruby Programming</book></library>'
|
|
199
|
+
doc = context.parse(xml)
|
|
200
|
+
|
|
201
|
+
# Basic XPath works
|
|
202
|
+
book = doc.at_xpath('//book[@id="1"]')
|
|
203
|
+
puts book.text # => "Ruby Programming"
|
|
204
|
+
|
|
205
|
+
# Modify
|
|
206
|
+
book.text = 'Advanced Ruby'
|
|
207
|
+
book['edition'] = '2nd'
|
|
208
|
+
|
|
209
|
+
puts doc.to_xml(indent: 2)
|
|
210
|
+
----
|
|
211
|
+
|
|
212
|
+
==== XPath queries
|
|
213
|
+
|
|
214
|
+
[source,ruby]
|
|
215
|
+
----
|
|
216
|
+
# Supported patterns
|
|
217
|
+
books = doc.xpath('//book')
|
|
218
|
+
with_id = doc.xpath('//book[@id]')
|
|
219
|
+
specific = doc.xpath('//book[@id="1"]')
|
|
220
|
+
first_two = doc.xpath('//book[position() <= 2]')
|
|
221
|
+
|
|
222
|
+
# NOT supported - avoid these
|
|
223
|
+
# doc.xpath('//ns:book', namespaces) # ❌ No namespace XPath
|
|
224
|
+
# doc.xpath('count(//book)') # ⚠️ Limited functions
|
|
225
|
+
# doc.xpath('//book[price < 30]') # ⚠️ May not work
|
|
226
|
+
----
|
|
227
|
+
|
|
228
|
+
==== Namespace workarounds
|
|
229
|
+
|
|
230
|
+
[source,ruby]
|
|
231
|
+
----
|
|
232
|
+
xml = <<~XML
|
|
233
|
+
<library xmlns:dc="http://purl.org/dc/elements/1.1/">
|
|
234
|
+
<dc:title>Programming</dc:title>
|
|
235
|
+
<dc:creator>Smith</dc:creator>
|
|
236
|
+
</library>
|
|
237
|
+
XML
|
|
238
|
+
|
|
239
|
+
doc = Moxml.new.parse(xml)
|
|
240
|
+
|
|
241
|
+
# Find elements by name (without namespace prefix)
|
|
242
|
+
titles = doc.xpath('//title')
|
|
243
|
+
|
|
244
|
+
# Check namespace programmatically
|
|
245
|
+
titles.each do |title|
|
|
246
|
+
if title.namespace == 'http://purl.org/dc/elements/1.1/'
|
|
247
|
+
puts "DC title: #{title.text}"
|
|
248
|
+
end
|
|
249
|
+
end
|
|
250
|
+
----
|
|
251
|
+
|
|
252
|
+
=== Comparison with other pure Ruby options
|
|
253
|
+
|
|
254
|
+
[cols="2,2,2"]
|
|
255
|
+
|===
|
|
256
|
+
| Aspect | REXML | Oga
|
|
257
|
+
|
|
258
|
+
| XPath support
|
|
259
|
+
| Limited
|
|
260
|
+
| Full XPath 1.0
|
|
261
|
+
|
|
262
|
+
| Performance
|
|
263
|
+
| Medium
|
|
264
|
+
| Fast
|
|
265
|
+
|
|
266
|
+
| Memory usage
|
|
267
|
+
| Medium
|
|
268
|
+
| Medium
|
|
269
|
+
|
|
270
|
+
| Namespace XPath
|
|
271
|
+
| Not supported
|
|
272
|
+
| Full support
|
|
273
|
+
|
|
274
|
+
| Standard library
|
|
275
|
+
| Yes
|
|
276
|
+
| No (external gem)
|
|
277
|
+
|
|
278
|
+
| Dependencies
|
|
279
|
+
| None
|
|
280
|
+
| None (pure Ruby)
|
|
281
|
+
|===
|
|
282
|
+
|
|
283
|
+
=== References
|
|
284
|
+
|
|
285
|
+
* link:https://github.com/ruby/rexml[REXML on GitHub]
|
|
286
|
+
* link:https://ruby-doc.org/stdlib/libdoc/rexml/rdoc/REXML.html[REXML documentation]
|
|
287
|
+
|
|
288
|
+
=== See also
|
|
289
|
+
|
|
290
|
+
* link:../compatibility[Compatibility matrix] - Feature comparison
|
|
291
|
+
* link:oga[Oga adapter] - Alternative pure Ruby with full XPath
|
|
292
|
+
* link:../../tutorials/xpath-queries[XPath queries tutorial]
|