rxerces 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGES.md +19 -0
- data/README.md +14 -3
- data/benchmarks/README.md +68 -0
- data/benchmarks/css_benchmark.rb +115 -0
- data/benchmarks/parse_benchmark.rb +103 -0
- data/benchmarks/run_all.rb +25 -0
- data/benchmarks/serialization_benchmark.rb +93 -0
- data/benchmarks/traversal_benchmark.rb +149 -0
- data/benchmarks/xpath_benchmark.rb +100 -0
- data/ext/rxerces/rxerces.cpp +977 -50
- data/lib/rxerces/nokogiri.rb +26 -0
- data/lib/rxerces/version.rb +1 -1
- data/rxerces.gemspec +1 -1
- data/spec/document_spec.rb +117 -0
- data/spec/node_spec.rb +408 -4
- data/spec/nodeset_spec.rb +59 -0
- data/spec/nokogiri_compatibility_spec.rb +44 -0
- data/spec/rxerces_shared.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +8 -1
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3a1129744191c92a7a15cb783fc74cbccfd2dbeedfbcf5741e295686c38b8108
|
|
4
|
+
data.tar.gz: 0b03758412cff12acccf3232f0f135ec69e0eaabcb6aa015091a44ed75e53081
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e2ba8796e66c0163d1f3bd9ae91d2fde804b462b39fd750e8f600b073e1ad02e36a2978cd533ea7256a9ad7b7d906c4462927bc609b1f59ac7c33f39b5423b83
|
|
7
|
+
data.tar.gz: 9b7b522fc2c5f203a2de65ba2d112ef644cf84fb18128503432ab37240f4b31cdc7acde58c83d00e5b17e006433e595aa19f46f2a5a11345c1778c55a76c1820
|
checksums.yaml.gz.sig
CHANGED
|
Binary file
|
data/CHANGES.md
CHANGED
|
@@ -1,3 +1,22 @@
|
|
|
1
|
+
## 0.6.0 - 17-Dec-2025
|
|
2
|
+
* Some internal refactoring, better initialization, some better string
|
|
3
|
+
handling, that sort of stuff.
|
|
4
|
+
* Added the Document#errors method with more detailed information. Also
|
|
5
|
+
helps with Nokogiri compatibility.
|
|
6
|
+
* Added some benchmarks (the results aren't great compared to other libraries, oh well).
|
|
7
|
+
|
|
8
|
+
## 0.5.0 - 16-Dec-2025
|
|
9
|
+
* Implemented a real css method. Requires Xalan to be installed.
|
|
10
|
+
* Added text/content methods for most classes.
|
|
11
|
+
* Added a nicer inspect method for most classes.
|
|
12
|
+
* Added an HTML alias for XML, mainly for compatibility with Nokogiri,
|
|
13
|
+
but keep in mind that this library parses HTML as XML.
|
|
14
|
+
* Added the Node#ancestors method.
|
|
15
|
+
* Added the Node#has_attribute method.
|
|
16
|
+
* Added first, last, empty? and inner_html methods for Node.
|
|
17
|
+
* Added elements, next_element and previous_element for Node.
|
|
18
|
+
* Added the Document#at_css method.
|
|
19
|
+
|
|
1
20
|
## 0.4.0 - 15-Dec-2025
|
|
2
21
|
* Now uses Xalan if installed for xpath 1.0 compliance.
|
|
3
22
|
* Added Node#search.
|
data/README.md
CHANGED
|
@@ -102,16 +102,27 @@ RXerces provides optional Nokogiri compatibility. Require `rxerces/nokogiri` to
|
|
|
102
102
|
```ruby
|
|
103
103
|
require 'rxerces/nokogiri'
|
|
104
104
|
|
|
105
|
-
#
|
|
105
|
+
# Parse XML with Nokogiri syntax
|
|
106
106
|
doc = Nokogiri.XML('<root><child>text</child></root>')
|
|
107
107
|
puts doc.root.name # => "root"
|
|
108
108
|
|
|
109
|
-
#
|
|
110
|
-
|
|
109
|
+
# Parse HTML with Nokogiri syntax
|
|
110
|
+
html_doc = Nokogiri.HTML('<html><body><h1>Hello</h1></body></html>')
|
|
111
|
+
puts html_doc.root.name # => "html"
|
|
112
|
+
|
|
113
|
+
# Alternative syntax
|
|
114
|
+
xml_doc = Nokogiri::XML.parse('<root>text</root>')
|
|
115
|
+
html_doc = Nokogiri::HTML.parse('<html>...</html>')
|
|
116
|
+
|
|
117
|
+
# Classes are aliased for both XML and HTML
|
|
118
|
+
Nokogiri::XML::Document == RXerces::XML::Document # => true
|
|
119
|
+
Nokogiri::HTML::Document == RXerces::XML::Document # => true
|
|
111
120
|
```
|
|
112
121
|
|
|
113
122
|
**Note:** If you don't need Nokogiri compatibility, just `require 'rxerces'` and use the `RXerces` module directly.
|
|
114
123
|
|
|
124
|
+
**HTML Parsing Note:** Since RXerces uses Xerces-C (an XML parser), `Nokogiri::HTML` parses HTML as XML. This means it won't perform HTML-specific error correction or tag fixing like Nokogiri does with libxml2's HTML parser. For HTML that is also well-formed XML (for example XHTML, where every tag is closed), this works fine.
|
|
125
|
+
|
|
115
126
|
### Working with Nodes
|
|
116
127
|
|
|
117
128
|
```ruby
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# RXerces Performance Benchmarks
|
|
2
|
+
|
|
3
|
+
This directory contains performance benchmarks comparing RXerces with other popular Ruby XML libraries (Nokogiri and Ox).
|
|
4
|
+
|
|
5
|
+
## Setup
|
|
6
|
+
|
|
7
|
+
Install the required gems:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
gem install benchmark-ips nokogiri ox
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Running Benchmarks
|
|
14
|
+
|
|
15
|
+
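Run all benchmarks from the project root, either one at a time as shown below or in sequence with `ruby benchmarks/run_all.rb`: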
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
ruby benchmarks/parse_benchmark.rb
|
|
19
|
+
ruby benchmarks/xpath_benchmark.rb
|
|
20
|
+
ruby benchmarks/css_benchmark.rb
|
|
21
|
+
ruby benchmarks/traversal_benchmark.rb
|
|
22
|
+
ruby benchmarks/serialization_benchmark.rb
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Or run a specific benchmark:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
ruby benchmarks/parse_benchmark.rb
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Benchmark Categories
|
|
32
|
+
|
|
33
|
+
### 1. Parse Benchmark (`parse_benchmark.rb`)
|
|
34
|
+
Tests XML document parsing performance with small, medium, and large documents.
|
|
35
|
+
|
|
36
|
+
### 2. XPath Benchmark (`xpath_benchmark.rb`)
|
|
37
|
+
Tests XPath query performance including:
|
|
38
|
+
- Simple queries (`//book`)
|
|
39
|
+
- Attribute-based queries (`//book[@category='fiction']`)
|
|
40
|
+
- Complex queries with predicates
|
|
41
|
+
- `at_xpath` for first-match queries
|
|
42
|
+
|
|
43
|
+
### 3. CSS Benchmark (`css_benchmark.rb`)
|
|
44
|
+
Tests CSS selector performance including:
|
|
45
|
+
- Simple selectors (`div`)
|
|
46
|
+
- Class selectors (`.title`)
|
|
47
|
+
- ID selectors (`#div100`)
|
|
48
|
+
- Descendant combinators (`div.content p.text`)
|
|
49
|
+
- `at_css` for first-match queries
|
|
50
|
+
|
|
51
|
+
### 4. Traversal Benchmark (`traversal_benchmark.rb`)
|
|
52
|
+
Tests DOM traversal operations:
|
|
53
|
+
- `.children` access
|
|
54
|
+
- `.element_children` access
|
|
55
|
+
- `.parent` access
|
|
56
|
+
- `.ancestors` access
|
|
57
|
+
- `.next_sibling` access
|
|
58
|
+
- `.text` extraction
|
|
59
|
+
|
|
60
|
+
### 5. Serialization Benchmark (`serialization_benchmark.rb`)
|
|
61
|
+
Tests document serialization (`to_s`/`to_xml`) with various document sizes.
|
|
62
|
+
|
|
63
|
+
## Notes
|
|
64
|
+
|
|
65
|
+
- All benchmarks use `benchmark-ips` for accurate iterations-per-second measurements
|
|
66
|
+
- Each benchmark runs with a 2-second warmup and 5-second measurement period
|
|
67
|
+
- Nokogiri and Ox comparisons are skipped when those gems are not installed (the script prints the `gem install` command instead)
|
|
68
|
+
- RXerces requires Xalan-C for full XPath 1.0 support (CSS selectors need XPath)
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require 'benchmark/ips'
|
|
5
|
+
require 'rxerces'
|
|
6
|
+
|
|
7
|
+
begin
|
|
8
|
+
require 'nokogiri'
|
|
9
|
+
NOKOGIRI_AVAILABLE = true
|
|
10
|
+
rescue LoadError
|
|
11
|
+
NOKOGIRI_AVAILABLE = false
|
|
12
|
+
puts "Nokogiri not available - install with: gem install nokogiri"
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
# Sample HTML/XML for CSS selector queries
|
|
16
|
+
HTML_DATA = begin
|
|
17
|
+
divs = (1..200).map do |i|
|
|
18
|
+
"<div class=\"content\" id=\"div#{i}\">
|
|
19
|
+
<h2 class=\"title\">Heading #{i}</h2>
|
|
20
|
+
<p class=\"text\">Paragraph #{i} with <span class=\"highlight\">highlighted</span> text.</p>
|
|
21
|
+
<ul class=\"list\">
|
|
22
|
+
<li>Item 1</li>
|
|
23
|
+
<li>Item 2</li>
|
|
24
|
+
<li class=\"special\">Special Item</li>
|
|
25
|
+
</ul>
|
|
26
|
+
</div>"
|
|
27
|
+
end
|
|
28
|
+
"<html><body>#{divs.join}</body></html>"
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
puts "=" * 80
|
|
32
|
+
puts "CSS Selector Benchmarks"
|
|
33
|
+
puts "=" * 80
|
|
34
|
+
puts "Document Size: #{HTML_DATA.bytesize} bytes"
|
|
35
|
+
puts
|
|
36
|
+
|
|
37
|
+
# Parse documents once
|
|
38
|
+
rxerces_doc = RXerces::XML::Document.parse(HTML_DATA)
|
|
39
|
+
nokogiri_doc = Nokogiri::HTML(HTML_DATA) if NOKOGIRI_AVAILABLE
|
|
40
|
+
|
|
41
|
+
# Simple CSS selector
|
|
42
|
+
puts "Simple CSS: div"
|
|
43
|
+
puts "-" * 80
|
|
44
|
+
|
|
45
|
+
Benchmark.ips do |x|
|
|
46
|
+
x.config(time: 5, warmup: 2)
|
|
47
|
+
|
|
48
|
+
x.report("rxerces") { rxerces_doc.css('div') }
|
|
49
|
+
x.report("nokogiri") { nokogiri_doc.css('div') } if NOKOGIRI_AVAILABLE
|
|
50
|
+
|
|
51
|
+
x.compare!
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
puts
|
|
55
|
+
|
|
56
|
+
# Class selector
|
|
57
|
+
puts "Class CSS: .title"
|
|
58
|
+
puts "-" * 80
|
|
59
|
+
|
|
60
|
+
Benchmark.ips do |x|
|
|
61
|
+
x.config(time: 5, warmup: 2)
|
|
62
|
+
|
|
63
|
+
x.report("rxerces") { rxerces_doc.css('.title') }
|
|
64
|
+
x.report("nokogiri") { nokogiri_doc.css('.title') } if NOKOGIRI_AVAILABLE
|
|
65
|
+
|
|
66
|
+
x.compare!
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
puts
|
|
70
|
+
|
|
71
|
+
# ID selector
|
|
72
|
+
puts "ID CSS: #div100"
|
|
73
|
+
puts "-" * 80
|
|
74
|
+
|
|
75
|
+
Benchmark.ips do |x|
|
|
76
|
+
x.config(time: 5, warmup: 2)
|
|
77
|
+
|
|
78
|
+
x.report("rxerces") { rxerces_doc.css('#div100') }
|
|
79
|
+
x.report("nokogiri") { nokogiri_doc.css('#div100') } if NOKOGIRI_AVAILABLE
|
|
80
|
+
|
|
81
|
+
x.compare!
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
puts
|
|
85
|
+
|
|
86
|
+
# Descendant combinator
|
|
87
|
+
puts "Descendant CSS: div.content p.text"
|
|
88
|
+
puts "-" * 80
|
|
89
|
+
|
|
90
|
+
Benchmark.ips do |x|
|
|
91
|
+
x.config(time: 5, warmup: 2)
|
|
92
|
+
|
|
93
|
+
x.report("rxerces") { rxerces_doc.css('div.content p.text') }
|
|
94
|
+
x.report("nokogiri") { nokogiri_doc.css('div.content p.text') } if NOKOGIRI_AVAILABLE
|
|
95
|
+
|
|
96
|
+
x.compare!
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
puts
|
|
100
|
+
|
|
101
|
+
# at_css (first match)
|
|
102
|
+
puts "at_css: .title (first match only)"
|
|
103
|
+
puts "-" * 80
|
|
104
|
+
|
|
105
|
+
Benchmark.ips do |x|
|
|
106
|
+
x.config(time: 5, warmup: 2)
|
|
107
|
+
|
|
108
|
+
x.report("rxerces") { rxerces_doc.at_css('.title') }
|
|
109
|
+
x.report("nokogiri") { nokogiri_doc.at_css('.title') } if NOKOGIRI_AVAILABLE
|
|
110
|
+
|
|
111
|
+
x.compare!
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
puts
|
|
115
|
+
puts "=" * 80
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require 'benchmark/ips'
|
|
5
|
+
require 'rxerces'
|
|
6
|
+
|
|
7
|
+
# Try to load Nokogiri and Ox
|
|
8
|
+
begin
|
|
9
|
+
require 'nokogiri'
|
|
10
|
+
NOKOGIRI_AVAILABLE = true
|
|
11
|
+
rescue LoadError
|
|
12
|
+
NOKOGIRI_AVAILABLE = false
|
|
13
|
+
puts "Nokogiri not available - install with: gem install nokogiri"
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
begin
|
|
17
|
+
require 'ox'
|
|
18
|
+
OX_AVAILABLE = true
|
|
19
|
+
rescue LoadError
|
|
20
|
+
OX_AVAILABLE = false
|
|
21
|
+
puts "Ox not available - install with: gem install ox"
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
# Sample XML documents of varying sizes
|
|
25
|
+
SMALL_XML = <<~XML
|
|
26
|
+
<root>
|
|
27
|
+
<person id="1" name="Alice">
|
|
28
|
+
<age>30</age>
|
|
29
|
+
<city>New York</city>
|
|
30
|
+
</person>
|
|
31
|
+
</root>
|
|
32
|
+
XML
|
|
33
|
+
|
|
34
|
+
MEDIUM_XML = begin
|
|
35
|
+
people = (1..100).map do |i|
|
|
36
|
+
"<person id=\"#{i}\" name=\"Person#{i}\"><age>#{20 + (i % 50)}</age><city>City#{i % 20}</city></person>"
|
|
37
|
+
end
|
|
38
|
+
"<root>#{people.join}</root>"
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# Generate a large XML document
|
|
42
|
+
def generate_large_xml(count = 1000)
|
|
43
|
+
people = (1..count).map do |i|
|
|
44
|
+
"<person id=\"#{i}\" name=\"Person#{i}\"><age>#{20 + (i % 50)}</age><city>City#{i % 20}</city></person>"
|
|
45
|
+
end
|
|
46
|
+
"<root>#{people.join}</root>"
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
LARGE_XML = generate_large_xml(1000)
|
|
50
|
+
|
|
51
|
+
puts "=" * 80
|
|
52
|
+
puts "XML Parsing Benchmarks"
|
|
53
|
+
puts "=" * 80
|
|
54
|
+
puts
|
|
55
|
+
|
|
56
|
+
# Small XML parsing
|
|
57
|
+
puts "Small XML Parsing (#{SMALL_XML.bytesize} bytes)"
|
|
58
|
+
puts "-" * 80
|
|
59
|
+
|
|
60
|
+
Benchmark.ips do |x|
|
|
61
|
+
x.config(time: 5, warmup: 2)
|
|
62
|
+
|
|
63
|
+
x.report("rxerces") { RXerces::XML::Document.parse(SMALL_XML) }
|
|
64
|
+
x.report("nokogiri") { Nokogiri::XML(SMALL_XML) } if NOKOGIRI_AVAILABLE
|
|
65
|
+
x.report("ox") { Ox.parse(SMALL_XML) } if OX_AVAILABLE
|
|
66
|
+
|
|
67
|
+
x.compare!
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
puts
|
|
71
|
+
|
|
72
|
+
# Medium XML parsing
|
|
73
|
+
puts "Medium XML Parsing (#{MEDIUM_XML.bytesize} bytes)"
|
|
74
|
+
puts "-" * 80
|
|
75
|
+
|
|
76
|
+
Benchmark.ips do |x|
|
|
77
|
+
x.config(time: 5, warmup: 2)
|
|
78
|
+
|
|
79
|
+
x.report("rxerces") { RXerces::XML::Document.parse(MEDIUM_XML) }
|
|
80
|
+
x.report("nokogiri") { Nokogiri::XML(MEDIUM_XML) } if NOKOGIRI_AVAILABLE
|
|
81
|
+
x.report("ox") { Ox.parse(MEDIUM_XML) } if OX_AVAILABLE
|
|
82
|
+
|
|
83
|
+
x.compare!
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
puts
|
|
87
|
+
|
|
88
|
+
# Large XML parsing
|
|
89
|
+
puts "Large XML Parsing (#{LARGE_XML.bytesize} bytes)"
|
|
90
|
+
puts "-" * 80
|
|
91
|
+
|
|
92
|
+
Benchmark.ips do |x|
|
|
93
|
+
x.config(time: 5, warmup: 2)
|
|
94
|
+
|
|
95
|
+
x.report("rxerces") { RXerces::XML::Document.parse(LARGE_XML) }
|
|
96
|
+
x.report("nokogiri") { Nokogiri::XML(LARGE_XML) } if NOKOGIRI_AVAILABLE
|
|
97
|
+
x.report("ox") { Ox.parse(LARGE_XML) } if OX_AVAILABLE
|
|
98
|
+
|
|
99
|
+
x.compare!
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
puts
|
|
103
|
+
puts "=" * 80
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Run all benchmarks in sequence
|
|
5
|
+
|
|
6
|
+
benchmarks = %w[
|
|
7
|
+
parse_benchmark.rb
|
|
8
|
+
xpath_benchmark.rb
|
|
9
|
+
css_benchmark.rb
|
|
10
|
+
traversal_benchmark.rb
|
|
11
|
+
serialization_benchmark.rb
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
puts "Running all RXerces benchmarks..."
|
|
15
|
+
puts "=" * 80
|
|
16
|
+
puts
|
|
17
|
+
|
|
18
|
+
benchmarks.each do |benchmark|
|
|
19
|
+
puts "\nRunning #{benchmark}...\n\n"
|
|
20
|
+
system("ruby -Ilib benchmarks/#{benchmark}")
|
|
21
|
+
puts "\n"
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
puts "=" * 80
|
|
25
|
+
puts "All benchmarks complete!"
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require 'benchmark/ips'
|
|
5
|
+
require 'rxerces'
|
|
6
|
+
|
|
7
|
+
begin
|
|
8
|
+
require 'nokogiri'
|
|
9
|
+
NOKOGIRI_AVAILABLE = true
|
|
10
|
+
rescue LoadError
|
|
11
|
+
NOKOGIRI_AVAILABLE = false
|
|
12
|
+
puts "Nokogiri not available - install with: gem install nokogiri"
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
puts "=" * 80
|
|
16
|
+
puts "Document Serialization Benchmarks"
|
|
17
|
+
puts "=" * 80
|
|
18
|
+
puts
|
|
19
|
+
|
|
20
|
+
# Small document
|
|
21
|
+
SMALL_XML = <<~XML
|
|
22
|
+
<root>
|
|
23
|
+
<person id="1" name="Alice">
|
|
24
|
+
<age>30</age>
|
|
25
|
+
<city>New York</city>
|
|
26
|
+
</person>
|
|
27
|
+
</root>
|
|
28
|
+
XML
|
|
29
|
+
|
|
30
|
+
rxerces_small = RXerces::XML::Document.parse(SMALL_XML)
|
|
31
|
+
nokogiri_small = Nokogiri::XML(SMALL_XML) if NOKOGIRI_AVAILABLE
|
|
32
|
+
|
|
33
|
+
puts "Small document to_s (#{SMALL_XML.bytesize} bytes)"
|
|
34
|
+
puts "-" * 80
|
|
35
|
+
|
|
36
|
+
Benchmark.ips do |x|
|
|
37
|
+
x.config(time: 5, warmup: 2)
|
|
38
|
+
|
|
39
|
+
x.report("rxerces") { rxerces_small.to_s }
|
|
40
|
+
x.report("nokogiri") { nokogiri_small.to_xml } if NOKOGIRI_AVAILABLE
|
|
41
|
+
|
|
42
|
+
x.compare!
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
puts
|
|
46
|
+
|
|
47
|
+
# Medium document
|
|
48
|
+
def generate_xml(count)
|
|
49
|
+
people = (1..count).map do |i|
|
|
50
|
+
"<person id=\"#{i}\" name=\"Person#{i}\"><age>#{20 + (i % 50)}</age><city>City#{i % 20}</city></person>"
|
|
51
|
+
end
|
|
52
|
+
"<root>#{people.join}</root>"
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
MEDIUM_XML = generate_xml(100)
|
|
56
|
+
|
|
57
|
+
rxerces_medium = RXerces::XML::Document.parse(MEDIUM_XML)
|
|
58
|
+
nokogiri_medium = Nokogiri::XML(MEDIUM_XML) if NOKOGIRI_AVAILABLE
|
|
59
|
+
|
|
60
|
+
puts "Medium document to_s (#{MEDIUM_XML.bytesize} bytes)"
|
|
61
|
+
puts "-" * 80
|
|
62
|
+
|
|
63
|
+
Benchmark.ips do |x|
|
|
64
|
+
x.config(time: 5, warmup: 2)
|
|
65
|
+
|
|
66
|
+
x.report("rxerces") { rxerces_medium.to_s }
|
|
67
|
+
x.report("nokogiri") { nokogiri_medium.to_xml } if NOKOGIRI_AVAILABLE
|
|
68
|
+
|
|
69
|
+
x.compare!
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
puts
|
|
73
|
+
|
|
74
|
+
# Large document
|
|
75
|
+
LARGE_XML = generate_xml(1000)
|
|
76
|
+
|
|
77
|
+
rxerces_large = RXerces::XML::Document.parse(LARGE_XML)
|
|
78
|
+
nokogiri_large = Nokogiri::XML(LARGE_XML) if NOKOGIRI_AVAILABLE
|
|
79
|
+
|
|
80
|
+
puts "Large document to_s (#{LARGE_XML.bytesize} bytes)"
|
|
81
|
+
puts "-" * 80
|
|
82
|
+
|
|
83
|
+
Benchmark.ips do |x|
|
|
84
|
+
x.config(time: 5, warmup: 2)
|
|
85
|
+
|
|
86
|
+
x.report("rxerces") { rxerces_large.to_s }
|
|
87
|
+
x.report("nokogiri") { nokogiri_large.to_xml } if NOKOGIRI_AVAILABLE
|
|
88
|
+
|
|
89
|
+
x.compare!
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
puts
|
|
93
|
+
puts "=" * 80
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require 'benchmark/ips'
|
|
5
|
+
require 'rxerces'
|
|
6
|
+
|
|
7
|
+
begin
|
|
8
|
+
require 'nokogiri'
|
|
9
|
+
NOKOGIRI_AVAILABLE = true
|
|
10
|
+
rescue LoadError
|
|
11
|
+
NOKOGIRI_AVAILABLE = false
|
|
12
|
+
puts "Nokogiri not available - install with: gem install nokogiri"
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
# Sample XML for DOM traversal
|
|
16
|
+
XML_DATA = begin
|
|
17
|
+
sections = (1..100).map do |i|
|
|
18
|
+
"<section id=\"s#{i}\">
|
|
19
|
+
<article id=\"a#{i}\">
|
|
20
|
+
<header>
|
|
21
|
+
<title>Title #{i}</title>
|
|
22
|
+
<author>Author #{i}</author>
|
|
23
|
+
</header>
|
|
24
|
+
<content>
|
|
25
|
+
<paragraph>Paragraph 1</paragraph>
|
|
26
|
+
<paragraph>Paragraph 2</paragraph>
|
|
27
|
+
<paragraph>Paragraph 3</paragraph>
|
|
28
|
+
</content>
|
|
29
|
+
<footer>
|
|
30
|
+
<date>2024-01-01</date>
|
|
31
|
+
</footer>
|
|
32
|
+
</article>
|
|
33
|
+
</section>"
|
|
34
|
+
end
|
|
35
|
+
"<root>#{sections.join}</root>"
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
puts "=" * 80
|
|
39
|
+
puts "DOM Traversal Benchmarks"
|
|
40
|
+
puts "=" * 80
|
|
41
|
+
puts "Document Size: #{XML_DATA.bytesize} bytes"
|
|
42
|
+
puts
|
|
43
|
+
|
|
44
|
+
# Parse documents once
|
|
45
|
+
rxerces_doc = RXerces::XML::Document.parse(XML_DATA)
|
|
46
|
+
nokogiri_doc = Nokogiri::XML(XML_DATA) if NOKOGIRI_AVAILABLE
|
|
47
|
+
|
|
48
|
+
rxerces_root = rxerces_doc.root
|
|
49
|
+
nokogiri_root = nokogiri_doc.root if NOKOGIRI_AVAILABLE
|
|
50
|
+
|
|
51
|
+
# Children access
|
|
52
|
+
puts "Access .children"
|
|
53
|
+
puts "-" * 80
|
|
54
|
+
|
|
55
|
+
Benchmark.ips do |x|
|
|
56
|
+
x.config(time: 5, warmup: 2)
|
|
57
|
+
|
|
58
|
+
x.report("rxerces") { rxerces_root.children }
|
|
59
|
+
x.report("nokogiri") { nokogiri_root.children } if NOKOGIRI_AVAILABLE
|
|
60
|
+
|
|
61
|
+
x.compare!
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
puts
|
|
65
|
+
|
|
66
|
+
# Element children access
|
|
67
|
+
puts "Access .element_children (elements only)"
|
|
68
|
+
puts "-" * 80
|
|
69
|
+
|
|
70
|
+
Benchmark.ips do |x|
|
|
71
|
+
x.config(time: 5, warmup: 2)
|
|
72
|
+
|
|
73
|
+
x.report("rxerces") { rxerces_root.element_children }
|
|
74
|
+
x.report("nokogiri") { nokogiri_root.element_children } if NOKOGIRI_AVAILABLE
|
|
75
|
+
|
|
76
|
+
x.compare!
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
puts
|
|
80
|
+
|
|
81
|
+
# Parent access
|
|
82
|
+
puts "Access .parent on deep node"
|
|
83
|
+
puts "-" * 80
|
|
84
|
+
|
|
85
|
+
rxerces_deep = rxerces_doc.xpath('//paragraph').first
|
|
86
|
+
nokogiri_deep = nokogiri_doc.xpath('//paragraph').first if NOKOGIRI_AVAILABLE
|
|
87
|
+
|
|
88
|
+
Benchmark.ips do |x|
|
|
89
|
+
x.config(time: 5, warmup: 2)
|
|
90
|
+
|
|
91
|
+
x.report("rxerces") { rxerces_deep.parent }
|
|
92
|
+
x.report("nokogiri") { nokogiri_deep.parent } if NOKOGIRI_AVAILABLE
|
|
93
|
+
|
|
94
|
+
x.compare!
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
puts
|
|
98
|
+
|
|
99
|
+
# Ancestors access
|
|
100
|
+
puts "Access .ancestors on deep node"
|
|
101
|
+
puts "-" * 80
|
|
102
|
+
|
|
103
|
+
Benchmark.ips do |x|
|
|
104
|
+
x.config(time: 5, warmup: 2)
|
|
105
|
+
|
|
106
|
+
x.report("rxerces") { rxerces_deep.ancestors }
|
|
107
|
+
x.report("nokogiri") { nokogiri_deep.ancestors } if NOKOGIRI_AVAILABLE
|
|
108
|
+
|
|
109
|
+
x.compare!
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
puts
|
|
113
|
+
|
|
114
|
+
# Next sibling
|
|
115
|
+
puts "Access .next_sibling"
|
|
116
|
+
puts "-" * 80
|
|
117
|
+
|
|
118
|
+
rxerces_para = rxerces_doc.xpath('//paragraph').first
|
|
119
|
+
nokogiri_para = nokogiri_doc.xpath('//paragraph').first if NOKOGIRI_AVAILABLE
|
|
120
|
+
|
|
121
|
+
Benchmark.ips do |x|
|
|
122
|
+
x.config(time: 5, warmup: 2)
|
|
123
|
+
|
|
124
|
+
x.report("rxerces") { rxerces_para.next_sibling }
|
|
125
|
+
x.report("nokogiri") { nokogiri_para.next_sibling } if NOKOGIRI_AVAILABLE
|
|
126
|
+
|
|
127
|
+
x.compare!
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
puts
|
|
131
|
+
|
|
132
|
+
# Text extraction
|
|
133
|
+
puts "Extract .text from element"
|
|
134
|
+
puts "-" * 80
|
|
135
|
+
|
|
136
|
+
rxerces_section = rxerces_doc.xpath('//section').first
|
|
137
|
+
nokogiri_section = nokogiri_doc.xpath('//section').first if NOKOGIRI_AVAILABLE
|
|
138
|
+
|
|
139
|
+
Benchmark.ips do |x|
|
|
140
|
+
x.config(time: 5, warmup: 2)
|
|
141
|
+
|
|
142
|
+
x.report("rxerces") { rxerces_section.text }
|
|
143
|
+
x.report("nokogiri") { nokogiri_section.text } if NOKOGIRI_AVAILABLE
|
|
144
|
+
|
|
145
|
+
x.compare!
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
puts
|
|
149
|
+
puts "=" * 80
|