rxerces 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGES.md +17 -0
- data/README.md +29 -1
- data/benchmarks/xpath_validation_cache_benchmark.rb +157 -0
- data/benchmarks/xpath_validation_micro_benchmark.rb +168 -0
- data/e +0 -0
- data/ext/rxerces/rxerces.cpp +497 -22
- data/lib/rxerces/version.rb +1 -1
- data/lib/rxerces.rb +3 -2
- data/rxerces.gemspec +2 -1
- data/spec/document_spec.rb +184 -17
- data/spec/node_spec.rb +230 -58
- data/spec/nodeset_spec.rb +90 -0
- data/spec/rxerces_shared.rb +1 -1
- data/spec/rxerces_spec.rb +58 -0
- data/spec/schema_spec.rb +28 -1
- data/spec/spec_helper.rb +5 -0
- data/spec/xpath_cache_spec.rb +409 -0
- data/spec/xpath_spec.rb +306 -18
- data/tmp/arm64-darwin24/rxerces/3.4.8/rxerces.bundle.dSYM/Contents/Info.plist +20 -0
- data/tmp/arm64-darwin24/rxerces/3.4.8/rxerces.bundle.dSYM/Contents/Resources/Relocations/aarch64/rxerces.bundle.yml +5 -0
- data.tar.gz.sig +0 -0
- metadata +25 -4
- metadata.gz.sig +0 -0
- /data/{tmp/arm64-darwin24/rxerces/3.4.7 → ext/rxerces}/rxerces.bundle.dSYM/Contents/Info.plist +0 -0
- /data/{tmp/arm64-darwin24/rxerces/3.4.7 → ext/rxerces}/rxerces.bundle.dSYM/Contents/Resources/Relocations/aarch64/rxerces.bundle.yml +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b67969a411b4eb67c0eef75b5c32496a39f962fc8f1b67015eeaf8cb20555d21
|
|
4
|
+
data.tar.gz: 3597880bb07a892766f5717d979cc140416f28e4db5d92b562b173e3f3ec4027
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e7fa8c0ca9ace56ea3ec1f070340ae11ffa92e2090220099c5a7e7a25e069973ebc85fabbdda120f94dd59fdd1ad72cdbb16f735f48fea77d1dccadb0c7ee27d
|
|
7
|
+
data.tar.gz: 66b6e8eceb35d815b9fa08dfbb0581a3561891ec9f47976386188f3bac207470db7cb97ea9f9688eab13e7288cde2ff9784dcc5e604dc33d092af0ea12c28512
|
checksums.yaml.gz.sig
CHANGED
|
Binary file
|
data/CHANGES.md
CHANGED
|
@@ -1,3 +1,20 @@
|
|
|
1
|
+
## 0.7.0 - 3-Jan-2026
|
|
2
|
+
* Added XPath validation to prevent XPath injection attacks, with checks for
|
|
3
|
+
unbalanced quotes, dangerous functions, encoded characters, and injection patterns.
|
|
4
|
+
* Added XPath validation caching with LRU eviction for better performance.
|
|
5
|
+
* Added configuration API for XPath validation caching.
|
|
6
|
+
* Added configurable XPath expression maximum length.
|
|
7
|
+
* Added RXerces.xalan_enabled? method to check Xalan availability.
|
|
8
|
+
* Added Node#attribute_nodes method to get attribute nodes as an array.
|
|
9
|
+
* Improved thread safety with mutex protection for Xerces/Xalan initialization.
|
|
10
|
+
* Added XXE (XML External Entity) protection; external entity processing is disabled by default.
|
|
11
|
+
* Improved exception handling with more specific error messages.
|
|
12
|
+
* Fixed UTF-8 truncation issues in NodeSet#inspect.
|
|
13
|
+
* Nodes are now automatically imported when adding children from different documents.
|
|
14
|
+
* Added validation for Document.parse options hash.
|
|
15
|
+
* Improved wrap_node function robustness.
|
|
16
|
+
* Now uses mkmf-lite to check for Xalan installation.
|
|
17
|
+
|
|
1
18
|
## 0.6.1 - 20-Dec-2025
|
|
2
19
|
* Added more Nokogiri compatibility methods: children, first_element_child,
|
|
3
20
|
last_element_child, elements, at_xpath.
|
data/README.md
CHANGED
|
@@ -247,6 +247,34 @@ For full XPath 1.0 support, install the Xalan library.
|
|
|
247
247
|
|
|
248
248
|
- `RXerces.XML(string)` - Parse XML string and return Document
|
|
249
249
|
- `RXerces.parse(string)` - Alias for `XML`
|
|
250
|
+
- `RXerces.xalan_enabled?` - Check if Xalan XPath 1.0 support is available
|
|
251
|
+
|
|
252
|
+
#### XPath Validation Cache Configuration
|
|
253
|
+
|
|
254
|
+
RXerces validates XPath expressions for security (preventing injection attacks). For high-volume applications, validated expressions are cached to avoid redundant validation overhead.
|
|
255
|
+
|
|
256
|
+
```ruby
|
|
257
|
+
# Check if caching is enabled (default: true)
|
|
258
|
+
RXerces.cache_xpath_validation? # => true
|
|
259
|
+
|
|
260
|
+
# Disable caching (re-validates every query)
|
|
261
|
+
RXerces.cache_xpath_validation = false
|
|
262
|
+
|
|
263
|
+
# Re-enable caching
|
|
264
|
+
RXerces.cache_xpath_validation = true
|
|
265
|
+
|
|
266
|
+
# Get current cache size
|
|
267
|
+
RXerces.xpath_validation_cache_size # => 42
|
|
268
|
+
|
|
269
|
+
# Get/set maximum cache size (default: 10,000)
|
|
270
|
+
RXerces.xpath_validation_cache_max_size # => 10000
|
|
271
|
+
RXerces.xpath_validation_cache_max_size = 5000
|
|
272
|
+
|
|
273
|
+
# Clear the cache
|
|
274
|
+
RXerces.clear_xpath_validation_cache
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
**Performance note:** Caching provides ~7-9% speedup for repeated XPath queries by avoiding redundant validation. The cache is thread-safe.
|
|
250
278
|
|
|
251
279
|
### RXerces::XML::Document
|
|
252
280
|
|
|
@@ -341,7 +369,7 @@ sorry state of that library in general. Since nokogiri uses it under the hood,
|
|
|
341
369
|
I thought it best to create an alternative.
|
|
342
370
|
|
|
343
371
|
## Copyright
|
|
344
|
-
(C) 2025, Daniel J. Berger
|
|
372
|
+
(C) 2025-2026, Daniel J. Berger
|
|
345
373
|
All Rights Reserved
|
|
346
374
|
|
|
347
375
|
## Author
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Benchmark comparing XPath performance with and without validation caching
|
|
5
|
+
#
|
|
6
|
+
# This benchmark measures the overhead of XPath expression validation
|
|
7
|
+
# and demonstrates the performance benefit of caching validated expressions.
|
|
8
|
+
|
|
9
|
+
require "benchmark/ips"
|
|
10
|
+
require "rxerces"
|
|
11
|
+
|
|
12
|
+
puts "=" * 70
|
|
13
|
+
puts "XPath Validation Cache Benchmarks"
|
|
14
|
+
puts "=" * 70
|
|
15
|
+
puts
|
|
16
|
+
|
|
17
|
+
# Build a moderately sized document
|
|
18
|
+
def build_xml(num_items)
|
|
19
|
+
items = (1..num_items).map do |i|
|
|
20
|
+
category = %w[fiction science history biography].sample
|
|
21
|
+
<<~ITEM
|
|
22
|
+
<item id="#{i}" category="#{category}">
|
|
23
|
+
<title>Item #{i}</title>
|
|
24
|
+
<price>#{(rand * 50).round(2)}</price>
|
|
25
|
+
<stock>#{rand(100)}</stock>
|
|
26
|
+
</item>
|
|
27
|
+
ITEM
|
|
28
|
+
end.join("\n")
|
|
29
|
+
|
|
30
|
+
<<~XML
|
|
31
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
32
|
+
<catalog>
|
|
33
|
+
#{items}
|
|
34
|
+
</catalog>
|
|
35
|
+
XML
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
xml = build_xml(500)
|
|
39
|
+
doc = RXerces::XML::Document.parse(xml)
|
|
40
|
+
|
|
41
|
+
puts "Document size: #{xml.bytesize} bytes"
|
|
42
|
+
puts "Xalan enabled: #{RXerces.xalan_enabled?}"
|
|
43
|
+
puts
|
|
44
|
+
|
|
45
|
+
# Define various XPath expressions to test
|
|
46
|
+
xpath_expressions = [
|
|
47
|
+
"//item",
|
|
48
|
+
"//item[@category='fiction']",
|
|
49
|
+
"//item/title",
|
|
50
|
+
"//item[price > 25]",
|
|
51
|
+
"//item[@id='100']",
|
|
52
|
+
"/catalog/item[1]",
|
|
53
|
+
"//item[contains(title, 'Item')]",
|
|
54
|
+
"//item[stock < 50]/title",
|
|
55
|
+
]
|
|
56
|
+
|
|
57
|
+
puts "-" * 70
|
|
58
|
+
puts "Single XPath expression, repeated queries (same expression)"
|
|
59
|
+
puts "-" * 70
|
|
60
|
+
puts
|
|
61
|
+
|
|
62
|
+
single_xpath = "//item[@category='fiction']"
|
|
63
|
+
|
|
64
|
+
Benchmark.ips do |x|
|
|
65
|
+
x.config(time: 5, warmup: 2)
|
|
66
|
+
|
|
67
|
+
x.report("with cache (default)") do
|
|
68
|
+
RXerces.cache_xpath_validation = true
|
|
69
|
+
doc.xpath(single_xpath)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
x.report("without cache") do
|
|
73
|
+
RXerces.cache_xpath_validation = false
|
|
74
|
+
doc.xpath(single_xpath)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
x.compare!
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# Reset to default
|
|
81
|
+
RXerces.cache_xpath_validation = true
|
|
82
|
+
RXerces.clear_xpath_validation_cache
|
|
83
|
+
|
|
84
|
+
puts
|
|
85
|
+
puts "-" * 70
|
|
86
|
+
puts "Multiple different XPath expressions (round-robin)"
|
|
87
|
+
puts "-" * 70
|
|
88
|
+
puts
|
|
89
|
+
|
|
90
|
+
Benchmark.ips do |x|
|
|
91
|
+
x.config(time: 5, warmup: 2)
|
|
92
|
+
|
|
93
|
+
x.report("with cache") do |times|
|
|
94
|
+
RXerces.cache_xpath_validation = true
|
|
95
|
+
times.times do |i|
|
|
96
|
+
doc.xpath(xpath_expressions[i % xpath_expressions.length])
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
x.report("without cache") do |times|
|
|
101
|
+
RXerces.cache_xpath_validation = false
|
|
102
|
+
times.times do |i|
|
|
103
|
+
doc.xpath(xpath_expressions[i % xpath_expressions.length])
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
x.compare!
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
# Reset
|
|
111
|
+
RXerces.cache_xpath_validation = true
|
|
112
|
+
RXerces.clear_xpath_validation_cache
|
|
113
|
+
|
|
114
|
+
puts
|
|
115
|
+
puts "-" * 70
|
|
116
|
+
puts "High-volume scenario: 1000 queries with same expression"
|
|
117
|
+
puts "-" * 70
|
|
118
|
+
puts
|
|
119
|
+
|
|
120
|
+
iterations = 1000
|
|
121
|
+
test_xpath = "//item[price > 20]"
|
|
122
|
+
|
|
123
|
+
Benchmark.ips do |x|
|
|
124
|
+
x.config(time: 5, warmup: 2)
|
|
125
|
+
|
|
126
|
+
x.report("with cache (1000 queries)") do
|
|
127
|
+
RXerces.cache_xpath_validation = true
|
|
128
|
+
iterations.times { doc.xpath(test_xpath) }
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
x.report("without cache (1000 queries)") do
|
|
132
|
+
RXerces.cache_xpath_validation = false
|
|
133
|
+
iterations.times { doc.xpath(test_xpath) }
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
x.compare!
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
# Reset
|
|
140
|
+
RXerces.cache_xpath_validation = true
|
|
141
|
+
RXerces.clear_xpath_validation_cache
|
|
142
|
+
|
|
143
|
+
puts
|
|
144
|
+
puts "-" * 70
|
|
145
|
+
puts "Cache statistics after benchmark"
|
|
146
|
+
puts "-" * 70
|
|
147
|
+
puts
|
|
148
|
+
|
|
149
|
+
# Run some queries to populate cache
|
|
150
|
+
xpath_expressions.each { |xp| doc.xpath(xp) }
|
|
151
|
+
|
|
152
|
+
puts "Cache size: #{RXerces.xpath_validation_cache_size}"
|
|
153
|
+
puts "Cache max size: #{RXerces.xpath_validation_cache_max_size}"
|
|
154
|
+
puts "Cache enabled: #{RXerces.cache_xpath_validation?}"
|
|
155
|
+
puts
|
|
156
|
+
puts "=" * 70
|
|
157
|
+
puts "Benchmark complete!"
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Micro-benchmark isolating XPath validation overhead
|
|
5
|
+
#
|
|
6
|
+
# This benchmark focuses specifically on measuring the validation
|
|
7
|
+
# overhead by using very simple documents and minimal XPath execution time.
|
|
8
|
+
|
|
9
|
+
require "benchmark/ips"
|
|
10
|
+
require "rxerces"
|
|
11
|
+
|
|
12
|
+
puts "=" * 70
|
|
13
|
+
puts "XPath Validation Cache Micro-Benchmarks"
|
|
14
|
+
puts "=" * 70
|
|
15
|
+
puts
|
|
16
|
+
puts "Xalan enabled: #{RXerces.xalan_enabled?}"
|
|
17
|
+
puts
|
|
18
|
+
|
|
19
|
+
# Use a tiny document to minimize XPath execution time
|
|
20
|
+
tiny_xml = "<r><a/></r>"
|
|
21
|
+
tiny_doc = RXerces::XML::Document.parse(tiny_xml)
|
|
22
|
+
|
|
23
|
+
# Generate many unique XPath expressions (each one is a cache miss on first use)
|
|
24
|
+
def generate_unique_xpaths(count)
|
|
25
|
+
count.times.map { |i| "//a[#{i + 1} = #{i + 1}]" }
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
puts "-" * 70
|
|
29
|
+
puts "Test 1: Same expression repeated (cache hit scenario)"
|
|
30
|
+
puts "-" * 70
|
|
31
|
+
puts
|
|
32
|
+
|
|
33
|
+
simple_xpath = "//a"
|
|
34
|
+
|
|
35
|
+
# Start from an empty cache (benchmark-ips performs its own warmup)
|
|
36
|
+
RXerces.clear_xpath_validation_cache
|
|
37
|
+
|
|
38
|
+
Benchmark.ips do |x|
|
|
39
|
+
x.config(time: 5, warmup: 2)
|
|
40
|
+
|
|
41
|
+
x.report("cached (repeated)") do
|
|
42
|
+
RXerces.cache_xpath_validation = true
|
|
43
|
+
tiny_doc.xpath(simple_xpath)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
x.report("uncached (repeated)") do
|
|
47
|
+
RXerces.cache_xpath_validation = false
|
|
48
|
+
tiny_doc.xpath(simple_xpath)
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
x.compare!
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
puts
|
|
55
|
+
puts "-" * 70
|
|
56
|
+
puts "Test 2: Many unique expressions (cache miss then hit vs always validate)"
|
|
57
|
+
puts "-" * 70
|
|
58
|
+
puts
|
|
59
|
+
|
|
60
|
+
unique_xpaths = generate_unique_xpaths(100)
|
|
61
|
+
RXerces.clear_xpath_validation_cache
|
|
62
|
+
|
|
63
|
+
Benchmark.ips do |x|
|
|
64
|
+
x.config(time: 5, warmup: 2)
|
|
65
|
+
|
|
66
|
+
x.report("cached (100 unique, round-robin)") do |times|
|
|
67
|
+
RXerces.cache_xpath_validation = true
|
|
68
|
+
times.times { |i| tiny_doc.xpath(unique_xpaths[i % 100]) }
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
x.report("uncached (100 unique, round-robin)") do |times|
|
|
72
|
+
RXerces.cache_xpath_validation = false
|
|
73
|
+
times.times { |i| tiny_doc.xpath(unique_xpaths[i % 100]) }
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
x.compare!
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
puts
|
|
80
|
+
puts "-" * 70
|
|
81
|
+
puts "Test 3: Pure validation overhead measurement (validation-only calls)"
|
|
82
|
+
puts "-" * 70
|
|
83
|
+
puts
|
|
84
|
+
|
|
85
|
+
# Measure how long validation itself takes by comparing many XPath calls
|
|
86
|
+
# with a small vs large expression (validation time scales with expression length)
|
|
87
|
+
|
|
88
|
+
short_xpath = "//a"
|
|
89
|
+
long_xpath = "//a[@x='1' and @y='2' and @z='3'][position() > 0 and position() < 100][contains(text(), 'test')]"
|
|
90
|
+
|
|
91
|
+
RXerces.clear_xpath_validation_cache
|
|
92
|
+
|
|
93
|
+
puts "Short XPath: #{short_xpath.length} chars"
|
|
94
|
+
puts "Long XPath: #{long_xpath.length} chars"
|
|
95
|
+
puts
|
|
96
|
+
|
|
97
|
+
Benchmark.ips do |x|
|
|
98
|
+
x.config(time: 5, warmup: 2)
|
|
99
|
+
|
|
100
|
+
x.report("short xpath (cached)") do
|
|
101
|
+
RXerces.cache_xpath_validation = true
|
|
102
|
+
tiny_doc.xpath(short_xpath)
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
x.report("short xpath (uncached)") do
|
|
106
|
+
RXerces.cache_xpath_validation = false
|
|
107
|
+
tiny_doc.xpath(short_xpath)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
x.report("long xpath (cached)") do
|
|
111
|
+
RXerces.cache_xpath_validation = true
|
|
112
|
+
tiny_doc.xpath(long_xpath)
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
x.report("long xpath (uncached)") do
|
|
116
|
+
RXerces.cache_xpath_validation = false
|
|
117
|
+
tiny_doc.xpath(long_xpath)
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
x.compare!
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
puts
|
|
124
|
+
puts "-" * 70
|
|
125
|
+
puts "Test 4: High-frequency scenario (10,000 queries, same expression)"
|
|
126
|
+
puts "-" * 70
|
|
127
|
+
puts
|
|
128
|
+
|
|
129
|
+
RXerces.clear_xpath_validation_cache
|
|
130
|
+
iterations = 10_000
|
|
131
|
+
|
|
132
|
+
require "benchmark"
|
|
133
|
+
|
|
134
|
+
puts "Running #{iterations} XPath queries..."
|
|
135
|
+
puts
|
|
136
|
+
|
|
137
|
+
RXerces.cache_xpath_validation = true
|
|
138
|
+
RXerces.clear_xpath_validation_cache
|
|
139
|
+
cached_time = Benchmark.realtime do
|
|
140
|
+
iterations.times { tiny_doc.xpath(simple_xpath) }
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
RXerces.cache_xpath_validation = false
|
|
144
|
+
uncached_time = Benchmark.realtime do
|
|
145
|
+
iterations.times { tiny_doc.xpath(simple_xpath) }
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
puts "With cache: #{cached_time.round(4)}s (#{(iterations / cached_time).round(1)} queries/sec)"
|
|
149
|
+
puts "Without cache: #{uncached_time.round(4)}s (#{(iterations / uncached_time).round(1)} queries/sec)"
|
|
150
|
+
puts "Difference: #{((uncached_time - cached_time) * 1000).round(2)}ms (#{((uncached_time / cached_time - 1) * 100).round(2)}% overhead)"
|
|
151
|
+
|
|
152
|
+
puts
|
|
153
|
+
puts "-" * 70
|
|
154
|
+
puts "Cache statistics"
|
|
155
|
+
puts "-" * 70
|
|
156
|
+
puts
|
|
157
|
+
|
|
158
|
+
RXerces.cache_xpath_validation = true
|
|
159
|
+
RXerces.clear_xpath_validation_cache
|
|
160
|
+
|
|
161
|
+
# Populate with test expressions
|
|
162
|
+
unique_xpaths.each { |xp| tiny_doc.xpath(xp) }
|
|
163
|
+
|
|
164
|
+
puts "Expressions cached: #{RXerces.xpath_validation_cache_size}"
|
|
165
|
+
puts "Max cache size: #{RXerces.xpath_validation_cache_max_size}"
|
|
166
|
+
puts
|
|
167
|
+
puts "=" * 70
|
|
168
|
+
puts "Benchmark complete!"
|
data/e
ADDED
|
File without changes
|