rxerces 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGES.md +21 -0
- data/README.md +29 -1
- data/benchmarks/xpath_validation_cache_benchmark.rb +157 -0
- data/benchmarks/xpath_validation_micro_benchmark.rb +168 -0
- data/e +0 -0
- data/ext/rxerces/rxerces.bundle.dSYM/Contents/Info.plist +20 -0
- data/ext/rxerces/rxerces.bundle.dSYM/Contents/Resources/Relocations/aarch64/rxerces.bundle.yml +5 -0
- data/ext/rxerces/rxerces.cpp +670 -22
- data/lib/rxerces/version.rb +1 -1
- data/lib/rxerces.rb +3 -2
- data/rxerces.gemspec +2 -1
- data/spec/document_spec.rb +184 -17
- data/spec/node_spec.rb +230 -58
- data/spec/nodeset_spec.rb +90 -0
- data/spec/rxerces_shared.rb +1 -1
- data/spec/rxerces_spec.rb +58 -0
- data/spec/schema_spec.rb +28 -1
- data/spec/spec_helper.rb +5 -0
- data/spec/xpath_cache_spec.rb +409 -0
- data/spec/xpath_spec.rb +306 -18
- data/tmp/arm64-darwin24/rxerces/3.4.8/rxerces.bundle.dSYM/Contents/Info.plist +20 -0
- data/tmp/arm64-darwin24/rxerces/3.4.8/rxerces.bundle.dSYM/Contents/Resources/Relocations/aarch64/rxerces.bundle.yml +5 -0
- data.tar.gz.sig +0 -0
- metadata +24 -1
- metadata.gz.sig +0 -0
data/spec/nodeset_spec.rb
CHANGED
|
@@ -145,6 +145,96 @@ RSpec.describe RXerces::XML::NodeSet do
|
|
|
145
145
|
end
|
|
146
146
|
end
|
|
147
147
|
|
|
148
|
+
describe "#inspect" do
|
|
149
|
+
it "returns a string representation" do
|
|
150
|
+
result = nodeset.inspect
|
|
151
|
+
expect(result).to be_a(String)
|
|
152
|
+
expect(result).to include('RXerces::XML::NodeSet')
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
it "shows element names in the output" do
|
|
156
|
+
result = nodeset.inspect
|
|
157
|
+
expect(result).to include('<item>')
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
it "truncates long content" do
|
|
161
|
+
long_xml = '<root><item>' + ('x' * 100) + '</item></root>'
|
|
162
|
+
doc = RXerces::XML::Document.parse(long_xml)
|
|
163
|
+
result = doc.xpath('//item').inspect
|
|
164
|
+
expect(result).to include('...')
|
|
165
|
+
expect(result.length).to be < long_xml.length
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
context "with UTF-8 content" do
|
|
169
|
+
it "handles multi-byte characters without corruption" do
|
|
170
|
+
# Test with various multi-byte UTF-8 characters
|
|
171
|
+
utf8_xml = '<root><item>Hello 世界 🌍 Привет</item></root>'
|
|
172
|
+
doc = RXerces::XML::Document.parse(utf8_xml)
|
|
173
|
+
result = doc.xpath('//item').inspect
|
|
174
|
+
|
|
175
|
+
# Should not raise encoding errors
|
|
176
|
+
expect { result.encode('UTF-8') }.not_to raise_error
|
|
177
|
+
expect(result.encoding).to eq(Encoding::UTF_8)
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
it "truncates UTF-8 strings safely without cutting mid-character" do
|
|
181
|
+
# Create a string with multi-byte characters that would be cut off
|
|
182
|
+
# Use CJK characters (3 bytes each in UTF-8) near the truncation boundary
|
|
183
|
+
long_text = 'a' * 25 + '世界你好こんにちは' + 'x' * 50
|
|
184
|
+
utf8_xml = "<root><item>#{long_text}</item></root>"
|
|
185
|
+
doc = RXerces::XML::Document.parse(utf8_xml)
|
|
186
|
+
result = doc.xpath('//item').inspect
|
|
187
|
+
|
|
188
|
+
# Result should be valid UTF-8
|
|
189
|
+
expect(result).to be_valid_encoding
|
|
190
|
+
expect { result.encode('UTF-8') }.not_to raise_error
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
it "handles emojis and 4-byte UTF-8 characters" do
|
|
194
|
+
# Emojis are 4-byte UTF-8 characters
|
|
195
|
+
emoji_xml = '<root><item>Test 🎉🎊🎈🎁🎀🎂 more text here that is quite long</item></root>'
|
|
196
|
+
doc = RXerces::XML::Document.parse(emoji_xml)
|
|
197
|
+
result = doc.xpath('//item').inspect
|
|
198
|
+
|
|
199
|
+
expect(result).to be_valid_encoding
|
|
200
|
+
expect { result.encode('UTF-8') }.not_to raise_error
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
it "handles mixed ASCII and multi-byte characters" do
|
|
204
|
+
mixed_xml = '<root><item>ASCII テキスト text 文字 more</item></root>'
|
|
205
|
+
doc = RXerces::XML::Document.parse(mixed_xml)
|
|
206
|
+
result = doc.xpath('//item').inspect
|
|
207
|
+
|
|
208
|
+
expect(result).to be_valid_encoding
|
|
209
|
+
end
|
|
210
|
+
|
|
211
|
+
it "handles UTF-8 in text nodes" do
|
|
212
|
+
text_xml = '<root><item>こんにちは世界' + ('x' * 50) + '</item></root>'
|
|
213
|
+
doc = RXerces::XML::Document.parse(text_xml)
|
|
214
|
+
# Get the item element which contains the text
|
|
215
|
+
result = doc.xpath('//item').inspect
|
|
216
|
+
|
|
217
|
+
expect(result).to be_valid_encoding
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
it "handles Cyrillic characters" do
|
|
221
|
+
cyrillic_xml = '<root><item>Привет мир это длинный текст на русском языке</item></root>'
|
|
222
|
+
doc = RXerces::XML::Document.parse(cyrillic_xml)
|
|
223
|
+
result = doc.xpath('//item').inspect
|
|
224
|
+
|
|
225
|
+
expect(result).to be_valid_encoding
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
it "handles Arabic characters" do
|
|
229
|
+
arabic_xml = '<root><item>مرحبا بالعالم هذا نص طويل بالعربية</item></root>'
|
|
230
|
+
doc = RXerces::XML::Document.parse(arabic_xml)
|
|
231
|
+
result = doc.xpath('//item').inspect
|
|
232
|
+
|
|
233
|
+
expect(result).to be_valid_encoding
|
|
234
|
+
end
|
|
235
|
+
end
|
|
236
|
+
end
|
|
237
|
+
|
|
148
238
|
it "includes Enumerable" do
|
|
149
239
|
expect(RXerces::XML::NodeSet.ancestors).to include(Enumerable)
|
|
150
240
|
end
|
data/spec/rxerces_shared.rb
CHANGED
data/spec/rxerces_spec.rb
CHANGED
|
@@ -20,4 +20,62 @@ RSpec.describe RXerces do
|
|
|
20
20
|
expect(doc).to be_a(RXerces::XML::Document)
|
|
21
21
|
end
|
|
22
22
|
end
|
|
23
|
+
|
|
24
|
+
describe "thread safety" do
|
|
25
|
+
it "handles concurrent initialization safely" do
|
|
26
|
+
xml = '<root><child>text</child></root>'
|
|
27
|
+
threads = []
|
|
28
|
+
results = []
|
|
29
|
+
|
|
30
|
+
# Create multiple threads that parse XML concurrently
|
|
31
|
+
10.times do
|
|
32
|
+
threads << Thread.new do
|
|
33
|
+
doc = RXerces.XML(xml)
|
|
34
|
+
results << doc.class
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Wait for all threads to complete
|
|
39
|
+
threads.each(&:join)
|
|
40
|
+
|
|
41
|
+
# All should succeed and return Document objects
|
|
42
|
+
expect(results.size).to eq(10)
|
|
43
|
+
results.each do |result|
|
|
44
|
+
expect(result).to eq(RXerces::XML::Document)
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
describe "security" do
|
|
50
|
+
it "prevents XXE attacks by not processing external entities" do
|
|
51
|
+
# XML with external entity reference
|
|
52
|
+
malicious_xml = <<~XML
|
|
53
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
54
|
+
<!DOCTYPE foo [ <!ENTITY xxe SYSTEM "file:///etc/passwd"> ]>
|
|
55
|
+
<foo>&xxe;</foo>
|
|
56
|
+
XML
|
|
57
|
+
|
|
58
|
+
# Should fail to parse because external entities are disabled
|
|
59
|
+
expect {
|
|
60
|
+
RXerces.XML(malicious_xml)
|
|
61
|
+
}.to raise_error(RuntimeError, /unable to open external entity/)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
it "allows external entities when explicitly enabled" do
|
|
65
|
+
# XML with external entity reference
|
|
66
|
+
xml_with_entity = <<~XML
|
|
67
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
68
|
+
<!DOCTYPE foo [ <!ENTITY test "test content"> ]>
|
|
69
|
+
<foo>&test;</foo>
|
|
70
|
+
XML
|
|
71
|
+
|
|
72
|
+
# Should succeed with internal entities even when external entities are disabled
|
|
73
|
+
doc = RXerces.XML(xml_with_entity)
|
|
74
|
+
expect(doc.root.text).to eq("test content")
|
|
75
|
+
|
|
76
|
+
# With allow_external_entities: true, should still handle internal entities
|
|
77
|
+
doc2 = RXerces.XML(xml_with_entity, allow_external_entities: true)
|
|
78
|
+
expect(doc2.root.text).to eq("test content")
|
|
79
|
+
end
|
|
80
|
+
end
|
|
23
81
|
end
|
data/spec/schema_spec.rb
CHANGED
|
@@ -44,7 +44,7 @@ RSpec.describe RXerces::XML::Schema do
|
|
|
44
44
|
end
|
|
45
45
|
|
|
46
46
|
# Note: Xerces-C parser is very tolerant of invalid XML
|
|
47
|
-
#
|
|
47
|
+
# Schema creation succeeds even with malformed XML, validation catches issues
|
|
48
48
|
end
|
|
49
49
|
|
|
50
50
|
describe '.from_document' do
|
|
@@ -72,5 +72,32 @@ RSpec.describe RXerces::XML::Schema do
|
|
|
72
72
|
expect(errors).not_to be_empty
|
|
73
73
|
expect(errors.first).to include('not-a-number')
|
|
74
74
|
end
|
|
75
|
+
|
|
76
|
+
it 'handles schema grammar loading errors gracefully' do
|
|
77
|
+
# Create a schema with an invalid type reference
|
|
78
|
+
invalid_schema_xsd = <<~XSD
|
|
79
|
+
<?xml version="1.0"?>
|
|
80
|
+
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
|
|
81
|
+
<xs:element name="root">
|
|
82
|
+
<xs:complexType>
|
|
83
|
+
<xs:sequence>
|
|
84
|
+
<xs:element name="invalid" type="invalid-type"/>
|
|
85
|
+
</xs:sequence>
|
|
86
|
+
</xs:complexType>
|
|
87
|
+
</xs:element>
|
|
88
|
+
</xs:schema>
|
|
89
|
+
XSD
|
|
90
|
+
|
|
91
|
+
invalid_schema = described_class.from_string(invalid_schema_xsd)
|
|
92
|
+
doc = RXerces::XML::Document.parse(valid_xml)
|
|
93
|
+
|
|
94
|
+
# Validation should handle XMLException/SAXException in loadGrammar gracefully
|
|
95
|
+
# and continue with validation, producing errors
|
|
96
|
+
errors = doc.validate(invalid_schema)
|
|
97
|
+
expect(errors).to be_a(Array)
|
|
98
|
+
expect(errors).not_to be_empty
|
|
99
|
+
# Should contain errors about the invalid type
|
|
100
|
+
expect(errors.join).to include('invalid-type')
|
|
101
|
+
end
|
|
75
102
|
end
|
|
76
103
|
end
|
data/spec/spec_helper.rb
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
require 'rspec'
|
|
2
2
|
require 'rxerces'
|
|
3
3
|
require 'rxerces_shared'
|
|
4
|
+
require 'mkmf-lite'
|
|
4
5
|
|
|
5
6
|
RSpec.configure do |config|
|
|
7
|
+
include Mkmf::Lite
|
|
8
|
+
|
|
6
9
|
# Enable flags like --only-failures and --next-failure
|
|
7
10
|
config.example_status_persistence_file_path = ".rspec_status"
|
|
8
11
|
|
|
@@ -14,4 +17,6 @@ RSpec.configure do |config|
|
|
|
14
17
|
end
|
|
15
18
|
|
|
16
19
|
config.include_context(RXerces)
|
|
20
|
+
|
|
21
|
+
config.filter_run_excluding(:xalan) unless have_library('xalan-c')
|
|
17
22
|
end
|
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'spec_helper'
|
|
4
|
+
|
|
5
|
+
RSpec.describe "XPath Validation Cache" do
|
|
6
|
+
let(:doc) { RXerces::XML::Document.parse('<root><item id="1"/><item id="2"/></root>') }
|
|
7
|
+
|
|
8
|
+
# Helper to generate unique XPath expressions that work with or without Xalan
|
|
9
|
+
# With Xalan: uses attribute predicates for more realistic expressions
|
|
10
|
+
# Without Xalan: uses unique element names
|
|
11
|
+
def unique_xpath(prefix, index)
|
|
12
|
+
if RXerces.xalan_enabled?
|
|
13
|
+
"//item[@id='#{prefix}_#{index}']"
|
|
14
|
+
else
|
|
15
|
+
"//#{prefix}#{index}"
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
before(:each) do
|
|
20
|
+
# Reset to default state before each test
|
|
21
|
+
RXerces.cache_xpath_validation = true
|
|
22
|
+
RXerces.xpath_validation_cache_max_size = 10_000
|
|
23
|
+
RXerces.clear_xpath_validation_cache
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
after(:all) do
|
|
27
|
+
# Restore defaults after all tests
|
|
28
|
+
RXerces.cache_xpath_validation = true
|
|
29
|
+
RXerces.xpath_validation_cache_max_size = 10_000
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
describe "configuration methods" do
|
|
33
|
+
describe ".cache_xpath_validation?" do
|
|
34
|
+
it "returns true by default" do
|
|
35
|
+
expect(RXerces.cache_xpath_validation?).to be true
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
it "returns false when caching is disabled" do
|
|
39
|
+
RXerces.cache_xpath_validation = false
|
|
40
|
+
expect(RXerces.cache_xpath_validation?).to be false
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
describe ".cache_xpath_validation=" do
|
|
45
|
+
it "enables caching when set to true" do
|
|
46
|
+
RXerces.cache_xpath_validation = false
|
|
47
|
+
RXerces.cache_xpath_validation = true
|
|
48
|
+
expect(RXerces.cache_xpath_validation?).to be true
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
it "disables caching when set to false" do
|
|
52
|
+
RXerces.cache_xpath_validation = false
|
|
53
|
+
expect(RXerces.cache_xpath_validation?).to be false
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
it "accepts truthy values" do
|
|
57
|
+
RXerces.cache_xpath_validation = 1
|
|
58
|
+
expect(RXerces.cache_xpath_validation?).to be true
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
it "accepts falsy values" do
|
|
62
|
+
RXerces.cache_xpath_validation = nil
|
|
63
|
+
expect(RXerces.cache_xpath_validation?).to be false
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
describe ".xpath_validation_cache_size" do
|
|
68
|
+
it "returns 0 when cache is empty" do
|
|
69
|
+
expect(RXerces.xpath_validation_cache_size).to eq(0)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
it "increases after XPath queries" do
|
|
73
|
+
doc.xpath("//item")
|
|
74
|
+
expect(RXerces.xpath_validation_cache_size).to eq(1)
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
it "does not double-count repeated expressions" do
|
|
78
|
+
3.times { doc.xpath("//item") }
|
|
79
|
+
expect(RXerces.xpath_validation_cache_size).to eq(1)
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
it "counts unique expressions" do
|
|
83
|
+
doc.xpath("//item")
|
|
84
|
+
if RXerces.xalan_enabled?
|
|
85
|
+
doc.xpath("//item[@id='1']")
|
|
86
|
+
else
|
|
87
|
+
doc.xpath("//root")
|
|
88
|
+
end
|
|
89
|
+
doc.xpath("/root/item")
|
|
90
|
+
expect(RXerces.xpath_validation_cache_size).to eq(3)
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
describe ".xpath_validation_cache_max_size" do
|
|
95
|
+
it "returns 10000 by default" do
|
|
96
|
+
expect(RXerces.xpath_validation_cache_max_size).to eq(10_000)
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
describe ".xpath_validation_cache_max_size=" do
|
|
101
|
+
it "sets the maximum cache size" do
|
|
102
|
+
RXerces.xpath_validation_cache_max_size = 5000
|
|
103
|
+
expect(RXerces.xpath_validation_cache_max_size).to eq(5000)
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
it "accepts large values" do
|
|
107
|
+
RXerces.xpath_validation_cache_max_size = 100_000
|
|
108
|
+
expect(RXerces.xpath_validation_cache_max_size).to eq(100_000)
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
it "accepts zero" do
|
|
112
|
+
RXerces.xpath_validation_cache_max_size = 0
|
|
113
|
+
expect(RXerces.xpath_validation_cache_max_size).to eq(0)
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
it "raises TypeError for non-integer values" do
|
|
117
|
+
expect { RXerces.xpath_validation_cache_max_size = "1000" }.to raise_error(TypeError)
|
|
118
|
+
expect { RXerces.xpath_validation_cache_max_size = 1.5 }.to raise_error(TypeError)
|
|
119
|
+
expect { RXerces.xpath_validation_cache_max_size = nil }.to raise_error(TypeError)
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
it "raises ArgumentError for negative values" do
|
|
123
|
+
expect { RXerces.xpath_validation_cache_max_size = -1 }.to raise_error(ArgumentError)
|
|
124
|
+
expect { RXerces.xpath_validation_cache_max_size = -100 }.to raise_error(ArgumentError)
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
|
|
128
|
+
describe ".clear_xpath_validation_cache" do
|
|
129
|
+
it "empties the cache" do
|
|
130
|
+
doc.xpath("//item")
|
|
131
|
+
if RXerces.xalan_enabled?
|
|
132
|
+
doc.xpath("//item[@id='1']")
|
|
133
|
+
else
|
|
134
|
+
doc.xpath("//root")
|
|
135
|
+
end
|
|
136
|
+
expect(RXerces.xpath_validation_cache_size).to be > 0
|
|
137
|
+
|
|
138
|
+
RXerces.clear_xpath_validation_cache
|
|
139
|
+
expect(RXerces.xpath_validation_cache_size).to eq(0)
|
|
140
|
+
end
|
|
141
|
+
|
|
142
|
+
it "returns nil" do
|
|
143
|
+
expect(RXerces.clear_xpath_validation_cache).to be_nil
|
|
144
|
+
end
|
|
145
|
+
end
|
|
146
|
+
|
|
147
|
+
describe ".xalan_enabled?" do
|
|
148
|
+
it "returns a boolean" do
|
|
149
|
+
expect([true, false]).to include(RXerces.xalan_enabled?)
|
|
150
|
+
end
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
describe ".xpath_max_length" do
|
|
154
|
+
it "returns 10000 by default" do
|
|
155
|
+
expect(RXerces.xpath_max_length).to eq(10_000)
|
|
156
|
+
end
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
describe ".xpath_max_length=" do
|
|
160
|
+
after(:each) do
|
|
161
|
+
# Restore default after each test
|
|
162
|
+
RXerces.xpath_max_length = 10_000
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
it "sets the maximum XPath expression length" do
|
|
166
|
+
RXerces.xpath_max_length = 5000
|
|
167
|
+
expect(RXerces.xpath_max_length).to eq(5000)
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
it "accepts zero to disable the limit" do
|
|
171
|
+
RXerces.xpath_max_length = 0
|
|
172
|
+
expect(RXerces.xpath_max_length).to eq(0)
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
it "raises TypeError for non-integer values" do
|
|
176
|
+
expect { RXerces.xpath_max_length = "1000" }.to raise_error(TypeError)
|
|
177
|
+
expect { RXerces.xpath_max_length = 1.5 }.to raise_error(TypeError)
|
|
178
|
+
expect { RXerces.xpath_max_length = nil }.to raise_error(TypeError)
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
it "raises ArgumentError for negative values" do
|
|
182
|
+
expect { RXerces.xpath_max_length = -1 }.to raise_error(ArgumentError)
|
|
183
|
+
expect { RXerces.xpath_max_length = -100 }.to raise_error(ArgumentError)
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
it "rejects XPath expressions exceeding the limit" do
|
|
187
|
+
RXerces.xpath_max_length = 50
|
|
188
|
+
expect {
|
|
189
|
+
doc.xpath("//" + "a" * 50)
|
|
190
|
+
}.to raise_error(ArgumentError, /too long/)
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
it "allows XPath expressions within the limit" do
|
|
194
|
+
RXerces.xpath_max_length = 100
|
|
195
|
+
expect { doc.xpath("//item") }.not_to raise_error
|
|
196
|
+
end
|
|
197
|
+
|
|
198
|
+
it "allows any length when set to zero" do
|
|
199
|
+
RXerces.xpath_max_length = 0
|
|
200
|
+
# This would normally exceed the default 10k limit
|
|
201
|
+
long_xpath = "//" + "a" * 15_000
|
|
202
|
+
# Should not raise "too long" error - verify by checking it doesn't match that pattern
|
|
203
|
+
begin
|
|
204
|
+
doc.xpath(long_xpath)
|
|
205
|
+
rescue ArgumentError => e
|
|
206
|
+
expect(e.message).not_to match(/too long/)
|
|
207
|
+
end
|
|
208
|
+
end
|
|
209
|
+
end
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
describe "caching behavior" do
|
|
213
|
+
it "caches validated expressions when enabled" do
|
|
214
|
+
RXerces.cache_xpath_validation = true
|
|
215
|
+
doc.xpath("//item")
|
|
216
|
+
expect(RXerces.xpath_validation_cache_size).to eq(1)
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
it "does not cache when disabled" do
|
|
220
|
+
RXerces.cache_xpath_validation = false
|
|
221
|
+
doc.xpath("//item")
|
|
222
|
+
expect(RXerces.xpath_validation_cache_size).to eq(0)
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
it "reuses cached validations for identical expressions" do
|
|
226
|
+
# This is implicitly tested by the fact that repeated queries
|
|
227
|
+
# don't increase cache size
|
|
228
|
+
xpath = RXerces.xalan_enabled? ? "//item[@id='1']" : "//item"
|
|
229
|
+
5.times { doc.xpath(xpath) }
|
|
230
|
+
expect(RXerces.xpath_validation_cache_size).to eq(1)
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
it "caches expressions from different documents" do
|
|
234
|
+
doc2 = RXerces::XML::Document.parse('<data><value/></data>')
|
|
235
|
+
|
|
236
|
+
doc.xpath("//item")
|
|
237
|
+
doc2.xpath("//value")
|
|
238
|
+
doc.xpath("//item") # Should hit cache
|
|
239
|
+
doc2.xpath("//value") # Should hit cache
|
|
240
|
+
|
|
241
|
+
expect(RXerces.xpath_validation_cache_size).to eq(2)
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
it "caches expressions from node-level xpath calls" do
|
|
245
|
+
root = doc.root
|
|
246
|
+
root.xpath(".//item")
|
|
247
|
+
expect(RXerces.xpath_validation_cache_size).to eq(1)
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
it "shares cache between document and node xpath calls" do
|
|
251
|
+
doc.xpath("//item")
|
|
252
|
+
doc.root.xpath("//item") # Same expression, should reuse cache
|
|
253
|
+
expect(RXerces.xpath_validation_cache_size).to eq(1)
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
it "respects max cache size" do
|
|
257
|
+
RXerces.xpath_validation_cache_max_size = 3
|
|
258
|
+
|
|
259
|
+
doc.xpath("//a")
|
|
260
|
+
doc.xpath("//b")
|
|
261
|
+
doc.xpath("//c")
|
|
262
|
+
initial_size = RXerces.xpath_validation_cache_size
|
|
263
|
+
|
|
264
|
+
# Cache should be at max
|
|
265
|
+
expect(initial_size).to eq(3)
|
|
266
|
+
|
|
267
|
+
# Additional expressions should not increase size beyond max
|
|
268
|
+
doc.xpath("//d")
|
|
269
|
+
doc.xpath("//e")
|
|
270
|
+
expect(RXerces.xpath_validation_cache_size).to eq(3)
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
it "uses LRU eviction when cache is full" do
|
|
274
|
+
RXerces.xpath_validation_cache_max_size = 3
|
|
275
|
+
|
|
276
|
+
# Add 3 expressions: //a is oldest, //c is newest
|
|
277
|
+
doc.xpath("//a")
|
|
278
|
+
doc.xpath("//b")
|
|
279
|
+
doc.xpath("//c")
|
|
280
|
+
|
|
281
|
+
# Access //a again to make it most recently used
|
|
282
|
+
# Now order is: //a (newest), //c, //b (oldest)
|
|
283
|
+
doc.xpath("//a")
|
|
284
|
+
|
|
285
|
+
# Add //d, which should evict //b (least recently used)
|
|
286
|
+
doc.xpath("//d")
|
|
287
|
+
expect(RXerces.xpath_validation_cache_size).to eq(3)
|
|
288
|
+
|
|
289
|
+
# //a should still be cached (was accessed recently)
|
|
290
|
+
# We can verify by checking that accessing it doesn't change cache size
|
|
291
|
+
doc.xpath("//a")
|
|
292
|
+
expect(RXerces.xpath_validation_cache_size).to eq(3)
|
|
293
|
+
|
|
294
|
+
# //b was evicted, adding it again should evict //c (now oldest)
|
|
295
|
+
doc.xpath("//b")
|
|
296
|
+
expect(RXerces.xpath_validation_cache_size).to eq(3)
|
|
297
|
+
|
|
298
|
+
# Cache should now contain: //b, //a, //d (in MRU order)
|
|
299
|
+
# //c was evicted
|
|
300
|
+
end
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
describe "thread safety" do
|
|
304
|
+
it "handles concurrent xpath queries without errors" do
|
|
305
|
+
threads = 10.times.map do |i|
|
|
306
|
+
Thread.new do
|
|
307
|
+
100.times do |j|
|
|
308
|
+
doc.xpath(unique_xpath("t#{i}", j))
|
|
309
|
+
end
|
|
310
|
+
end
|
|
311
|
+
end
|
|
312
|
+
|
|
313
|
+
expect { threads.each(&:join) }.not_to raise_error
|
|
314
|
+
end
|
|
315
|
+
|
|
316
|
+
it "handles concurrent cache configuration changes" do
|
|
317
|
+
threads = []
|
|
318
|
+
|
|
319
|
+
# Thread toggling cache on/off
|
|
320
|
+
threads << Thread.new do
|
|
321
|
+
50.times do
|
|
322
|
+
RXerces.cache_xpath_validation = false
|
|
323
|
+
RXerces.cache_xpath_validation = true
|
|
324
|
+
end
|
|
325
|
+
end
|
|
326
|
+
|
|
327
|
+
# Threads doing xpath queries
|
|
328
|
+
3.times do |i|
|
|
329
|
+
threads << Thread.new do
|
|
330
|
+
50.times do |j|
|
|
331
|
+
doc.xpath(unique_xpath("c#{i}", j))
|
|
332
|
+
end
|
|
333
|
+
end
|
|
334
|
+
end
|
|
335
|
+
|
|
336
|
+
expect { threads.each(&:join) }.not_to raise_error
|
|
337
|
+
end
|
|
338
|
+
|
|
339
|
+
it "handles concurrent cache clearing" do
|
|
340
|
+
threads = []
|
|
341
|
+
|
|
342
|
+
# Thread clearing cache periodically
|
|
343
|
+
threads << Thread.new do
|
|
344
|
+
20.times do
|
|
345
|
+
sleep(0.001)
|
|
346
|
+
RXerces.clear_xpath_validation_cache
|
|
347
|
+
end
|
|
348
|
+
end
|
|
349
|
+
|
|
350
|
+
# Threads doing xpath queries
|
|
351
|
+
3.times do |i|
|
|
352
|
+
threads << Thread.new do
|
|
353
|
+
100.times do |j|
|
|
354
|
+
doc.xpath(unique_xpath("r#{i}", j))
|
|
355
|
+
end
|
|
356
|
+
end
|
|
357
|
+
end
|
|
358
|
+
|
|
359
|
+
expect { threads.each(&:join) }.not_to raise_error
|
|
360
|
+
end
|
|
361
|
+
|
|
362
|
+
it "returns consistent cache size under concurrent access" do
|
|
363
|
+
# Fill cache with known expressions
|
|
364
|
+
10.times { |i| doc.xpath("//item#{i}") }
|
|
365
|
+
initial_size = RXerces.xpath_validation_cache_size
|
|
366
|
+
|
|
367
|
+
# Read cache size from multiple threads
|
|
368
|
+
sizes = []
|
|
369
|
+
mutex = Mutex.new
|
|
370
|
+
|
|
371
|
+
threads = 5.times.map do
|
|
372
|
+
Thread.new do
|
|
373
|
+
10.times do
|
|
374
|
+
size = RXerces.xpath_validation_cache_size
|
|
375
|
+
mutex.synchronize { sizes << size }
|
|
376
|
+
end
|
|
377
|
+
end
|
|
378
|
+
end
|
|
379
|
+
|
|
380
|
+
threads.each(&:join)
|
|
381
|
+
|
|
382
|
+
# All reads should return consistent values (either the initial
|
|
383
|
+
# size or values after any concurrent modifications)
|
|
384
|
+
expect(sizes).to all(be >= 0)
|
|
385
|
+
end
|
|
386
|
+
end
|
|
387
|
+
|
|
388
|
+
describe "interaction with validation" do
|
|
389
|
+
it "still validates expressions even when cached" do
|
|
390
|
+
# First call validates and caches
|
|
391
|
+
doc.xpath("//item")
|
|
392
|
+
|
|
393
|
+
# Invalid expression should still be rejected
|
|
394
|
+
expect {
|
|
395
|
+
doc.xpath("//item[@id=''] or 1=1")
|
|
396
|
+
}.to raise_error(ArgumentError)
|
|
397
|
+
end
|
|
398
|
+
|
|
399
|
+
it "caches valid expressions only" do
|
|
400
|
+
# Try an invalid expression
|
|
401
|
+
expect {
|
|
402
|
+
doc.xpath("")
|
|
403
|
+
}.to raise_error(ArgumentError)
|
|
404
|
+
|
|
405
|
+
# Cache should not have increased
|
|
406
|
+
expect(RXerces.xpath_validation_cache_size).to eq(0)
|
|
407
|
+
end
|
|
408
|
+
end
|
|
409
|
+
end
|