moxml 0.1.10 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/docs.yml +1 -1
- data/.github/workflows/rake.yml +16 -13
- data/.github/workflows/release.yml +1 -0
- data/.github/workflows/round-trip.yml +74 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +160 -38
- data/Gemfile +2 -1
- data/README.adoc +236 -0
- data/Rakefile +11 -0
- data/data/w3c_entities.json +2131 -0
- data/docs/ENTITY_SUPPORT_FOR_LUTAML_MODEL.md +102 -0
- data/docs/_pages/adapters/ox.adoc +30 -0
- data/docs/_pages/configuration.adoc +43 -0
- data/docs/_pages/node-api-reference.adoc +35 -0
- data/docs/_tutorials/namespace-handling.adoc +21 -0
- data/examples/rss_parser/rss_parser.rb +1 -3
- data/lib/moxml/adapter/base.rb +26 -2
- data/lib/moxml/adapter/headed_ox.rb +5 -4
- data/lib/moxml/adapter/libxml.rb +3 -2
- data/lib/moxml/adapter/nokogiri.rb +16 -3
- data/lib/moxml/adapter/oga.rb +124 -20
- data/lib/moxml/adapter/ox.rb +4 -3
- data/lib/moxml/adapter/rexml.rb +41 -7
- data/lib/moxml/builder.rb +6 -0
- data/lib/moxml/config.rb +52 -1
- data/lib/moxml/context.rb +21 -2
- data/lib/moxml/document.rb +6 -1
- data/lib/moxml/document_builder.rb +45 -1
- data/lib/moxml/element.rb +4 -3
- data/lib/moxml/entity_reference.rb +29 -0
- data/lib/moxml/entity_registry.rb +278 -0
- data/lib/moxml/node.rb +10 -8
- data/lib/moxml/node_set.rb +10 -6
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils.rb +25 -2
- data/lib/moxml.rb +1 -0
- data/spec/consistency/README.md +3 -1
- data/spec/consistency/round_trip_spec.rb +479 -0
- data/spec/examples/readme_examples_spec.rb +1 -1
- data/spec/fixtures/round-trips/metanorma/a.xml +66 -0
- data/spec/fixtures/round-trips/metanorma/bilingual-en.xml +7682 -0
- data/spec/fixtures/round-trips/metanorma/bilingual-fr.xml +7520 -0
- data/spec/fixtures/round-trips/metanorma/bilingual.presentation.xml +21211 -0
- data/spec/fixtures/round-trips/metanorma/collection1.xml +313 -0
- data/spec/fixtures/round-trips/metanorma/collection1nested.xml +291 -0
- data/spec/fixtures/round-trips/metanorma/collection_docinline.xml +544 -0
- data/spec/fixtures/round-trips/metanorma/collection_full.xml +1776 -0
- data/spec/fixtures/round-trips/metanorma/dummy.1.xml +295 -0
- data/spec/fixtures/round-trips/metanorma/dummy.xml +349 -0
- data/spec/fixtures/round-trips/metanorma/footnotes.xml +70 -0
- data/spec/fixtures/round-trips/metanorma/iho.xml +116 -0
- data/spec/fixtures/round-trips/metanorma/rice-amd.final.xml +186 -0
- data/spec/fixtures/round-trips/metanorma/rice-amd.final_1.xml +180 -0
- data/spec/fixtures/round-trips/metanorma/rice-en.final.norepo.xml +116 -0
- data/spec/fixtures/round-trips/metanorma/rice-en.final.xml +149 -0
- data/spec/fixtures/round-trips/metanorma/rice-en.final_1.xml +144 -0
- data/spec/fixtures/round-trips/metanorma/rice1-en.final.xml +120 -0
- data/spec/fixtures/round-trips/metanorma/rice2-en.final.xml +116 -0
- data/spec/fixtures/round-trips/metanorma/test_sectionsplit.xml +119 -0
- data/spec/fixtures/round-trips/niso-jats/bmj_sample.xml +1068 -0
- data/spec/fixtures/round-trips/niso-jats/element_citation.xml +7 -0
- data/spec/fixtures/round-trips/niso-jats/pnas_sample.xml +3768 -0
- data/spec/fixtures/round-trips/rfcxml/rfc8881.xml +45848 -0
- data/spec/fixtures/round-trips/rfcxml/rfc8994.xml +6607 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9000.xml +9064 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9043.xml +5527 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9051.xml +14286 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9110.xml +18156 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9260.xml +9136 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9293.xml +8300 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9380.xml +8916 -0
- data/spec/fixtures/round-trips/rfcxml/rfc9420.xml +8927 -0
- data/spec/fixtures/w3c/namespaces/1.0/001.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/002.xml +8 -0
- data/spec/fixtures/w3c/namespaces/1.0/003.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/004.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/005.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/006.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/007.xml +20 -0
- data/spec/fixtures/w3c/namespaces/1.0/008.xml +20 -0
- data/spec/fixtures/w3c/namespaces/1.0/009.xml +19 -0
- data/spec/fixtures/w3c/namespaces/1.0/010.xml +19 -0
- data/spec/fixtures/w3c/namespaces/1.0/011.xml +20 -0
- data/spec/fixtures/w3c/namespaces/1.0/012.xml +19 -0
- data/spec/fixtures/w3c/namespaces/1.0/013.xml +5 -0
- data/spec/fixtures/w3c/namespaces/1.0/014.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/015.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/016.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/017.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/018.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/019.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/020.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/021.xml +6 -0
- data/spec/fixtures/w3c/namespaces/1.0/022.xml +6 -0
- data/spec/fixtures/w3c/namespaces/1.0/023.xml +6 -0
- data/spec/fixtures/w3c/namespaces/1.0/024.xml +6 -0
- data/spec/fixtures/w3c/namespaces/1.0/025.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/026.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/027.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/028.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/029.xml +4 -0
- data/spec/fixtures/w3c/namespaces/1.0/030.xml +4 -0
- data/spec/fixtures/w3c/namespaces/1.0/031.xml +4 -0
- data/spec/fixtures/w3c/namespaces/1.0/032.xml +5 -0
- data/spec/fixtures/w3c/namespaces/1.0/033.xml +4 -0
- data/spec/fixtures/w3c/namespaces/1.0/034.xml +3 -0
- data/spec/fixtures/w3c/namespaces/1.0/035.xml +8 -0
- data/spec/fixtures/w3c/namespaces/1.0/036.xml +8 -0
- data/spec/fixtures/w3c/namespaces/1.0/037.xml +8 -0
- data/spec/fixtures/w3c/namespaces/1.0/038.xml +8 -0
- data/spec/fixtures/w3c/namespaces/1.0/039.xml +10 -0
- data/spec/fixtures/w3c/namespaces/1.0/040.xml +9 -0
- data/spec/fixtures/w3c/namespaces/1.0/041.xml +8 -0
- data/spec/fixtures/w3c/namespaces/1.0/042.xml +4 -0
- data/spec/fixtures/w3c/namespaces/1.0/043.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/044.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/045.xml +7 -0
- data/spec/fixtures/w3c/namespaces/1.0/046.xml +10 -0
- data/spec/fixtures/w3c/namespaces/1.0/047.xml +4 -0
- data/spec/fixtures/w3c/namespaces/1.0/048.xml +5 -0
- data/spec/fixtures/w3c/namespaces/1.0/LICENSE.md +32 -0
- data/spec/fixtures/w3c/namespaces/1.0/README.adoc +42 -0
- data/spec/fixtures/w3c/namespaces/1.0/rmt-ns10.xml +156 -0
- data/spec/integration/shared_examples/node_wrappers/element_behavior.rb +14 -0
- data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +14 -2
- data/spec/integration/shared_examples/w3c_namespace_examples.rb +10 -0
- data/spec/integration/w3c_namespace_spec.rb +69 -0
- data/spec/moxml/adapter/libxml_spec.rb +7 -1
- data/spec/moxml/adapter/oga_spec.rb +92 -0
- data/spec/moxml/config_spec.rb +75 -0
- data/spec/moxml/entity_registry_spec.rb +184 -0
- data/spec/moxml/error_spec.rb +2 -2
- data/spec/moxml/namespace_uri_validation_spec.rb +140 -0
- data/spec/moxml/xpath/axes_spec.rb +3 -4
- data/spec/performance/xpath_benchmark_spec.rb +6 -54
- data/spec/support/w3c_namespace_helpers.rb +41 -0
- data/spec/unit/rexml_isolated_test.rb +271 -0
- metadata +98 -2
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
RSpec.describe "Namespace URI validation" do
|
|
4
|
+
let(:context) { Moxml.new }
|
|
5
|
+
let(:doc) { context.create_document }
|
|
6
|
+
let(:element) { doc.create_element("test") }
|
|
7
|
+
|
|
8
|
+
describe "RFC 3986 URI-reference validation" do
|
|
9
|
+
context "with valid absolute URIs" do
|
|
10
|
+
%w[
|
|
11
|
+
http://example.com
|
|
12
|
+
https://www.w3.org/2001/XMLSchema
|
|
13
|
+
urn:isbn:12345
|
|
14
|
+
ftp://ftp.is.co.za/rfc/rfc1808.txt
|
|
15
|
+
mailto:John.Doe@example.com
|
|
16
|
+
mailto:bar
|
|
17
|
+
tel:+1-816-555-1212
|
|
18
|
+
zarquon://example.org/namespace
|
|
19
|
+
http://example.org/namespace#apples
|
|
20
|
+
data:text/plain;base64,SGVsbG8=
|
|
21
|
+
tag:example.com,2000:test
|
|
22
|
+
].each do |uri|
|
|
23
|
+
it "accepts #{uri.inspect}" do
|
|
24
|
+
expect { element.add_namespace("ns", uri) }.not_to raise_error
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
context "with valid relative URI-references" do
|
|
30
|
+
%w[
|
|
31
|
+
my-custom-ns
|
|
32
|
+
../relative
|
|
33
|
+
namespaces/zaphod
|
|
34
|
+
path/to/resource
|
|
35
|
+
hello%20world
|
|
36
|
+
].each do |uri|
|
|
37
|
+
it "accepts #{uri.inspect}" do
|
|
38
|
+
expect { element.add_namespace("ns", uri) }.not_to raise_error
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
context "with valid fragment-only references" do
|
|
44
|
+
%w[
|
|
45
|
+
#fragment
|
|
46
|
+
#beeblebrox
|
|
47
|
+
].each do |uri|
|
|
48
|
+
it "accepts #{uri.inspect}" do
|
|
49
|
+
expect { element.add_namespace("ns", uri) }.not_to raise_error
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
context "with invalid URIs" do
|
|
55
|
+
[
|
|
56
|
+
"invalid uri",
|
|
57
|
+
"has space",
|
|
58
|
+
"two spaces",
|
|
59
|
+
].each do |uri|
|
|
60
|
+
it "rejects #{uri.inspect}" do
|
|
61
|
+
expect do
|
|
62
|
+
element.add_namespace("ns", uri)
|
|
63
|
+
end.to raise_error(Moxml::NamespaceError, /Invalid URI/)
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
context "with control characters" do
|
|
69
|
+
[
|
|
70
|
+
"invalid\x00uri",
|
|
71
|
+
"bad\x01char",
|
|
72
|
+
"control\x1Fchar",
|
|
73
|
+
].each do |uri|
|
|
74
|
+
it "rejects URI containing control characters" do
|
|
75
|
+
expect do
|
|
76
|
+
element.add_namespace("ns", uri)
|
|
77
|
+
end.to raise_error(Moxml::NamespaceError)
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
describe "empty URI constraint" do
|
|
84
|
+
it "accepts empty URI for default namespace undeclaration" do
|
|
85
|
+
expect { element.add_namespace(nil, "") }.not_to raise_error
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
it "rejects empty URI for prefixed namespace declarations" do
|
|
89
|
+
expect do
|
|
90
|
+
element.add_namespace("xs", "")
|
|
91
|
+
end.to raise_error(Moxml::NamespaceError, /empty URI/)
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
describe "lenient namespace_uri_mode" do
|
|
96
|
+
let(:context) do
|
|
97
|
+
Moxml.new do |config|
|
|
98
|
+
config.namespace_uri_mode = :lenient
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
context "with non-URI strings" do
|
|
103
|
+
[
|
|
104
|
+
"invalid uri",
|
|
105
|
+
"has space",
|
|
106
|
+
"two spaces",
|
|
107
|
+
"my custom namespace",
|
|
108
|
+
"not-a-uri but still valid in lenient mode",
|
|
109
|
+
].each do |uri|
|
|
110
|
+
it "accepts #{uri.inspect}" do
|
|
111
|
+
expect { element.add_namespace("ns", uri) }.not_to raise_error
|
|
112
|
+
end
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
context "with control characters" do
|
|
117
|
+
[
|
|
118
|
+
"invalid\x00uri",
|
|
119
|
+
"bad\x01char",
|
|
120
|
+
"control\x1Fchar",
|
|
121
|
+
].each do |uri|
|
|
122
|
+
it "still rejects #{uri.inspect}" do
|
|
123
|
+
expect do
|
|
124
|
+
element.add_namespace("ns", uri)
|
|
125
|
+
end.to raise_error(Moxml::NamespaceError)
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
it "still rejects empty URI for prefixed namespace declarations" do
|
|
131
|
+
expect do
|
|
132
|
+
element.add_namespace("xs", "")
|
|
133
|
+
end.to raise_error(Moxml::NamespaceError, /empty URI/)
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
it "still accepts valid URIs" do
|
|
137
|
+
expect { element.add_namespace("ns", "http://example.com") }.not_to raise_error
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
end
|
|
@@ -36,13 +36,12 @@ RSpec.describe "XPath Axes" do
|
|
|
36
36
|
compiler.respond_to?(:"on_axis_#{axis}", true)
|
|
37
37
|
end
|
|
38
38
|
|
|
39
|
-
|
|
40
|
-
implemented.each { |a| puts " ✓ #{a}" }
|
|
39
|
+
implemented.each { |a| }
|
|
41
40
|
|
|
42
41
|
missing = axes - implemented
|
|
43
42
|
if missing.any?
|
|
44
|
-
|
|
45
|
-
missing.each { |a|
|
|
43
|
+
|
|
44
|
+
missing.each { |a| }
|
|
46
45
|
end
|
|
47
46
|
|
|
48
47
|
# Expect at least 6 axes (3 original + 3 new critical axes)
|
|
@@ -6,7 +6,7 @@ require "yaml"
|
|
|
6
6
|
require "fileutils"
|
|
7
7
|
require "time"
|
|
8
8
|
|
|
9
|
-
RSpec.describe "XPath Performance Benchmark" do
|
|
9
|
+
RSpec.describe "XPath Performance Benchmark", :performance do
|
|
10
10
|
if ENV["SKIP_BENCHMARKS"]
|
|
11
11
|
it "skips benchmarks when SKIP_BENCHMARKS is set" do
|
|
12
12
|
skip "Benchmarks skipped. To run benchmarks, use: bundle exec rspec " \
|
|
@@ -59,14 +59,7 @@ RSpec.describe "XPath Performance Benchmark" do
|
|
|
59
59
|
|
|
60
60
|
describe "XPath query performance" do
|
|
61
61
|
it "benchmarks XPath operations across all adapters" do
|
|
62
|
-
|
|
63
|
-
puts "XPath Performance Benchmark - All Adapters"
|
|
64
|
-
puts "=" * 80
|
|
65
|
-
|
|
66
|
-
xpath_patterns.each do |pattern_name, xpath|
|
|
67
|
-
puts "\nPattern: #{pattern_name}"
|
|
68
|
-
puts "-" * 80
|
|
69
|
-
|
|
62
|
+
xpath_patterns.each_value do |xpath|
|
|
70
63
|
Benchmark.ips do |x|
|
|
71
64
|
x.config(time: 5, warmup: 2)
|
|
72
65
|
|
|
@@ -83,20 +76,10 @@ RSpec.describe "XPath Performance Benchmark" do
|
|
|
83
76
|
|
|
84
77
|
x.compare!
|
|
85
78
|
end
|
|
86
|
-
|
|
87
|
-
puts "\n"
|
|
88
79
|
end
|
|
89
|
-
|
|
90
|
-
puts "=" * 80
|
|
91
|
-
puts "Benchmark complete"
|
|
92
|
-
puts "=" * 80
|
|
93
80
|
end
|
|
94
81
|
|
|
95
82
|
it "generates detailed performance comparison table" do
|
|
96
|
-
puts "\n#{'=' * 80}"
|
|
97
|
-
puts "Detailed XPath Performance Comparison"
|
|
98
|
-
puts "=" * 80
|
|
99
|
-
|
|
100
83
|
results_table = {}
|
|
101
84
|
|
|
102
85
|
xpath_patterns.each do |pattern_name, xpath|
|
|
@@ -121,28 +104,17 @@ RSpec.describe "XPath Performance Benchmark" do
|
|
|
121
104
|
end
|
|
122
105
|
end
|
|
123
106
|
|
|
124
|
-
puts "\nResults (operations per second):"
|
|
125
|
-
puts "-" * 80
|
|
126
|
-
|
|
127
107
|
adapters.each do |adapter|
|
|
128
|
-
puts "\n#{adapter.to_s.capitalize}:"
|
|
129
108
|
xpath_patterns.each_key do |pattern_name|
|
|
130
109
|
ops = results_table[pattern_name][adapter]
|
|
131
110
|
if ops
|
|
132
|
-
|
|
133
|
-
else
|
|
134
|
-
puts " #{pattern_name}: Not supported"
|
|
111
|
+
|
|
135
112
|
end
|
|
136
113
|
end
|
|
137
114
|
end
|
|
138
115
|
|
|
139
|
-
puts "\n#{'=' * 80}"
|
|
140
|
-
puts "Relative Performance (fastest = 1.0x baseline):"
|
|
141
|
-
puts "-" * 80
|
|
142
|
-
|
|
143
116
|
relative_results = {}
|
|
144
117
|
xpath_patterns.each_key do |pattern_name|
|
|
145
|
-
puts "\n#{pattern_name}:"
|
|
146
118
|
valid_results = results_table[pattern_name].compact
|
|
147
119
|
next if valid_results.empty?
|
|
148
120
|
|
|
@@ -152,11 +124,10 @@ RSpec.describe "XPath Performance Benchmark" do
|
|
|
152
124
|
if ops
|
|
153
125
|
relative = ops / fastest
|
|
154
126
|
relative_results[pattern_name][adapter] = relative
|
|
155
|
-
|
|
156
|
-
"(#{ops.round(2)} ops/sec)"
|
|
127
|
+
|
|
157
128
|
else
|
|
158
129
|
relative_results[pattern_name][adapter] = nil
|
|
159
|
-
|
|
130
|
+
|
|
160
131
|
end
|
|
161
132
|
end
|
|
162
133
|
end
|
|
@@ -187,12 +158,6 @@ RSpec.describe "XPath Performance Benchmark" do
|
|
|
187
158
|
}
|
|
188
159
|
|
|
189
160
|
File.write(output_file, YAML.dump(yaml_data))
|
|
190
|
-
|
|
191
|
-
puts "\n#{'=' * 80}"
|
|
192
|
-
puts "Results saved to: #{output_file}"
|
|
193
|
-
puts "=" * 80
|
|
194
|
-
puts "Test complete - see output above for results"
|
|
195
|
-
puts "=" * 80
|
|
196
161
|
end
|
|
197
162
|
end
|
|
198
163
|
|
|
@@ -221,18 +186,9 @@ RSpec.describe "XPath Performance Benchmark" do
|
|
|
221
186
|
end
|
|
222
187
|
|
|
223
188
|
it "benchmarks namespace-aware XPath" do
|
|
224
|
-
puts "\n#{'=' * 80}"
|
|
225
|
-
puts "Namespace-Aware XPath Performance"
|
|
226
|
-
puts "=" * 80
|
|
227
|
-
puts "\nNote: REXML and Ox do not support namespace-aware XPath"
|
|
228
|
-
puts "-" * 80
|
|
229
|
-
|
|
230
189
|
namespace_capable_adapters = %i[nokogiri libxml oga]
|
|
231
190
|
|
|
232
|
-
namespace_patterns.
|
|
233
|
-
puts "\nPattern: #{pattern_name}"
|
|
234
|
-
puts "-" * 80
|
|
235
|
-
|
|
191
|
+
namespace_patterns.each_value do |(xpath, namespaces)|
|
|
236
192
|
Benchmark.ips do |x|
|
|
237
193
|
x.config(time: 5, warmup: 2)
|
|
238
194
|
|
|
@@ -248,11 +204,7 @@ RSpec.describe "XPath Performance Benchmark" do
|
|
|
248
204
|
|
|
249
205
|
x.compare!
|
|
250
206
|
end
|
|
251
|
-
|
|
252
|
-
puts "\n"
|
|
253
207
|
end
|
|
254
|
-
|
|
255
|
-
puts "=" * 80
|
|
256
208
|
end
|
|
257
209
|
end
|
|
258
210
|
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rexml/document"
|
|
4
|
+
|
|
5
|
+
W3C_NS_FIXTURES_DIR = File.expand_path("../fixtures/w3c/namespaces/1.0", __dir__)
|
|
6
|
+
|
|
7
|
+
# Parse the test catalog to get test metadata
|
|
8
|
+
def load_w3c_namespace_tests
|
|
9
|
+
catalog = File.read(File.join(W3C_NS_FIXTURES_DIR, "rmt-ns10.xml"))
|
|
10
|
+
doc = REXML::Document.new(catalog)
|
|
11
|
+
tests = []
|
|
12
|
+
doc.elements.each("TESTCASES/TEST") do |test_el|
|
|
13
|
+
tests << {
|
|
14
|
+
id: test_el.attributes["ID"],
|
|
15
|
+
uri: test_el.attributes["URI"],
|
|
16
|
+
type: test_el.attributes["TYPE"],
|
|
17
|
+
sections: test_el.attributes["SECTIONS"],
|
|
18
|
+
description: test_el.text.strip,
|
|
19
|
+
}
|
|
20
|
+
end
|
|
21
|
+
tests
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
W3C_NAMESPACE_TESTS = load_w3c_namespace_tests
|
|
25
|
+
|
|
26
|
+
# Known adapter-level limitations for specific tests.
|
|
27
|
+
# These are parser bugs/limitations, not moxml issues.
|
|
28
|
+
ADAPTER_SKIP_TESTS = {
|
|
29
|
+
# Test 006: ISO-8859-1 encoded IRI — adapters receive binary-read content
|
|
30
|
+
# and may fail on encoding before namespace processing begins.
|
|
31
|
+
"rmt-ns10-006" => :all,
|
|
32
|
+
# Test 047: DOCTYPE with colon in element name — Oga parser limitation.
|
|
33
|
+
"ht-ns10-047" => [:oga],
|
|
34
|
+
}.freeze
|
|
35
|
+
|
|
36
|
+
def skip_for_adapter?(test_id, adapter)
|
|
37
|
+
skip_config = ADAPTER_SKIP_TESTS[test_id]
|
|
38
|
+
return false unless skip_config
|
|
39
|
+
|
|
40
|
+
skip_config == :all || skip_config.include?(adapter)
|
|
41
|
+
end
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "rspec"
|
|
4
|
+
require "moxml"
|
|
5
|
+
|
|
6
|
+
RSpec.describe "REXML Adapter Isolated Test" do
|
|
7
|
+
let(:rexml_context) { Moxml.new(:rexml) }
|
|
8
|
+
|
|
9
|
+
describe "text extraction behavior" do
|
|
10
|
+
it "extracts simple text correctly" do
|
|
11
|
+
xml = <<~XML
|
|
12
|
+
<root>Hello World</root>
|
|
13
|
+
XML
|
|
14
|
+
|
|
15
|
+
doc = rexml_context.parse(xml.dup)
|
|
16
|
+
text = doc.root.text
|
|
17
|
+
|
|
18
|
+
expect(text).to eq("Hello World")
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
it "demonstrates BMJBMJ concatenation issue - should NOT add spaces" do
|
|
22
|
+
xml = <<~XML
|
|
23
|
+
<root>
|
|
24
|
+
<journal>BMJ</journal>
|
|
25
|
+
<journal>BMJ</journal>
|
|
26
|
+
</root>
|
|
27
|
+
XML
|
|
28
|
+
|
|
29
|
+
doc = rexml_context.parse(xml.dup)
|
|
30
|
+
text = doc.root.text
|
|
31
|
+
|
|
32
|
+
# This should FAIL to demonstrate the round-trip issue
|
|
33
|
+
# Other adapters produce: "BMJBMJ"
|
|
34
|
+
# REXML currently produces: "BMJ BMJ" (with space)
|
|
35
|
+
# For round-trip compatibility, REXML should produce "BMJBMJ"
|
|
36
|
+
expect(text).to eq("BMJBMJ")
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
it "demonstrates mixed case transition issue - should NOT add spaces" do
|
|
40
|
+
xml = <<~XML
|
|
41
|
+
<root>
|
|
42
|
+
<issn>0959-8138</issn>
|
|
43
|
+
<publisher>BMJ</publisher>
|
|
44
|
+
<author>j</author>
|
|
45
|
+
</root>
|
|
46
|
+
XML
|
|
47
|
+
|
|
48
|
+
doc = rexml_context.parse(xml.dup)
|
|
49
|
+
text = doc.root.text
|
|
50
|
+
|
|
51
|
+
# This should FAIL to demonstrate the round-trip issue
|
|
52
|
+
# Other adapters produce: "0959-8138BMJj"
|
|
53
|
+
# REXML currently produces: "0959-8138 BMJ j" (with spaces)
|
|
54
|
+
# For round-trip compatibility, REXML should produce "0959-8138BMJj"
|
|
55
|
+
expect(text).to eq("0959-8138BMJj")
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
it "demonstrates digit transition issue - should NOT add spaces" do
|
|
59
|
+
xml = <<~XML
|
|
60
|
+
<root>
|
|
61
|
+
<volume>324</volume>
|
|
62
|
+
<issue>i7342</issue>
|
|
63
|
+
<page>pg880</page>
|
|
64
|
+
<id>11950738</id>
|
|
65
|
+
</root>
|
|
66
|
+
XML
|
|
67
|
+
|
|
68
|
+
doc = rexml_context.parse(xml.dup)
|
|
69
|
+
text = doc.root.text
|
|
70
|
+
|
|
71
|
+
# This should FAIL to demonstrate the round-trip issue
|
|
72
|
+
# Other adapters produce: "324i7342pg88011950738"
|
|
73
|
+
# REXML currently produces: "324 i7342 pg880 11950738" (with spaces)
|
|
74
|
+
# For round-trip compatibility, REXML should produce "324i7342pg88011950738"
|
|
75
|
+
expect(text).to eq("324i7342pg88011950738")
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
it "demonstrates word boundary issue - should NOT add spaces" do
|
|
79
|
+
xml = <<~XML
|
|
80
|
+
<root>
|
|
81
|
+
<article-type>version-of-record</article-type>
|
|
82
|
+
<title>Primary</title>
|
|
83
|
+
</root>
|
|
84
|
+
XML
|
|
85
|
+
|
|
86
|
+
doc = rexml_context.parse(xml.dup)
|
|
87
|
+
text = doc.root.text
|
|
88
|
+
|
|
89
|
+
# This should FAIL to demonstrate the round-trip issue
|
|
90
|
+
# Other adapters produce: "version-of-recordPrimary"
|
|
91
|
+
# REXML currently produces: "version-of-record Primary" (with space)
|
|
92
|
+
# For round-trip compatibility, REXML should produce "version-of-recordPrimary"
|
|
93
|
+
expect(text).to eq("version-of-recordPrimary")
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
it "demonstrates complex mixed content issue - should NOT add spaces" do
|
|
97
|
+
xml = <<~XML
|
|
98
|
+
<root>
|
|
99
|
+
<section>Primary</section>
|
|
100
|
+
<year>190</year>
|
|
101
|
+
<page>102</page>
|
|
102
|
+
<id>18219355357</id>
|
|
103
|
+
</root>
|
|
104
|
+
XML
|
|
105
|
+
|
|
106
|
+
doc = rexml_context.parse(xml.dup)
|
|
107
|
+
text = doc.root.text
|
|
108
|
+
|
|
109
|
+
# This should FAIL to demonstrate the round-trip issue
|
|
110
|
+
# Other adapters produce: "Primary19010218219355357"
|
|
111
|
+
# REXML currently produces: "Primary 190 102 18219355357" (with spaces)
|
|
112
|
+
# For round-trip compatibility, REXML should produce "Primary19010218219355357"
|
|
113
|
+
expect(text).to eq("Primary19010218219355357")
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
it "demonstrates all patterns together - should NOT add spaces" do
|
|
117
|
+
xml = <<~XML
|
|
118
|
+
<root>
|
|
119
|
+
<journal>BMJ</journal>
|
|
120
|
+
<journal>BMJ</journal>
|
|
121
|
+
<issn>0959-8138</issn>
|
|
122
|
+
<publisher>BMJ</publisher>
|
|
123
|
+
<author>j</author>
|
|
124
|
+
<volume>324</volume>
|
|
125
|
+
<issue>i7342</issue>
|
|
126
|
+
<page>pg880</page>
|
|
127
|
+
<id>11950738</id>
|
|
128
|
+
<article-type>version-of-record</article-type>
|
|
129
|
+
<section>Primary</section>
|
|
130
|
+
<year>190</year>
|
|
131
|
+
<page>102</page>
|
|
132
|
+
<id>18219355357</id>
|
|
133
|
+
</root>
|
|
134
|
+
XML
|
|
135
|
+
|
|
136
|
+
doc = rexml_context.parse(xml.dup)
|
|
137
|
+
text = doc.root.text
|
|
138
|
+
|
|
139
|
+
# This should FAIL to demonstrate the round-trip issue
|
|
140
|
+
# Other adapters produce: "BMJBMJ0959-8138BMJj324i7342pg88011950738version-of-recordPrimary19010218219355357"
|
|
141
|
+
# REXML currently produces: "BMJ BMJ 0959-8138 BMJ j 324 i7342 pg880 11950738 version-of-record Primary 190 102 18219355357" (with spaces)
|
|
142
|
+
# For round-trip compatibility, REXML should produce concatenated version
|
|
143
|
+
expect(text).to eq("BMJBMJ0959-8138BMJj324i7342pg88011950738version-of-recordPrimary19010218219355357")
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
it "demonstrates specific round-trip failure patterns - BMJ.v" do
|
|
147
|
+
xml = <<~XML
|
|
148
|
+
<root>
|
|
149
|
+
<journal>BMJ</journal>
|
|
150
|
+
<volume>v</volume>
|
|
151
|
+
<number>324</number>
|
|
152
|
+
</root>
|
|
153
|
+
XML
|
|
154
|
+
|
|
155
|
+
doc = rexml_context.parse(xml.dup)
|
|
156
|
+
text = doc.root.text
|
|
157
|
+
|
|
158
|
+
# Based on actual adapter behavior: both nokogiri and rexml produce "BMJv324"
|
|
159
|
+
# The test expectation was wrong - should expect "BMJv324" not "BMJ.v324"
|
|
160
|
+
expect(text).to eq("BMJv324")
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
it "demonstrates specific round-trip failure patterns - i7342.pg" do
|
|
164
|
+
xml = <<~XML
|
|
165
|
+
<root>
|
|
166
|
+
<issue>i7342</issue>
|
|
167
|
+
<page>pg880</page>
|
|
168
|
+
</root>
|
|
169
|
+
XML
|
|
170
|
+
|
|
171
|
+
doc = rexml_context.parse(xml.dup)
|
|
172
|
+
text = doc.root.text
|
|
173
|
+
|
|
174
|
+
# Based on actual adapter behavior: both nokogiri and rexml produce "i7342pg880"
|
|
175
|
+
# The test expectation was wrong - should expect "i7342pg880" not "i7342.pg880"
|
|
176
|
+
expect(text).to eq("i7342pg880")
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
it "demonstrates specific round-trip failure patterns - 190102" do
|
|
180
|
+
xml = <<~XML
|
|
181
|
+
<root>
|
|
182
|
+
<year>190</year>
|
|
183
|
+
<page>102</page>
|
|
184
|
+
</root>
|
|
185
|
+
XML
|
|
186
|
+
|
|
187
|
+
doc = rexml_context.parse(xml.dup)
|
|
188
|
+
text = doc.root.text
|
|
189
|
+
|
|
190
|
+
# Based on round-trip failure: expected "190102" but got "190 102"
|
|
191
|
+
expect(text).to eq("190102")
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
it "demonstrates round-trip failure pattern - bmj BMJ" do
|
|
195
|
+
xml = <<~XML
|
|
196
|
+
<root>
|
|
197
|
+
<journal>bmj</journal>
|
|
198
|
+
<journal>BMJ</journal>
|
|
199
|
+
</root>
|
|
200
|
+
XML
|
|
201
|
+
|
|
202
|
+
doc = rexml_context.parse(xml.dup)
|
|
203
|
+
text = doc.root.text
|
|
204
|
+
|
|
205
|
+
# Based on round-trip failure: expected "bmjBMJ" but got "bmj BMJ"
|
|
206
|
+
expect(text).to eq("bmjBMJ")
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
it "demonstrates round-trip failure pattern - 8138 BMJ" do
|
|
210
|
+
xml = <<~XML
|
|
211
|
+
<root>
|
|
212
|
+
<issn>8138</issn>
|
|
213
|
+
<publisher>BMJ</publisher>
|
|
214
|
+
</root>
|
|
215
|
+
XML
|
|
216
|
+
|
|
217
|
+
doc = rexml_context.parse(xml.dup)
|
|
218
|
+
text = doc.root.text
|
|
219
|
+
|
|
220
|
+
# Based on round-trip failure: expected "8138BMJ" but got "8138 BMJ"
|
|
221
|
+
expect(text).to eq("8138BMJ")
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
it "demonstrates round-trip failure pattern - BMJ.v 324" do
|
|
225
|
+
xml = <<~XML
|
|
226
|
+
<root>
|
|
227
|
+
<journal>BMJ</journal>
|
|
228
|
+
<volume>v</volume>
|
|
229
|
+
<number>324</number>
|
|
230
|
+
</root>
|
|
231
|
+
XML
|
|
232
|
+
|
|
233
|
+
doc = rexml_context.parse(xml.dup)
|
|
234
|
+
text = doc.root.text
|
|
235
|
+
|
|
236
|
+
# Based on actual adapter behavior: both nokogiri and rexml produce "BMJv324"
|
|
237
|
+
# The test expectation was wrong - should expect "BMJv324" not "BMJ.v324"
|
|
238
|
+
expect(text).to eq("BMJv324")
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
it "demonstrates round-trip failure pattern - 7342 880" do
|
|
242
|
+
xml = <<~XML
|
|
243
|
+
<root>
|
|
244
|
+
<issue>7342</issue>
|
|
245
|
+
<page>880</page>
|
|
246
|
+
</root>
|
|
247
|
+
XML
|
|
248
|
+
|
|
249
|
+
doc = rexml_context.parse(xml.dup)
|
|
250
|
+
text = doc.root.text
|
|
251
|
+
|
|
252
|
+
# Based on round-trip failure: expected "7342880" but got "7342 880"
|
|
253
|
+
expect(text).to eq("7342880")
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
it "demonstrates round-trip failure pattern - version-of-record Primary" do
|
|
257
|
+
xml = <<~XML
|
|
258
|
+
<root>
|
|
259
|
+
<article-type>version-of-record</article-type>
|
|
260
|
+
<section>Primary</section>
|
|
261
|
+
</root>
|
|
262
|
+
XML
|
|
263
|
+
|
|
264
|
+
doc = rexml_context.parse(xml.dup)
|
|
265
|
+
text = doc.root.text
|
|
266
|
+
|
|
267
|
+
# Based on round-trip failure: expected "version-of-recordPrimary" but got "version-of-record Primary"
|
|
268
|
+
expect(text).to eq("version-of-recordPrimary")
|
|
269
|
+
end
|
|
270
|
+
end
|
|
271
|
+
end
|