moxml 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/docs.yml +1 -1
  3. data/.github/workflows/rake.yml +16 -13
  4. data/.github/workflows/release.yml +1 -0
  5. data/.github/workflows/round-trip.yml +74 -0
  6. data/.gitignore +1 -0
  7. data/.rubocop.yml +1 -0
  8. data/.rubocop_todo.yml +160 -38
  9. data/Gemfile +2 -1
  10. data/README.adoc +236 -0
  11. data/Rakefile +11 -0
  12. data/data/w3c_entities.json +2131 -0
  13. data/docs/ENTITY_SUPPORT_FOR_LUTAML_MODEL.md +102 -0
  14. data/docs/_pages/adapters/ox.adoc +30 -0
  15. data/docs/_pages/configuration.adoc +43 -0
  16. data/docs/_pages/node-api-reference.adoc +35 -0
  17. data/docs/_tutorials/namespace-handling.adoc +21 -0
  18. data/examples/rss_parser/rss_parser.rb +1 -3
  19. data/lib/moxml/adapter/base.rb +26 -2
  20. data/lib/moxml/adapter/headed_ox.rb +5 -4
  21. data/lib/moxml/adapter/libxml.rb +3 -2
  22. data/lib/moxml/adapter/nokogiri.rb +16 -3
  23. data/lib/moxml/adapter/oga.rb +124 -20
  24. data/lib/moxml/adapter/ox.rb +4 -3
  25. data/lib/moxml/adapter/rexml.rb +41 -7
  26. data/lib/moxml/builder.rb +6 -0
  27. data/lib/moxml/config.rb +52 -1
  28. data/lib/moxml/context.rb +21 -2
  29. data/lib/moxml/document.rb +6 -1
  30. data/lib/moxml/document_builder.rb +45 -1
  31. data/lib/moxml/element.rb +4 -3
  32. data/lib/moxml/entity_reference.rb +29 -0
  33. data/lib/moxml/entity_registry.rb +278 -0
  34. data/lib/moxml/node.rb +10 -8
  35. data/lib/moxml/node_set.rb +10 -6
  36. data/lib/moxml/version.rb +1 -1
  37. data/lib/moxml/xml_utils.rb +25 -2
  38. data/lib/moxml.rb +1 -0
  39. data/spec/consistency/README.md +3 -1
  40. data/spec/consistency/round_trip_spec.rb +479 -0
  41. data/spec/examples/readme_examples_spec.rb +1 -1
  42. data/spec/fixtures/round-trips/metanorma/a.xml +66 -0
  43. data/spec/fixtures/round-trips/metanorma/bilingual-en.xml +7682 -0
  44. data/spec/fixtures/round-trips/metanorma/bilingual-fr.xml +7520 -0
  45. data/spec/fixtures/round-trips/metanorma/bilingual.presentation.xml +21211 -0
  46. data/spec/fixtures/round-trips/metanorma/collection1.xml +313 -0
  47. data/spec/fixtures/round-trips/metanorma/collection1nested.xml +291 -0
  48. data/spec/fixtures/round-trips/metanorma/collection_docinline.xml +544 -0
  49. data/spec/fixtures/round-trips/metanorma/collection_full.xml +1776 -0
  50. data/spec/fixtures/round-trips/metanorma/dummy.1.xml +295 -0
  51. data/spec/fixtures/round-trips/metanorma/dummy.xml +349 -0
  52. data/spec/fixtures/round-trips/metanorma/footnotes.xml +70 -0
  53. data/spec/fixtures/round-trips/metanorma/iho.xml +116 -0
  54. data/spec/fixtures/round-trips/metanorma/rice-amd.final.xml +186 -0
  55. data/spec/fixtures/round-trips/metanorma/rice-amd.final_1.xml +180 -0
  56. data/spec/fixtures/round-trips/metanorma/rice-en.final.norepo.xml +116 -0
  57. data/spec/fixtures/round-trips/metanorma/rice-en.final.xml +149 -0
  58. data/spec/fixtures/round-trips/metanorma/rice-en.final_1.xml +144 -0
  59. data/spec/fixtures/round-trips/metanorma/rice1-en.final.xml +120 -0
  60. data/spec/fixtures/round-trips/metanorma/rice2-en.final.xml +116 -0
  61. data/spec/fixtures/round-trips/metanorma/test_sectionsplit.xml +119 -0
  62. data/spec/fixtures/round-trips/niso-jats/bmj_sample.xml +1068 -0
  63. data/spec/fixtures/round-trips/niso-jats/element_citation.xml +7 -0
  64. data/spec/fixtures/round-trips/niso-jats/pnas_sample.xml +3768 -0
  65. data/spec/fixtures/round-trips/rfcxml/rfc8881.xml +45848 -0
  66. data/spec/fixtures/round-trips/rfcxml/rfc8994.xml +6607 -0
  67. data/spec/fixtures/round-trips/rfcxml/rfc9000.xml +9064 -0
  68. data/spec/fixtures/round-trips/rfcxml/rfc9043.xml +5527 -0
  69. data/spec/fixtures/round-trips/rfcxml/rfc9051.xml +14286 -0
  70. data/spec/fixtures/round-trips/rfcxml/rfc9110.xml +18156 -0
  71. data/spec/fixtures/round-trips/rfcxml/rfc9260.xml +9136 -0
  72. data/spec/fixtures/round-trips/rfcxml/rfc9293.xml +8300 -0
  73. data/spec/fixtures/round-trips/rfcxml/rfc9380.xml +8916 -0
  74. data/spec/fixtures/round-trips/rfcxml/rfc9420.xml +8927 -0
  75. data/spec/fixtures/w3c/namespaces/1.0/001.xml +7 -0
  76. data/spec/fixtures/w3c/namespaces/1.0/002.xml +8 -0
  77. data/spec/fixtures/w3c/namespaces/1.0/003.xml +7 -0
  78. data/spec/fixtures/w3c/namespaces/1.0/004.xml +7 -0
  79. data/spec/fixtures/w3c/namespaces/1.0/005.xml +7 -0
  80. data/spec/fixtures/w3c/namespaces/1.0/006.xml +7 -0
  81. data/spec/fixtures/w3c/namespaces/1.0/007.xml +20 -0
  82. data/spec/fixtures/w3c/namespaces/1.0/008.xml +20 -0
  83. data/spec/fixtures/w3c/namespaces/1.0/009.xml +19 -0
  84. data/spec/fixtures/w3c/namespaces/1.0/010.xml +19 -0
  85. data/spec/fixtures/w3c/namespaces/1.0/011.xml +20 -0
  86. data/spec/fixtures/w3c/namespaces/1.0/012.xml +19 -0
  87. data/spec/fixtures/w3c/namespaces/1.0/013.xml +5 -0
  88. data/spec/fixtures/w3c/namespaces/1.0/014.xml +3 -0
  89. data/spec/fixtures/w3c/namespaces/1.0/015.xml +3 -0
  90. data/spec/fixtures/w3c/namespaces/1.0/016.xml +3 -0
  91. data/spec/fixtures/w3c/namespaces/1.0/017.xml +3 -0
  92. data/spec/fixtures/w3c/namespaces/1.0/018.xml +3 -0
  93. data/spec/fixtures/w3c/namespaces/1.0/019.xml +3 -0
  94. data/spec/fixtures/w3c/namespaces/1.0/020.xml +3 -0
  95. data/spec/fixtures/w3c/namespaces/1.0/021.xml +6 -0
  96. data/spec/fixtures/w3c/namespaces/1.0/022.xml +6 -0
  97. data/spec/fixtures/w3c/namespaces/1.0/023.xml +6 -0
  98. data/spec/fixtures/w3c/namespaces/1.0/024.xml +6 -0
  99. data/spec/fixtures/w3c/namespaces/1.0/025.xml +3 -0
  100. data/spec/fixtures/w3c/namespaces/1.0/026.xml +3 -0
  101. data/spec/fixtures/w3c/namespaces/1.0/027.xml +3 -0
  102. data/spec/fixtures/w3c/namespaces/1.0/028.xml +3 -0
  103. data/spec/fixtures/w3c/namespaces/1.0/029.xml +4 -0
  104. data/spec/fixtures/w3c/namespaces/1.0/030.xml +4 -0
  105. data/spec/fixtures/w3c/namespaces/1.0/031.xml +4 -0
  106. data/spec/fixtures/w3c/namespaces/1.0/032.xml +5 -0
  107. data/spec/fixtures/w3c/namespaces/1.0/033.xml +4 -0
  108. data/spec/fixtures/w3c/namespaces/1.0/034.xml +3 -0
  109. data/spec/fixtures/w3c/namespaces/1.0/035.xml +8 -0
  110. data/spec/fixtures/w3c/namespaces/1.0/036.xml +8 -0
  111. data/spec/fixtures/w3c/namespaces/1.0/037.xml +8 -0
  112. data/spec/fixtures/w3c/namespaces/1.0/038.xml +8 -0
  113. data/spec/fixtures/w3c/namespaces/1.0/039.xml +10 -0
  114. data/spec/fixtures/w3c/namespaces/1.0/040.xml +9 -0
  115. data/spec/fixtures/w3c/namespaces/1.0/041.xml +8 -0
  116. data/spec/fixtures/w3c/namespaces/1.0/042.xml +4 -0
  117. data/spec/fixtures/w3c/namespaces/1.0/043.xml +7 -0
  118. data/spec/fixtures/w3c/namespaces/1.0/044.xml +7 -0
  119. data/spec/fixtures/w3c/namespaces/1.0/045.xml +7 -0
  120. data/spec/fixtures/w3c/namespaces/1.0/046.xml +10 -0
  121. data/spec/fixtures/w3c/namespaces/1.0/047.xml +4 -0
  122. data/spec/fixtures/w3c/namespaces/1.0/048.xml +5 -0
  123. data/spec/fixtures/w3c/namespaces/1.0/LICENSE.md +32 -0
  124. data/spec/fixtures/w3c/namespaces/1.0/README.adoc +42 -0
  125. data/spec/fixtures/w3c/namespaces/1.0/rmt-ns10.xml +156 -0
  126. data/spec/integration/shared_examples/node_wrappers/element_behavior.rb +14 -0
  127. data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +14 -2
  128. data/spec/integration/shared_examples/w3c_namespace_examples.rb +10 -0
  129. data/spec/integration/w3c_namespace_spec.rb +69 -0
  130. data/spec/moxml/adapter/libxml_spec.rb +7 -1
  131. data/spec/moxml/adapter/oga_spec.rb +92 -0
  132. data/spec/moxml/config_spec.rb +75 -0
  133. data/spec/moxml/entity_registry_spec.rb +184 -0
  134. data/spec/moxml/error_spec.rb +2 -2
  135. data/spec/moxml/namespace_uri_validation_spec.rb +140 -0
  136. data/spec/moxml/xpath/axes_spec.rb +3 -4
  137. data/spec/performance/xpath_benchmark_spec.rb +6 -54
  138. data/spec/support/w3c_namespace_helpers.rb +41 -0
  139. data/spec/unit/rexml_isolated_test.rb +271 -0
  140. metadata +98 -2
@@ -0,0 +1,140 @@
1
+ # frozen_string_literal: true
2
+
3
+ RSpec.describe "Namespace URI validation" do
4
+ let(:context) { Moxml.new }
5
+ let(:doc) { context.create_document }
6
+ let(:element) { doc.create_element("test") }
7
+
8
+ describe "RFC 3986 URI-reference validation" do
9
+ context "with valid absolute URIs" do
10
+ %w[
11
+ http://example.com
12
+ https://www.w3.org/2001/XMLSchema
13
+ urn:isbn:12345
14
+ ftp://ftp.is.co.za/rfc/rfc1808.txt
15
+ mailto:John.Doe@example.com
16
+ mailto:bar
17
+ tel:+1-816-555-1212
18
+ zarquon://example.org/namespace
19
+ http://example.org/namespace#apples
20
+ data:text/plain;base64,SGVsbG8=
21
+ tag:example.com,2000:test
22
+ ].each do |uri|
23
+ it "accepts #{uri.inspect}" do
24
+ expect { element.add_namespace("ns", uri) }.not_to raise_error
25
+ end
26
+ end
27
+ end
28
+
29
+ context "with valid relative URI-references" do
30
+ %w[
31
+ my-custom-ns
32
+ ../relative
33
+ namespaces/zaphod
34
+ path/to/resource
35
+ hello%20world
36
+ ].each do |uri|
37
+ it "accepts #{uri.inspect}" do
38
+ expect { element.add_namespace("ns", uri) }.not_to raise_error
39
+ end
40
+ end
41
+ end
42
+
43
+ context "with valid fragment-only references" do
44
+ %w[
45
+ #fragment
46
+ #beeblebrox
47
+ ].each do |uri|
48
+ it "accepts #{uri.inspect}" do
49
+ expect { element.add_namespace("ns", uri) }.not_to raise_error
50
+ end
51
+ end
52
+ end
53
+
54
+ context "with invalid URIs" do
55
+ [
56
+ "invalid uri",
57
+ "has space",
58
+ "two spaces",
59
+ ].each do |uri|
60
+ it "rejects #{uri.inspect}" do
61
+ expect do
62
+ element.add_namespace("ns", uri)
63
+ end.to raise_error(Moxml::NamespaceError, /Invalid URI/)
64
+ end
65
+ end
66
+ end
67
+
68
+ context "with control characters" do
69
+ [
70
+ "invalid\x00uri",
71
+ "bad\x01char",
72
+ "control\x1Fchar",
73
+ ].each do |uri|
74
+ it "rejects URI containing control characters" do
75
+ expect do
76
+ element.add_namespace("ns", uri)
77
+ end.to raise_error(Moxml::NamespaceError)
78
+ end
79
+ end
80
+ end
81
+ end
82
+
83
+ describe "empty URI constraint" do
84
+ it "accepts empty URI for default namespace undeclaration" do
85
+ expect { element.add_namespace(nil, "") }.not_to raise_error
86
+ end
87
+
88
+ it "rejects empty URI for prefixed namespace declarations" do
89
+ expect do
90
+ element.add_namespace("xs", "")
91
+ end.to raise_error(Moxml::NamespaceError, /empty URI/)
92
+ end
93
+ end
94
+
95
+ describe "lenient namespace_uri_mode" do
96
+ let(:context) do
97
+ Moxml.new do |config|
98
+ config.namespace_uri_mode = :lenient
99
+ end
100
+ end
101
+
102
+ context "with non-URI strings" do
103
+ [
104
+ "invalid uri",
105
+ "has space",
106
+ "two spaces",
107
+ "my custom namespace",
108
+ "not-a-uri but still valid in lenient mode",
109
+ ].each do |uri|
110
+ it "accepts #{uri.inspect}" do
111
+ expect { element.add_namespace("ns", uri) }.not_to raise_error
112
+ end
113
+ end
114
+ end
115
+
116
+ context "with control characters" do
117
+ [
118
+ "invalid\x00uri",
119
+ "bad\x01char",
120
+ "control\x1Fchar",
121
+ ].each do |uri|
122
+ it "still rejects #{uri.inspect}" do
123
+ expect do
124
+ element.add_namespace("ns", uri)
125
+ end.to raise_error(Moxml::NamespaceError)
126
+ end
127
+ end
128
+ end
129
+
130
+ it "still rejects empty URI for prefixed namespace declarations" do
131
+ expect do
132
+ element.add_namespace("xs", "")
133
+ end.to raise_error(Moxml::NamespaceError, /empty URI/)
134
+ end
135
+
136
+ it "still accepts valid URIs" do
137
+ expect { element.add_namespace("ns", "http://example.com") }.not_to raise_error
138
+ end
139
+ end
140
+ end
@@ -36,13 +36,12 @@ RSpec.describe "XPath Axes" do
36
36
  compiler.respond_to?(:"on_axis_#{axis}", true)
37
37
  end
38
38
 
39
- puts "\nImplemented Axes: #{implemented.size}/13"
40
- implemented.each { |a| puts " ✓ #{a}" }
39
+ implemented.each { |a| }
41
40
 
42
41
  missing = axes - implemented
43
42
  if missing.any?
44
- puts "\nMissing Axes: #{missing.size}/13"
45
- missing.each { |a| puts " ✗ #{a}" }
43
+
44
+ missing.each { |a| }
46
45
  end
47
46
 
48
47
  # Expect at least 6 axes (3 original + 3 new critical axes)
@@ -6,7 +6,7 @@ require "yaml"
6
6
  require "fileutils"
7
7
  require "time"
8
8
 
9
- RSpec.describe "XPath Performance Benchmark" do
9
+ RSpec.describe "XPath Performance Benchmark", :performance do
10
10
  if ENV["SKIP_BENCHMARKS"]
11
11
  it "skips benchmarks when SKIP_BENCHMARKS is set" do
12
12
  skip "Benchmarks skipped. To run benchmarks, use: bundle exec rspec " \
@@ -59,14 +59,7 @@ RSpec.describe "XPath Performance Benchmark" do
59
59
 
60
60
  describe "XPath query performance" do
61
61
  it "benchmarks XPath operations across all adapters" do
62
- puts "\n#{'=' * 80}"
63
- puts "XPath Performance Benchmark - All Adapters"
64
- puts "=" * 80
65
-
66
- xpath_patterns.each do |pattern_name, xpath|
67
- puts "\nPattern: #{pattern_name}"
68
- puts "-" * 80
69
-
62
+ xpath_patterns.each_value do |xpath|
70
63
  Benchmark.ips do |x|
71
64
  x.config(time: 5, warmup: 2)
72
65
 
@@ -83,20 +76,10 @@ RSpec.describe "XPath Performance Benchmark" do
83
76
 
84
77
  x.compare!
85
78
  end
86
-
87
- puts "\n"
88
79
  end
89
-
90
- puts "=" * 80
91
- puts "Benchmark complete"
92
- puts "=" * 80
93
80
  end
94
81
 
95
82
  it "generates detailed performance comparison table" do
96
- puts "\n#{'=' * 80}"
97
- puts "Detailed XPath Performance Comparison"
98
- puts "=" * 80
99
-
100
83
  results_table = {}
101
84
 
102
85
  xpath_patterns.each do |pattern_name, xpath|
@@ -121,28 +104,17 @@ RSpec.describe "XPath Performance Benchmark" do
121
104
  end
122
105
  end
123
106
 
124
- puts "\nResults (operations per second):"
125
- puts "-" * 80
126
-
127
107
  adapters.each do |adapter|
128
- puts "\n#{adapter.to_s.capitalize}:"
129
108
  xpath_patterns.each_key do |pattern_name|
130
109
  ops = results_table[pattern_name][adapter]
131
110
  if ops
132
- puts " #{pattern_name}: #{ops.round(2)} ops/sec"
133
- else
134
- puts " #{pattern_name}: Not supported"
111
+
135
112
  end
136
113
  end
137
114
  end
138
115
 
139
- puts "\n#{'=' * 80}"
140
- puts "Relative Performance (fastest = 1.0x baseline):"
141
- puts "-" * 80
142
-
143
116
  relative_results = {}
144
117
  xpath_patterns.each_key do |pattern_name|
145
- puts "\n#{pattern_name}:"
146
118
  valid_results = results_table[pattern_name].compact
147
119
  next if valid_results.empty?
148
120
 
@@ -152,11 +124,10 @@ RSpec.describe "XPath Performance Benchmark" do
152
124
  if ops
153
125
  relative = ops / fastest
154
126
  relative_results[pattern_name][adapter] = relative
155
- puts " #{adapter}: #{relative.round(3)}x " \
156
- "(#{ops.round(2)} ops/sec)"
127
+
157
128
  else
158
129
  relative_results[pattern_name][adapter] = nil
159
- puts " #{adapter}: Not supported"
130
+
160
131
  end
161
132
  end
162
133
  end
@@ -187,12 +158,6 @@ RSpec.describe "XPath Performance Benchmark" do
187
158
  }
188
159
 
189
160
  File.write(output_file, YAML.dump(yaml_data))
190
-
191
- puts "\n#{'=' * 80}"
192
- puts "Results saved to: #{output_file}"
193
- puts "=" * 80
194
- puts "Test complete - see output above for results"
195
- puts "=" * 80
196
161
  end
197
162
  end
198
163
 
@@ -221,18 +186,9 @@ RSpec.describe "XPath Performance Benchmark" do
221
186
  end
222
187
 
223
188
  it "benchmarks namespace-aware XPath" do
224
- puts "\n#{'=' * 80}"
225
- puts "Namespace-Aware XPath Performance"
226
- puts "=" * 80
227
- puts "\nNote: REXML and Ox do not support namespace-aware XPath"
228
- puts "-" * 80
229
-
230
189
  namespace_capable_adapters = %i[nokogiri libxml oga]
231
190
 
232
- namespace_patterns.each do |pattern_name, (xpath, namespaces)|
233
- puts "\nPattern: #{pattern_name}"
234
- puts "-" * 80
235
-
191
+ namespace_patterns.each_value do |(xpath, namespaces)|
236
192
  Benchmark.ips do |x|
237
193
  x.config(time: 5, warmup: 2)
238
194
 
@@ -248,11 +204,7 @@ RSpec.describe "XPath Performance Benchmark" do
248
204
 
249
205
  x.compare!
250
206
  end
251
-
252
- puts "\n"
253
207
  end
254
-
255
- puts "=" * 80
256
208
  end
257
209
  end
258
210
  end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rexml/document"
4
+
5
+ W3C_NS_FIXTURES_DIR = File.expand_path("../fixtures/w3c/namespaces/1.0", __dir__)
6
+
7
+ # Parse the test catalog to get test metadata
8
+ def load_w3c_namespace_tests
9
+ catalog = File.read(File.join(W3C_NS_FIXTURES_DIR, "rmt-ns10.xml"))
10
+ doc = REXML::Document.new(catalog)
11
+ tests = []
12
+ doc.elements.each("TESTCASES/TEST") do |test_el|
13
+ tests << {
14
+ id: test_el.attributes["ID"],
15
+ uri: test_el.attributes["URI"],
16
+ type: test_el.attributes["TYPE"],
17
+ sections: test_el.attributes["SECTIONS"],
18
+ description: test_el.text.strip,
19
+ }
20
+ end
21
+ tests
22
+ end
23
+
24
+ W3C_NAMESPACE_TESTS = load_w3c_namespace_tests
25
+
26
+ # Known adapter-level limitations for specific tests.
27
+ # These are parser bugs/limitations, not moxml issues.
28
+ ADAPTER_SKIP_TESTS = {
29
+ # Test 006: ISO-8859-1 encoded IRI — adapters receive binary-read content
30
+ # and may fail on encoding before namespace processing begins.
31
+ "rmt-ns10-006" => :all,
32
+ # Test 047: DOCTYPE with colon in element name — Oga parser limitation.
33
+ "ht-ns10-047" => [:oga],
34
+ }.freeze
35
+
36
+ def skip_for_adapter?(test_id, adapter)
37
+ skip_config = ADAPTER_SKIP_TESTS[test_id]
38
+ return false unless skip_config
39
+
40
+ skip_config == :all || skip_config.include?(adapter)
41
+ end
@@ -0,0 +1,271 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rspec"
4
+ require "moxml"
5
+
6
+ RSpec.describe "REXML Adapter Isolated Test" do
7
+ let(:rexml_context) { Moxml.new(:rexml) }
8
+
9
+ describe "text extraction behavior" do
10
+ it "extracts simple text correctly" do
11
+ xml = <<~XML
12
+ <root>Hello World</root>
13
+ XML
14
+
15
+ doc = rexml_context.parse(xml.dup)
16
+ text = doc.root.text
17
+
18
+ expect(text).to eq("Hello World")
19
+ end
20
+
21
+ it "demonstrates BMJBMJ concatenation issue - should NOT add spaces" do
22
+ xml = <<~XML
23
+ <root>
24
+ <journal>BMJ</journal>
25
+ <journal>BMJ</journal>
26
+ </root>
27
+ XML
28
+
29
+ doc = rexml_context.parse(xml.dup)
30
+ text = doc.root.text
31
+
32
+ # This should FAIL to demonstrate the round-trip issue
33
+ # Other adapters produce: "BMJBMJ"
34
+ # REXML currently produces: "BMJ BMJ" (with space)
35
+ # For round-trip compatibility, REXML should produce "BMJBMJ"
36
+ expect(text).to eq("BMJBMJ")
37
+ end
38
+
39
+ it "demonstrates mixed case transition issue - should NOT add spaces" do
40
+ xml = <<~XML
41
+ <root>
42
+ <issn>0959-8138</issn>
43
+ <publisher>BMJ</publisher>
44
+ <author>j</author>
45
+ </root>
46
+ XML
47
+
48
+ doc = rexml_context.parse(xml.dup)
49
+ text = doc.root.text
50
+
51
+ # This should FAIL to demonstrate the round-trip issue
52
+ # Other adapters produce: "0959-8138BMJj"
53
+ # REXML currently produces: "0959-8138 BMJ j" (with spaces)
54
+ # For round-trip compatibility, REXML should produce "0959-8138BMJj"
55
+ expect(text).to eq("0959-8138BMJj")
56
+ end
57
+
58
+ it "demonstrates digit transition issue - should NOT add spaces" do
59
+ xml = <<~XML
60
+ <root>
61
+ <volume>324</volume>
62
+ <issue>i7342</issue>
63
+ <page>pg880</page>
64
+ <id>11950738</id>
65
+ </root>
66
+ XML
67
+
68
+ doc = rexml_context.parse(xml.dup)
69
+ text = doc.root.text
70
+
71
+ # This should FAIL to demonstrate the round-trip issue
72
+ # Other adapters produce: "324i7342pg88011950738"
73
+ # REXML currently produces: "324 i7342 pg880 11950738" (with spaces)
74
+ # For round-trip compatibility, REXML should produce "324i7342pg88011950738"
75
+ expect(text).to eq("324i7342pg88011950738")
76
+ end
77
+
78
+ it "demonstrates word boundary issue - should NOT add spaces" do
79
+ xml = <<~XML
80
+ <root>
81
+ <article-type>version-of-record</article-type>
82
+ <title>Primary</title>
83
+ </root>
84
+ XML
85
+
86
+ doc = rexml_context.parse(xml.dup)
87
+ text = doc.root.text
88
+
89
+ # This should FAIL to demonstrate the round-trip issue
90
+ # Other adapters produce: "version-of-recordPrimary"
91
+ # REXML currently produces: "version-of-record Primary" (with space)
92
+ # For round-trip compatibility, REXML should produce "version-of-recordPrimary"
93
+ expect(text).to eq("version-of-recordPrimary")
94
+ end
95
+
96
+ it "demonstrates complex mixed content issue - should NOT add spaces" do
97
+ xml = <<~XML
98
+ <root>
99
+ <section>Primary</section>
100
+ <year>190</year>
101
+ <page>102</page>
102
+ <id>18219355357</id>
103
+ </root>
104
+ XML
105
+
106
+ doc = rexml_context.parse(xml.dup)
107
+ text = doc.root.text
108
+
109
+ # This should FAIL to demonstrate the round-trip issue
110
+ # Other adapters produce: "Primary19010218219355357"
111
+ # REXML currently produces: "Primary 190 102 18219355357" (with spaces)
112
+ # For round-trip compatibility, REXML should produce "Primary19010218219355357"
113
+ expect(text).to eq("Primary19010218219355357")
114
+ end
115
+
116
+ it "demonstrates all patterns together - should NOT add spaces" do
117
+ xml = <<~XML
118
+ <root>
119
+ <journal>BMJ</journal>
120
+ <journal>BMJ</journal>
121
+ <issn>0959-8138</issn>
122
+ <publisher>BMJ</publisher>
123
+ <author>j</author>
124
+ <volume>324</volume>
125
+ <issue>i7342</issue>
126
+ <page>pg880</page>
127
+ <id>11950738</id>
128
+ <article-type>version-of-record</article-type>
129
+ <section>Primary</section>
130
+ <year>190</year>
131
+ <page>102</page>
132
+ <id>18219355357</id>
133
+ </root>
134
+ XML
135
+
136
+ doc = rexml_context.parse(xml.dup)
137
+ text = doc.root.text
138
+
139
+ # This should FAIL to demonstrate the round-trip issue
140
+ # Other adapters produce: "BMJBMJ0959-8138BMJj324i7342pg88011950738version-of-recordPrimary19010218219355357"
141
+ # REXML currently produces: "BMJ BMJ 0959-8138 BMJ j 324 i7342 pg880 11950738 version-of-record Primary 190 102 18219355357" (with spaces)
142
+ # For round-trip compatibility, REXML should produce concatenated version
143
+ expect(text).to eq("BMJBMJ0959-8138BMJj324i7342pg88011950738version-of-recordPrimary19010218219355357")
144
+ end
145
+
146
+ it "demonstrates specific round-trip failure patterns - BMJ.v" do
147
+ xml = <<~XML
148
+ <root>
149
+ <journal>BMJ</journal>
150
+ <volume>v</volume>
151
+ <number>324</number>
152
+ </root>
153
+ XML
154
+
155
+ doc = rexml_context.parse(xml.dup)
156
+ text = doc.root.text
157
+
158
+ # Based on actual adapter behavior: both nokogiri and rexml produce "BMJv324"
159
+ # The test expectation was wrong - should expect "BMJv324" not "BMJ.v324"
160
+ expect(text).to eq("BMJv324")
161
+ end
162
+
163
+ it "demonstrates specific round-trip failure patterns - i7342.pg" do
164
+ xml = <<~XML
165
+ <root>
166
+ <issue>i7342</issue>
167
+ <page>pg880</page>
168
+ </root>
169
+ XML
170
+
171
+ doc = rexml_context.parse(xml.dup)
172
+ text = doc.root.text
173
+
174
+ # Based on actual adapter behavior: both nokogiri and rexml produce "i7342pg880"
175
+ # The test expectation was wrong - should expect "i7342pg880" not "i7342.pg880"
176
+ expect(text).to eq("i7342pg880")
177
+ end
178
+
179
+ it "demonstrates specific round-trip failure patterns - 190102" do
180
+ xml = <<~XML
181
+ <root>
182
+ <year>190</year>
183
+ <page>102</page>
184
+ </root>
185
+ XML
186
+
187
+ doc = rexml_context.parse(xml.dup)
188
+ text = doc.root.text
189
+
190
+ # Based on round-trip failure: expected "190102" but got "190 102"
191
+ expect(text).to eq("190102")
192
+ end
193
+
194
+ it "demonstrates round-trip failure pattern - bmj BMJ" do
195
+ xml = <<~XML
196
+ <root>
197
+ <journal>bmj</journal>
198
+ <journal>BMJ</journal>
199
+ </root>
200
+ XML
201
+
202
+ doc = rexml_context.parse(xml.dup)
203
+ text = doc.root.text
204
+
205
+ # Based on round-trip failure: expected "bmjBMJ" but got "bmj BMJ"
206
+ expect(text).to eq("bmjBMJ")
207
+ end
208
+
209
+ it "demonstrates round-trip failure pattern - 8138 BMJ" do
210
+ xml = <<~XML
211
+ <root>
212
+ <issn>8138</issn>
213
+ <publisher>BMJ</publisher>
214
+ </root>
215
+ XML
216
+
217
+ doc = rexml_context.parse(xml.dup)
218
+ text = doc.root.text
219
+
220
+ # Based on round-trip failure: expected "8138BMJ" but got "8138 BMJ"
221
+ expect(text).to eq("8138BMJ")
222
+ end
223
+
224
+ it "demonstrates round-trip failure pattern - BMJ.v 324" do
225
+ xml = <<~XML
226
+ <root>
227
+ <journal>BMJ</journal>
228
+ <volume>v</volume>
229
+ <number>324</number>
230
+ </root>
231
+ XML
232
+
233
+ doc = rexml_context.parse(xml.dup)
234
+ text = doc.root.text
235
+
236
+ # Based on actual adapter behavior: both nokogiri and rexml produce "BMJv324"
237
+ # The test expectation was wrong - should expect "BMJv324" not "BMJ.v324"
238
+ expect(text).to eq("BMJv324")
239
+ end
240
+
241
+ it "demonstrates round-trip failure pattern - 7342 880" do
242
+ xml = <<~XML
243
+ <root>
244
+ <issue>7342</issue>
245
+ <page>880</page>
246
+ </root>
247
+ XML
248
+
249
+ doc = rexml_context.parse(xml.dup)
250
+ text = doc.root.text
251
+
252
+ # Based on round-trip failure: expected "7342880" but got "7342 880"
253
+ expect(text).to eq("7342880")
254
+ end
255
+
256
+ it "demonstrates round-trip failure pattern - version-of-record Primary" do
257
+ xml = <<~XML
258
+ <root>
259
+ <article-type>version-of-record</article-type>
260
+ <section>Primary</section>
261
+ </root>
262
+ XML
263
+
264
+ doc = rexml_context.parse(xml.dup)
265
+ text = doc.root.text
266
+
267
+ # Based on round-trip failure: expected "version-of-recordPrimary" but got "version-of-record Primary"
268
+ expect(text).to eq("version-of-recordPrimary")
269
+ end
270
+ end
271
+ end