moxml 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/docs.yml +1 -1
  3. data/.github/workflows/rake.yml +16 -13
  4. data/.github/workflows/release.yml +1 -0
  5. data/.github/workflows/round-trip.yml +74 -0
  6. data/.gitignore +1 -0
  7. data/.rubocop.yml +1 -0
  8. data/.rubocop_todo.yml +160 -38
  9. data/Gemfile +2 -1
  10. data/README.adoc +287 -20
  11. data/Rakefile +11 -0
  12. data/data/w3c_entities.json +2131 -0
  13. data/docs/ENTITY_SUPPORT_FOR_LUTAML_MODEL.md +102 -0
  14. data/docs/_guides/index.adoc +14 -12
  15. data/docs/_guides/node-api-consistency.adoc +572 -0
  16. data/docs/_guides/xml-declaration.adoc +5 -5
  17. data/docs/_pages/adapters/ox.adoc +30 -0
  18. data/docs/_pages/adapters/rexml.adoc +1 -1
  19. data/docs/_pages/configuration.adoc +43 -0
  20. data/docs/_pages/node-api-reference.adoc +128 -3
  21. data/docs/_tutorials/namespace-handling.adoc +21 -0
  22. data/examples/rss_parser/rss_parser.rb +1 -3
  23. data/lib/moxml/adapter/base.rb +26 -2
  24. data/lib/moxml/adapter/headed_ox.rb +5 -4
  25. data/lib/moxml/adapter/libxml.rb +18 -3
  26. data/lib/moxml/adapter/nokogiri.rb +26 -2
  27. data/lib/moxml/adapter/oga.rb +137 -20
  28. data/lib/moxml/adapter/ox.rb +29 -3
  29. data/lib/moxml/adapter/rexml.rb +54 -7
  30. data/lib/moxml/attribute.rb +6 -0
  31. data/lib/moxml/builder.rb +6 -0
  32. data/lib/moxml/config.rb +52 -1
  33. data/lib/moxml/context.rb +21 -2
  34. data/lib/moxml/doctype.rb +33 -0
  35. data/lib/moxml/document.rb +6 -1
  36. data/lib/moxml/document_builder.rb +45 -1
  37. data/lib/moxml/element.rb +10 -3
  38. data/lib/moxml/entity_reference.rb +29 -0
  39. data/lib/moxml/entity_registry.rb +278 -0
  40. data/lib/moxml/error.rb +5 -5
  41. data/lib/moxml/node.rb +22 -8
  42. data/lib/moxml/node_set.rb +10 -6
  43. data/lib/moxml/processing_instruction.rb +6 -0
  44. data/lib/moxml/version.rb +1 -1
  45. data/lib/moxml/xml_utils.rb +25 -2
  46. data/lib/moxml/xpath/errors.rb +1 -1
  47. data/lib/moxml.rb +1 -0
  48. data/spec/consistency/README.md +3 -1
  49. data/spec/consistency/round_trip_spec.rb +479 -0
  50. data/spec/examples/readme_examples_spec.rb +1 -1
  51. data/spec/fixtures/round-trips/metanorma/a.xml +66 -0
  52. data/spec/fixtures/round-trips/metanorma/bilingual-en.xml +7682 -0
  53. data/spec/fixtures/round-trips/metanorma/bilingual-fr.xml +7520 -0
  54. data/spec/fixtures/round-trips/metanorma/bilingual.presentation.xml +21211 -0
  55. data/spec/fixtures/round-trips/metanorma/collection1.xml +313 -0
  56. data/spec/fixtures/round-trips/metanorma/collection1nested.xml +291 -0
  57. data/spec/fixtures/round-trips/metanorma/collection_docinline.xml +544 -0
  58. data/spec/fixtures/round-trips/metanorma/collection_full.xml +1776 -0
  59. data/spec/fixtures/round-trips/metanorma/dummy.1.xml +295 -0
  60. data/spec/fixtures/round-trips/metanorma/dummy.xml +349 -0
  61. data/spec/fixtures/round-trips/metanorma/footnotes.xml +70 -0
  62. data/spec/fixtures/round-trips/metanorma/iho.xml +116 -0
  63. data/spec/fixtures/round-trips/metanorma/rice-amd.final.xml +186 -0
  64. data/spec/fixtures/round-trips/metanorma/rice-amd.final_1.xml +180 -0
  65. data/spec/fixtures/round-trips/metanorma/rice-en.final.norepo.xml +116 -0
  66. data/spec/fixtures/round-trips/metanorma/rice-en.final.xml +149 -0
  67. data/spec/fixtures/round-trips/metanorma/rice-en.final_1.xml +144 -0
  68. data/spec/fixtures/round-trips/metanorma/rice1-en.final.xml +120 -0
  69. data/spec/fixtures/round-trips/metanorma/rice2-en.final.xml +116 -0
  70. data/spec/fixtures/round-trips/metanorma/test_sectionsplit.xml +119 -0
  71. data/spec/fixtures/round-trips/niso-jats/bmj_sample.xml +1068 -0
  72. data/spec/fixtures/round-trips/niso-jats/element_citation.xml +7 -0
  73. data/spec/fixtures/round-trips/niso-jats/pnas_sample.xml +3768 -0
  74. data/spec/fixtures/round-trips/rfcxml/rfc8881.xml +45848 -0
  75. data/spec/fixtures/round-trips/rfcxml/rfc8994.xml +6607 -0
  76. data/spec/fixtures/round-trips/rfcxml/rfc9000.xml +9064 -0
  77. data/spec/fixtures/round-trips/rfcxml/rfc9043.xml +5527 -0
  78. data/spec/fixtures/round-trips/rfcxml/rfc9051.xml +14286 -0
  79. data/spec/fixtures/round-trips/rfcxml/rfc9110.xml +18156 -0
  80. data/spec/fixtures/round-trips/rfcxml/rfc9260.xml +9136 -0
  81. data/spec/fixtures/round-trips/rfcxml/rfc9293.xml +8300 -0
  82. data/spec/fixtures/round-trips/rfcxml/rfc9380.xml +8916 -0
  83. data/spec/fixtures/round-trips/rfcxml/rfc9420.xml +8927 -0
  84. data/spec/fixtures/w3c/namespaces/1.0/001.xml +7 -0
  85. data/spec/fixtures/w3c/namespaces/1.0/002.xml +8 -0
  86. data/spec/fixtures/w3c/namespaces/1.0/003.xml +7 -0
  87. data/spec/fixtures/w3c/namespaces/1.0/004.xml +7 -0
  88. data/spec/fixtures/w3c/namespaces/1.0/005.xml +7 -0
  89. data/spec/fixtures/w3c/namespaces/1.0/006.xml +7 -0
  90. data/spec/fixtures/w3c/namespaces/1.0/007.xml +20 -0
  91. data/spec/fixtures/w3c/namespaces/1.0/008.xml +20 -0
  92. data/spec/fixtures/w3c/namespaces/1.0/009.xml +19 -0
  93. data/spec/fixtures/w3c/namespaces/1.0/010.xml +19 -0
  94. data/spec/fixtures/w3c/namespaces/1.0/011.xml +20 -0
  95. data/spec/fixtures/w3c/namespaces/1.0/012.xml +19 -0
  96. data/spec/fixtures/w3c/namespaces/1.0/013.xml +5 -0
  97. data/spec/fixtures/w3c/namespaces/1.0/014.xml +3 -0
  98. data/spec/fixtures/w3c/namespaces/1.0/015.xml +3 -0
  99. data/spec/fixtures/w3c/namespaces/1.0/016.xml +3 -0
  100. data/spec/fixtures/w3c/namespaces/1.0/017.xml +3 -0
  101. data/spec/fixtures/w3c/namespaces/1.0/018.xml +3 -0
  102. data/spec/fixtures/w3c/namespaces/1.0/019.xml +3 -0
  103. data/spec/fixtures/w3c/namespaces/1.0/020.xml +3 -0
  104. data/spec/fixtures/w3c/namespaces/1.0/021.xml +6 -0
  105. data/spec/fixtures/w3c/namespaces/1.0/022.xml +6 -0
  106. data/spec/fixtures/w3c/namespaces/1.0/023.xml +6 -0
  107. data/spec/fixtures/w3c/namespaces/1.0/024.xml +6 -0
  108. data/spec/fixtures/w3c/namespaces/1.0/025.xml +3 -0
  109. data/spec/fixtures/w3c/namespaces/1.0/026.xml +3 -0
  110. data/spec/fixtures/w3c/namespaces/1.0/027.xml +3 -0
  111. data/spec/fixtures/w3c/namespaces/1.0/028.xml +3 -0
  112. data/spec/fixtures/w3c/namespaces/1.0/029.xml +4 -0
  113. data/spec/fixtures/w3c/namespaces/1.0/030.xml +4 -0
  114. data/spec/fixtures/w3c/namespaces/1.0/031.xml +4 -0
  115. data/spec/fixtures/w3c/namespaces/1.0/032.xml +5 -0
  116. data/spec/fixtures/w3c/namespaces/1.0/033.xml +4 -0
  117. data/spec/fixtures/w3c/namespaces/1.0/034.xml +3 -0
  118. data/spec/fixtures/w3c/namespaces/1.0/035.xml +8 -0
  119. data/spec/fixtures/w3c/namespaces/1.0/036.xml +8 -0
  120. data/spec/fixtures/w3c/namespaces/1.0/037.xml +8 -0
  121. data/spec/fixtures/w3c/namespaces/1.0/038.xml +8 -0
  122. data/spec/fixtures/w3c/namespaces/1.0/039.xml +10 -0
  123. data/spec/fixtures/w3c/namespaces/1.0/040.xml +9 -0
  124. data/spec/fixtures/w3c/namespaces/1.0/041.xml +8 -0
  125. data/spec/fixtures/w3c/namespaces/1.0/042.xml +4 -0
  126. data/spec/fixtures/w3c/namespaces/1.0/043.xml +7 -0
  127. data/spec/fixtures/w3c/namespaces/1.0/044.xml +7 -0
  128. data/spec/fixtures/w3c/namespaces/1.0/045.xml +7 -0
  129. data/spec/fixtures/w3c/namespaces/1.0/046.xml +10 -0
  130. data/spec/fixtures/w3c/namespaces/1.0/047.xml +4 -0
  131. data/spec/fixtures/w3c/namespaces/1.0/048.xml +5 -0
  132. data/spec/fixtures/w3c/namespaces/1.0/LICENSE.md +32 -0
  133. data/spec/fixtures/w3c/namespaces/1.0/README.adoc +42 -0
  134. data/spec/fixtures/w3c/namespaces/1.0/rmt-ns10.xml +156 -0
  135. data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +14 -2
  136. data/spec/integration/shared_examples/w3c_namespace_examples.rb +10 -0
  137. data/spec/integration/w3c_namespace_spec.rb +69 -0
  138. data/spec/moxml/adapter/libxml_spec.rb +7 -1
  139. data/spec/moxml/adapter/oga_spec.rb +92 -0
  140. data/spec/moxml/config_spec.rb +75 -0
  141. data/spec/moxml/doctype_spec.rb +19 -3
  142. data/spec/moxml/entity_registry_spec.rb +184 -0
  143. data/spec/moxml/error_spec.rb +2 -2
  144. data/spec/moxml/namespace_uri_validation_spec.rb +140 -0
  145. data/spec/moxml/xpath/axes_spec.rb +3 -4
  146. data/spec/performance/xpath_benchmark_spec.rb +6 -54
  147. data/spec/support/w3c_namespace_helpers.rb +41 -0
  148. data/spec/unit/rexml_isolated_test.rb +271 -0
  149. metadata +99 -3
  150. data/.ruby-version +0 -1
@@ -0,0 +1,156 @@
1
+ <TESTCASES PROFILE="Richard Tobin's XML Namespaces 1.0 test suite 14 Feb 2003">
2
+
3
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="2" URI="001.xml" ID="rmt-ns10-001" TYPE="valid">
4
+ Namespace name test: a perfectly good http URI
5
+ </TEST>
6
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="2" URI="002.xml" ID="rmt-ns10-002" TYPE="valid">
7
+ Namespace name test: a syntactically plausible URI with a
8
+ fictitious scheme
9
+ </TEST>
10
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="2" URI="003.xml" ID="rmt-ns10-003" TYPE="valid">
11
+ Namespace name test: a perfectly good http URI with a fragment
12
+ </TEST>
13
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="2" URI="004.xml" ID="rmt-ns10-004" TYPE="error">
14
+ Namespace name test: a relative URI (deprecated)
15
+ </TEST>
16
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="2" URI="005.xml" ID="rmt-ns10-005" TYPE="error">
17
+ Namespace name test: a same-document relative URI (deprecated)
18
+ </TEST>
19
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="2" URI="006.xml" ID="rmt-ns10-006" TYPE="error">
20
+ Namespace name test: an http IRI that is not a URI
21
+ </TEST>
22
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="1" URI="007.xml" ID="rmt-ns10-007" TYPE="valid">
23
+ Namespace inequality test: different capitalization
24
+ </TEST>
25
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="1" URI="008.xml" ID="rmt-ns10-008" TYPE="valid">
26
+ Namespace inequality test: different escaping
27
+ </TEST>
28
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="1" URI="009.xml" ID="rmt-ns10-009" TYPE="not-wf">
29
+ Namespace equality test: plain repetition
30
+ </TEST>
31
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="1" URI="010.xml" ID="rmt-ns10-010" TYPE="not-wf">
32
+ Namespace equality test: use of character reference
33
+ </TEST>
34
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="1" URI="011.xml" ID="rmt-ns10-011" TYPE="not-wf">
35
+ Namespace equality test: use of entity reference
36
+ </TEST>
37
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="1" URI="012.xml" ID="rmt-ns10-012" TYPE="not-wf">
38
+ Namespace inequality test: equal after attribute value normalization
39
+ </TEST>
40
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="3" URI="013.xml" ID="rmt-ns10-013" TYPE="not-wf">
41
+ Bad QName syntax: multiple colons
42
+ </TEST>
43
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="3" URI="014.xml" ID="rmt-ns10-014" TYPE="not-wf">
44
+ Bad QName syntax: colon at end
45
+ </TEST>
46
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="3" URI="015.xml" ID="rmt-ns10-015" TYPE="not-wf">
47
+ Bad QName syntax: colon at start
48
+ </TEST>
49
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="2" URI="016.xml" ID="rmt-ns10-016" TYPE="not-wf">
50
+ Bad QName syntax: xmlns:
51
+ </TEST>
52
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="-" URI="017.xml" ID="rmt-ns10-017" TYPE="invalid">
53
+ Simple legal case: no namespaces
54
+ </TEST>
55
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="5.2" URI="018.xml" ID="rmt-ns10-018" TYPE="invalid">
56
+ Simple legal case: default namespace
57
+ </TEST>
58
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="4" URI="019.xml" ID="rmt-ns10-019" TYPE="invalid">
59
+ Simple legal case: prefixed element
60
+ </TEST>
61
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="4" URI="020.xml" ID="rmt-ns10-020" TYPE="invalid">
62
+ Simple legal case: prefixed attribute
63
+ </TEST>
64
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="5.2" URI="021.xml" ID="rmt-ns10-021" TYPE="invalid">
65
+ Simple legal case: default namespace and unbinding
66
+ </TEST>
67
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="5.2" URI="022.xml" ID="rmt-ns10-022" TYPE="invalid">
68
+ Simple legal case: default namespace and rebinding
69
+ </TEST>
70
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="2" URI="023.xml" ID="rmt-ns10-023" TYPE="not-wf">
71
+ Illegal use of 1.1-style prefix unbinding in 1.0 document
72
+ </TEST>
73
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="5.1" URI="024.xml" ID="rmt-ns10-024" TYPE="invalid">
74
+ Simple legal case: prefix rebinding
75
+ </TEST>
76
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="4" URI="025.xml" ID="rmt-ns10-025" TYPE="not-wf">
77
+ Unbound element prefix
78
+ </TEST>
79
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="4" URI="026.xml" ID="rmt-ns10-026" TYPE="not-wf">
80
+ Unbound attribute prefix
81
+ </TEST>
82
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="2" URI="027.xml" ID="rmt-ns10-027" TYPE="invalid">
83
+ Reserved prefixes and namespaces: using the xml prefix undeclared
84
+ </TEST>
85
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="NE05" URI="028.xml" ID="rmt-ns10-028" TYPE="invalid">
86
+ Reserved prefixes and namespaces: declaring the xml prefix correctly
87
+ </TEST>
88
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="NE05" URI="029.xml" ID="rmt-ns10-029" TYPE="not-wf">
89
+ Reserved prefixes and namespaces: declaring the xml prefix incorrectly
90
+ </TEST>
91
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="NE05" URI="030.xml" ID="rmt-ns10-030" TYPE="not-wf">
92
+ Reserved prefixes and namespaces: binding another prefix
93
+ to the xml namespace
94
+ </TEST>
95
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="NE05" URI="031.xml" ID="rmt-ns10-031" TYPE="not-wf">
96
+ Reserved prefixes and namespaces: declaring the xmlns prefix
97
+ with its correct URI (illegal)
98
+ </TEST>
99
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="NE05" URI="032.xml" ID="rmt-ns10-032" TYPE="not-wf">
100
+ Reserved prefixes and namespaces: declaring the xmlns prefix
101
+ with an incorrect URI
102
+ </TEST>
103
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="NE05" URI="033.xml" ID="rmt-ns10-033" TYPE="not-wf">
104
+ Reserved prefixes and namespaces: binding another prefix
105
+ to the xmlns namespace
106
+ </TEST>
107
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="NE05" URI="034.xml" ID="rmt-ns10-034" TYPE="invalid">
108
+ Reserved prefixes and namespaces: binding a reserved prefix
109
+ </TEST>
110
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="5.3" URI="035.xml" ID="rmt-ns10-035" TYPE="not-wf">
111
+ Attribute uniqueness: repeated identical attribute
112
+ </TEST>
113
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="5.3" URI="036.xml" ID="rmt-ns10-036" TYPE="not-wf">
114
+ Attribute uniqueness: repeated attribute with different prefixes
115
+ </TEST>
116
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="5.3" URI="037.xml" ID="rmt-ns10-037" TYPE="invalid">
117
+ Attribute uniqueness: different attributes with same local name
118
+ </TEST>
119
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="5.3" URI="038.xml" ID="rmt-ns10-038" TYPE="invalid">
120
+ Attribute uniqueness: prefixed and unprefixed attributes with same
121
+ local name
122
+ </TEST>
123
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="5.3" URI="039.xml" ID="rmt-ns10-039" TYPE="invalid">
124
+ Attribute uniqueness: prefixed and unprefixed attributes with same
125
+ local name, with default namespace
126
+ </TEST>
127
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="5.3" URI="040.xml" ID="rmt-ns10-040" TYPE="invalid">
128
+ Attribute uniqueness: prefixed and unprefixed attributes with same
129
+ local name, with default namespace and element in default namespace
130
+ </TEST>
131
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="5.3" URI="041.xml" ID="rmt-ns10-041" TYPE="invalid">
132
+ Attribute uniqueness: prefixed and unprefixed attributes with same
133
+ local name, element in same namespace as prefixed attribute
134
+ </TEST>
135
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="NE08" URI="042.xml" ID="rmt-ns10-042" TYPE="not-wf">
136
+ Colon in PI name
137
+ </TEST>
138
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="NE08" URI="043.xml" ID="rmt-ns10-043" TYPE="not-wf">
139
+ Colon in entity name
140
+ </TEST>
141
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="NE08" URI="044.xml" ID="rmt-ns10-044" TYPE="not-wf">
142
+ Colon in entity name
143
+ </TEST>
144
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="NE08" URI="045.xml" ID="rmt-ns10-045" TYPE="invalid">
145
+ Colon in ID attribute name
146
+ </TEST>
147
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="NE08" URI="046.xml" ID="rmt-ns10-046" TYPE="invalid">
148
+ Colon in ID attribute name
149
+ </TEST>
150
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="NE03" URI="047.xml" ID="ht-ns10-047" TYPE="valid">
151
+ Reserved name: _not_ an error
152
+ </TEST>
153
+ <TEST RECOMMENDATION="NS1.0" SECTIONS="NE03" URI="048.xml" ID="ht-ns10-048" TYPE="valid">
154
+ Reserved name: _not_ an error
155
+ </TEST>
156
+ </TESTCASES>
@@ -24,10 +24,22 @@ RSpec.shared_examples "Moxml::Namespace" do
24
24
  expect(ns.uri).to eq("http://example.org")
25
25
  end
26
26
 
27
- it "validates URI" do
27
+ it "validates URI per RFC 3986" do
28
28
  expect do
29
29
  element.add_namespace("xs", "invalid uri")
30
- end.to raise_error(Moxml::NamespaceError, "Invalid URI: invalid uri")
30
+ end.to raise_error(Moxml::NamespaceError, /Invalid URI/)
31
+ end
32
+
33
+ it "accepts valid relative URI-references" do
34
+ expect do
35
+ element.add_namespace("xs", "my-custom-ns")
36
+ end.not_to raise_error
37
+ end
38
+
39
+ it "rejects empty URI for prefixed namespace declarations" do
40
+ expect do
41
+ element.add_namespace("xs", "")
42
+ end.to raise_error(Moxml::NamespaceError, /empty URI/)
31
43
  end
32
44
  end
33
45
 
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ RSpec.shared_examples "W3C namespace test: should parse" do |label, fixture_file, adapter, test_id|
4
+ it label do
5
+ skip "known #{adapter} limitation" if skip_for_adapter?(test_id, adapter)
6
+
7
+ xml = File.binread(File.join(W3C_NS_FIXTURES_DIR, fixture_file))
8
+ expect { moxml_context.parse(xml) }.not_to raise_error
9
+ end
10
+ end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ # W3C XML Namespaces 1.0 Test Suite
4
+ # Source: https://www.w3.org/XML/Test/ (xmlts20130923)
5
+ # Tests from: xmlconf/eduni/namespaces/1.0/
6
+ #
7
+ # Test types per W3C:
8
+ # valid - must be accepted without errors
9
+ # error - namespace constraint violation; processors MAY report
10
+ # not-wf - namespace well-formedness violation; must be rejected
11
+ # invalid - validity error; non-validating parsers should accept
12
+
13
+ RSpec.describe "W3C XML Namespaces 1.0 test suite" do
14
+ Moxml::Adapter::AVALIABLE_ADAPTERS.each do |adapter_name|
15
+ context "with #{adapter_name}" do
16
+ around do |example|
17
+ Moxml.with_config(adapter_name) do
18
+ example.run
19
+ end
20
+ end
21
+
22
+ let(:moxml_context) { Moxml.new }
23
+
24
+ W3C_NAMESPACE_TESTS.each do |test|
25
+ next unless File.exist?(File.join(W3C_NS_FIXTURES_DIR, test[:uri]))
26
+
27
+ test_label = "#{test[:id]}: #{test[:description]}"
28
+
29
+ case test[:type]
30
+ when "valid"
31
+ it_behaves_like "W3C namespace test: should parse",
32
+ "#{test_label} [valid]", test[:uri], adapter_name, test[:id]
33
+
34
+ when "error"
35
+ # Namespace errors are advisory — processors MAY report them.
36
+ # We accept these documents (e.g. relative URIs are valid URI-references).
37
+ it_behaves_like "W3C namespace test: should parse",
38
+ "#{test_label} [error - accepted]", test[:uri], adapter_name, test[:id]
39
+
40
+ when "not-wf"
41
+ # Namespace well-formedness violations should be caught by the parser,
42
+ # but enforcement varies significantly by adapter. These tests document
43
+ # adapter behavior and are not directly related to URI validation.
44
+ it "#{test_label} [not-wf]" do
45
+ xml = File.binread(File.join(W3C_NS_FIXTURES_DIR, test[:uri]))
46
+ raised = false
47
+ begin
48
+ moxml_context.parse(xml, strict: true)
49
+ rescue StandardError
50
+ raised = true
51
+ end
52
+
53
+ if raised
54
+ # Good: adapter correctly rejects namespace-ill-formed document
55
+ else
56
+ skip "#{adapter_name} does not enforce this namespace well-formedness rule"
57
+ end
58
+ end
59
+
60
+ when "invalid"
61
+ # Validity errors are for validating parsers. Non-validating parsers
62
+ # (which moxml wraps) should accept these documents.
63
+ it_behaves_like "W3C namespace test: should parse",
64
+ "#{test_label} [invalid - accepted]", test[:uri], adapter_name, test[:id]
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
@@ -1,6 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "libxml"
3
+ begin
4
+ require "libxml"
5
+ rescue LoadError
6
+ # LibXML gem not available - skip all specs in this file
7
+ return
8
+ end
9
+
4
10
  require "moxml/adapter/libxml"
5
11
 
6
12
  RSpec.describe Moxml::Adapter::Libxml do
@@ -11,4 +11,96 @@ RSpec.describe Moxml::Adapter::Oga do
11
11
  end
12
12
 
13
13
  it_behaves_like "xml adapter"
14
+
15
+ describe "entity handling" do
16
+ it "preserves non-breaking space through parse and serialize round-trip" do
17
+ xml = "<root>Item&nbsp;One</root>"
18
+ doc = described_class.parse(xml)
19
+ serialized = doc.to_xml
20
+ # After round-trip, the entity reference should be preserved
21
+ expect(serialized).to include("&nbsp;")
22
+ expect(serialized).to include("Item")
23
+ expect(serialized).to include("One")
24
+ end
25
+
26
+ it "correctly parses numeric character references" do
27
+ xml = "<root>&#160;</root>"
28
+ doc = described_class.parse(xml)
29
+ text = described_class.text_content(doc.at_xpath("//root"))
30
+
31
+ # Should contain the actual non-breaking space character (U+00A0)
32
+ expect(text.bytes).to include(160)
33
+ end
34
+
35
+ it "handles multiple different entities" do
36
+ xml = "<root>&nbsp;&mdash;&lsquo;</root>"
37
+ doc = described_class.parse(xml)
38
+ serialized = doc.to_xml
39
+
40
+ # All entities should be preserved in round-trip
41
+ expect(serialized).to include("&nbsp;")
42
+ expect(serialized).to include("&mdash;")
43
+ expect(serialized).to include("&lsquo;")
44
+ end
45
+
46
+ it "preserves entities when creating text nodes programmatically" do
47
+ context = Moxml::Context.new(:oga)
48
+ doc = context.create_document
49
+ root = doc.create_element("root")
50
+ doc.add_child(root)
51
+ text = doc.create_text("Hello&nbsp;World")
52
+ root.add_child(text)
53
+
54
+ serialized = doc.to_xml
55
+ expect(serialized).to include("Hello&nbsp;World")
56
+ end
57
+
58
+ it "preserves entities when setting text content programmatically" do
59
+ context = Moxml::Context.new(:oga)
60
+ doc = context.create_document
61
+ root = doc.create_element("root")
62
+ doc.add_child(root)
63
+ text = doc.create_text("placeholder")
64
+ root.add_child(text)
65
+ text.content = "Value&nbsp;Here"
66
+
67
+ serialized = doc.to_xml
68
+ expect(serialized).to include("Value&nbsp;Here")
69
+ end
70
+
71
+ it "preserves entities in attribute values through parse round-trip" do
72
+ xml = '<root attr="one&nbsp;two"/>'
73
+ doc = described_class.parse(xml)
74
+ root_native = doc.at_xpath("//root")
75
+ value = described_class.get_attribute_value(root_native, "attr")
76
+
77
+ expect(value).to eq("one&nbsp;two")
78
+ end
79
+
80
+ it "preserves entities when setting attribute values programmatically" do
81
+ context = Moxml::Context.new(:oga)
82
+ doc = context.create_document
83
+ root = doc.create_element("root")
84
+ doc.add_child(root)
85
+ root["data"] = "a&nbsp;b"
86
+
87
+ serialized = doc.to_xml
88
+ expect(serialized).to include('data="a&nbsp;b"')
89
+ end
90
+
91
+ it "does not marker-encode unknown entities in programmatic text" do
92
+ # Unknown entities (not in W3C registry) are left as-is by encode_entity_markers.
93
+ # Oga will drop them during serialization since they're not valid XML entities.
94
+ context = Moxml::Context.new(:oga)
95
+ doc = context.create_document
96
+ root = doc.create_element("root")
97
+ doc.add_child(root)
98
+ text = doc.create_text("test&foo;bar")
99
+ root.add_child(text)
100
+
101
+ serialized = doc.to_xml
102
+ # &foo; is not a known entity, so it won't survive Oga's serialization
103
+ expect(serialized).not_to include("\x01")
104
+ end
105
+ end
14
106
  end
@@ -12,6 +12,81 @@ RSpec.describe Moxml::Config do
12
12
  expect(config.default_indent).to eq(2)
13
13
  expect(config.entity_encoding).to eq(:basic)
14
14
  end
15
+
16
+ it "sets default entity_load_mode to :required" do
17
+ expect(config.entity_load_mode).to eq(:required)
18
+ end
19
+
20
+ it "sets default namespace_uri_mode to :strict" do
21
+ expect(config.namespace_uri_mode).to eq(:strict)
22
+ end
23
+ end
24
+
25
+ describe "#entity_load_mode=" do
26
+ it "accepts valid modes" do
27
+ %i[required optional disabled custom].each do |mode|
28
+ config.entity_load_mode = mode
29
+ expect(config.entity_load_mode).to eq(mode)
30
+ end
31
+ end
32
+
33
+ it "raises error for invalid mode" do
34
+ expect do
35
+ config.entity_load_mode = :invalid
36
+ end.to raise_error(ArgumentError)
37
+ end
38
+ end
39
+
40
+ describe "#load_external_entities=" do
41
+ it "maps true to :required" do
42
+ config.load_external_entities = true
43
+ expect(config.entity_load_mode).to eq(:required)
44
+ end
45
+
46
+ it "maps false to :disabled" do
47
+ config.load_external_entities = false
48
+ expect(config.entity_load_mode).to eq(:disabled)
49
+ end
50
+
51
+ it "accepts symbol values" do
52
+ config.load_external_entities = :optional
53
+ expect(config.entity_load_mode).to eq(:optional)
54
+ end
55
+ end
56
+
57
+ describe "#load_external_entities" do
58
+ it "returns true when mode is :required" do
59
+ config.entity_load_mode = :required
60
+ expect(config.load_external_entities).to be true
61
+ end
62
+
63
+ it "returns false when mode is not :required" do
64
+ config.entity_load_mode = :disabled
65
+ expect(config.load_external_entities).to be false
66
+ end
67
+ end
68
+
69
+ describe "#namespace_uri_mode=" do
70
+ it "accepts :strict" do
71
+ config.namespace_uri_mode = :strict
72
+ expect(config.namespace_uri_mode).to eq(:strict)
73
+ end
74
+
75
+ it "accepts :lenient" do
76
+ config.namespace_uri_mode = :lenient
77
+ expect(config.namespace_uri_mode).to eq(:lenient)
78
+ end
79
+
80
+ it "accepts string values" do
81
+ config.namespace_uri_mode = "lenient"
82
+ expect(config.namespace_uri_mode).to eq(:lenient)
83
+ end
84
+
85
+ it "raises error for invalid mode" do
86
+ expect do
87
+ config.namespace_uri_mode = :invalid
88
+ end.to raise_error(ArgumentError, /Invalid namespace_uri_mode/)
89
+ end
15
90
  end
16
91
 
17
92
  describe "#adapter=" do
@@ -8,7 +8,6 @@ RSpec.describe Moxml::Doctype do
8
8
 
9
9
  describe "#name" do
10
10
  it "returns doctype name" do
11
- skip "Doctype accessor methods not yet implemented in all adapters"
12
11
  doctype = doc.create_doctype("root", nil, "test.dtd")
13
12
  expect(doctype.name).to eq("root")
14
13
  end
@@ -16,15 +15,32 @@ RSpec.describe Moxml::Doctype do
16
15
 
17
16
  describe "#system_id" do
18
17
  it "returns system identifier" do
19
- skip "Doctype accessor methods not yet implemented in all adapters"
20
18
  doctype = doc.create_doctype("root", nil, "test.dtd")
21
19
  expect(doctype.system_id).to eq("test.dtd")
22
20
  end
23
21
  end
24
22
 
23
+ describe "#external_id" do
24
+ it "returns external identifier when present" do
25
+ doctype = doc.create_doctype("html", "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd")
26
+ expect(doctype.external_id).to eq("-//W3C//DTD HTML 4.01//EN")
27
+ end
28
+
29
+ it "returns nil when not present" do
30
+ doctype = doc.create_doctype("root", nil, "test.dtd")
31
+ expect(doctype.external_id).to be_nil
32
+ end
33
+ end
34
+
35
+ describe "#identifier" do
36
+ it "returns the doctype name" do
37
+ doctype = doc.create_doctype("html", nil, nil)
38
+ expect(doctype.identifier).to eq("html")
39
+ end
40
+ end
41
+
25
42
  describe "creation" do
26
43
  it "creates a doctype" do
27
- skip "Doctype accessor methods not yet implemented in all adapters"
28
44
  doctype = doc.create_doctype("html", nil, nil)
29
45
  expect(doctype).to be_a(described_class)
30
46
  expect(doctype.name).to eq("html")
@@ -0,0 +1,184 @@
1
+ # frozen_string_literal: true
2
+
3
+ RSpec.describe Moxml::EntityRegistry do
4
+ before do
5
+ described_class.reset
6
+ end
7
+
8
+ describe ".entity_data" do
9
+ it "loads entity data from bundled JSON" do
10
+ data = described_class.entity_data
11
+ expect(data).to be_a(Hash)
12
+ expect(data.keys).to include("amp", "nbsp", "copy")
13
+ end
14
+
15
+ it "caches entity data" do
16
+ data1 = described_class.entity_data
17
+ data2 = described_class.entity_data
18
+ expect(data1.object_id).to eq(data2.object_id)
19
+ end
20
+ end
21
+
22
+ describe ".reset" do
23
+ it "clears cached entity data" do
24
+ data1 = described_class.entity_data
25
+ described_class.reset
26
+ # After reset, a new data hash is loaded
27
+ expect(described_class.entity_data).not_to be(data1)
28
+ end
29
+ end
30
+
31
+ describe "#initialize with :required mode" do
32
+ it "loads all entities from bundled data" do
33
+ registry = described_class.new(mode: :required)
34
+ expect(registry.by_name.keys.length).to be > 2000
35
+ expect(registry.declared?("nbsp")).to be true
36
+ expect(registry.declared?("amp")).to be true
37
+ end
38
+
39
+ it "raises error if entity data unavailable" do
40
+ allow(described_class).to receive(:entity_data).and_return(nil)
41
+ expect do
42
+ described_class.new(mode: :required)
43
+ end.to raise_error(Moxml::EntityRegistry::EntityDataError)
44
+ end
45
+ end
46
+
47
+ describe "#initialize with :disabled mode" do
48
+ it "creates empty registry" do
49
+ registry = described_class.new(mode: :disabled)
50
+ expect(registry.by_name.keys).to be_empty
51
+ expect(registry.declared?("nbsp")).to be false
52
+ end
53
+ end
54
+
55
+ describe "#initialize with :optional mode" do
56
+ it "loads entities when available" do
57
+ registry = described_class.new(mode: :optional)
58
+ expect(registry.by_name.keys.length).to be > 2000
59
+ expect(registry.declared?("nbsp")).to be true
60
+ end
61
+
62
+ it "does not raise when entity data unavailable" do
63
+ allow(described_class).to receive(:entity_data).and_return(nil)
64
+ expect do
65
+ described_class.new(mode: :optional)
66
+ end.not_to raise_error
67
+ end
68
+ end
69
+
70
+ describe "#initialize with :custom mode" do
71
+ it "loads custom entities from provider" do
72
+ custom_provider = -> { { "custom" => 12345, "special" => 67890 } }
73
+ registry = described_class.new(mode: :custom,
74
+ entity_provider: custom_provider)
75
+ expect(registry.by_name.keys).to contain_exactly("custom", "special")
76
+ expect(registry.declared?("custom")).to be true
77
+ expect(registry.codepoint_for_name("custom")).to eq(12345)
78
+ end
79
+
80
+ it "works with nil provider" do
81
+ registry = described_class.new(mode: :custom, entity_provider: nil)
82
+ expect(registry.by_name.keys).to be_empty
83
+ end
84
+
85
+ it "works with provider returning nil" do
86
+ registry = described_class.new(mode: :custom, entity_provider: -> {})
87
+ expect(registry.by_name.keys).to be_empty
88
+ end
89
+ end
90
+
91
+ describe "#declared?" do
92
+ it "returns true for known entities" do
93
+ registry = described_class.new
94
+ expect(registry.declared?("nbsp")).to be true
95
+ expect(registry.declared?("amp")).to be true
96
+ expect(registry.declared?("copy")).to be true
97
+ end
98
+
99
+ it "returns false for unknown entities" do
100
+ registry = described_class.new
101
+ expect(registry.declared?("unknown_entity")).to be false
102
+ end
103
+ end
104
+
105
+ describe "#codepoint_for_name" do
106
+ it "returns Unicode codepoint for entity name" do
107
+ registry = described_class.new
108
+ expect(registry.codepoint_for_name("nbsp")).to eq(160)
109
+ expect(registry.codepoint_for_name("amp")).to eq(38)
110
+ expect(registry.codepoint_for_name("copy")).to eq(169)
111
+ end
112
+
113
+ it "returns nil for unknown entity" do
114
+ registry = described_class.new
115
+ expect(registry.codepoint_for_name("unknown")).to be_nil
116
+ end
117
+ end
118
+
119
+ describe "#names_for_codepoint" do
120
+ it "returns all entity names for a codepoint" do
121
+ registry = described_class.new
122
+ # nbsp has codepoint 160
123
+ names = registry.names_for_codepoint(160)
124
+ expect(names).to be_an(Array)
125
+ expect(names).to include("nbsp")
126
+ end
127
+ end
128
+
129
+ describe "#primary_name_for_codepoint" do
130
+ it "returns first entity name for codepoint" do
131
+ registry = described_class.new
132
+ name = registry.primary_name_for_codepoint(160)
133
+ expect(name).to be_a(String)
134
+ end
135
+ end
136
+
137
+ describe "#register" do
138
+ it "adds entities to the registry" do
139
+ registry = described_class.new(mode: :disabled)
140
+ registry.register({ "new_entity" => 99999 })
141
+ expect(registry.declared?("new_entity")).to be true
142
+ expect(registry.codepoint_for_name("new_entity")).to eq(99999)
143
+ end
144
+
145
+ it "allows multiple names for same codepoint" do
146
+ registry = described_class.new(mode: :disabled)
147
+ registry.register({ "entity_a" => 12345, "entity_b" => 12345 })
148
+ expect(registry.names_for_codepoint(12345)).to contain_exactly(
149
+ "entity_a", "entity_b"
150
+ )
151
+ end
152
+ end
153
+
154
+ describe "#clear!" do
155
+ it "removes all entities" do
156
+ registry = described_class.new
157
+ expect(registry.by_name.keys).not_to be_empty
158
+ registry.clear!
159
+ expect(registry.by_name.keys).to be_empty
160
+ end
161
+ end
162
+
163
+ describe "load_html5, load_mathml, load_iso, load_all" do
164
+ it "load_html5 returns self for chaining" do
165
+ registry = described_class.new
166
+ expect(registry.load_html5).to be(registry)
167
+ end
168
+
169
+ it "load_mathml returns self for chaining" do
170
+ registry = described_class.new
171
+ expect(registry.load_mathml).to be(registry)
172
+ end
173
+
174
+ it "load_iso returns self for chaining" do
175
+ registry = described_class.new
176
+ expect(registry.load_iso).to be(registry)
177
+ end
178
+
179
+ it "load_all returns self for chaining" do
180
+ registry = described_class.new
181
+ expect(registry.load_all).to be(registry)
182
+ end
183
+ end
184
+ end