rng 0.1.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/docs.yml +63 -0
  3. data/.github/workflows/release.yml +8 -3
  4. data/.gitignore +11 -0
  5. data/.rubocop.yml +10 -7
  6. data/.rubocop_todo.yml +229 -23
  7. data/CHANGELOG.md +317 -0
  8. data/CLAUDE.md +139 -0
  9. data/Gemfile +11 -12
  10. data/README.adoc +1538 -11
  11. data/Rakefile +11 -3
  12. data/docs/Gemfile +8 -0
  13. data/docs/_config.yml +23 -0
  14. data/docs/getting-started/index.adoc +75 -0
  15. data/docs/guides/error-handling.adoc +137 -0
  16. data/docs/guides/external-references.adoc +128 -0
  17. data/docs/guides/index.adoc +24 -0
  18. data/docs/guides/parsing-rnc.adoc +141 -0
  19. data/docs/guides/parsing-rng-xml.adoc +81 -0
  20. data/docs/guides/rng-to-rnc.adoc +101 -0
  21. data/docs/guides/validation.adoc +85 -0
  22. data/docs/index.adoc +52 -0
  23. data/docs/reference/api.adoc +126 -0
  24. data/docs/reference/cli.adoc +182 -0
  25. data/docs/understanding/architecture.adoc +58 -0
  26. data/docs/understanding/rng-vs-rnc.adoc +118 -0
  27. data/exe/rng +5 -0
  28. data/lib/rng/any_name.rb +10 -8
  29. data/lib/rng/attribute.rb +28 -26
  30. data/lib/rng/choice.rb +24 -24
  31. data/lib/rng/cli.rb +607 -0
  32. data/lib/rng/data.rb +10 -10
  33. data/lib/rng/datatype_declaration.rb +26 -0
  34. data/lib/rng/define.rb +44 -41
  35. data/lib/rng/div.rb +36 -0
  36. data/lib/rng/documentation.rb +9 -0
  37. data/lib/rng/element.rb +39 -37
  38. data/lib/rng/empty.rb +7 -7
  39. data/lib/rng/except.rb +25 -25
  40. data/lib/rng/external_ref.rb +8 -8
  41. data/lib/rng/external_ref_resolver.rb +602 -0
  42. data/lib/rng/foreign_attribute.rb +26 -0
  43. data/lib/rng/foreign_element.rb +33 -0
  44. data/lib/rng/grammar.rb +14 -12
  45. data/lib/rng/group.rb +26 -24
  46. data/lib/rng/include.rb +5 -6
  47. data/lib/rng/include_processor.rb +461 -0
  48. data/lib/rng/interleave.rb +23 -23
  49. data/lib/rng/list.rb +22 -22
  50. data/lib/rng/mixed.rb +23 -23
  51. data/lib/rng/name.rb +6 -7
  52. data/lib/rng/namespace_declaration.rb +47 -0
  53. data/lib/rng/namespaces.rb +15 -0
  54. data/lib/rng/not_allowed.rb +7 -7
  55. data/lib/rng/ns_name.rb +9 -9
  56. data/lib/rng/one_or_more.rb +23 -23
  57. data/lib/rng/optional.rb +23 -23
  58. data/lib/rng/param.rb +7 -8
  59. data/lib/rng/parent_ref.rb +8 -8
  60. data/lib/rng/parse_tree_processor.rb +695 -0
  61. data/lib/rng/pattern.rb +7 -7
  62. data/lib/rng/ref.rb +8 -8
  63. data/lib/rng/rnc_builder.rb +927 -0
  64. data/lib/rng/rnc_parser.rb +605 -305
  65. data/lib/rng/rnc_to_rng_converter.rb +1408 -0
  66. data/lib/rng/schema_preamble.rb +73 -0
  67. data/lib/rng/schema_validator.rb +1622 -0
  68. data/lib/rng/start.rb +27 -25
  69. data/lib/rng/test_suite_parser.rb +168 -0
  70. data/lib/rng/text.rb +11 -8
  71. data/lib/rng/to_rnc.rb +4 -35
  72. data/lib/rng/value.rb +6 -7
  73. data/lib/rng/version.rb +1 -1
  74. data/lib/rng/zero_or_more.rb +23 -23
  75. data/lib/rng.rb +68 -17
  76. data/rng.gemspec +18 -19
  77. data/scripts/extract_spectest_resources.rb +96 -0
  78. data/spec/fixtures/compacttest.xml +2511 -0
  79. data/spec/fixtures/external/circular_a.rng +7 -0
  80. data/spec/fixtures/external/circular_b.rng +7 -0
  81. data/spec/fixtures/external/circular_main.rng +7 -0
  82. data/spec/fixtures/external/external_ref_lib.rng +7 -0
  83. data/spec/fixtures/external/external_ref_main.rng +7 -0
  84. data/spec/fixtures/external/include_lib.rng +7 -0
  85. data/spec/fixtures/external/include_main.rng +3 -0
  86. data/spec/fixtures/external/nested_chain.rng +6 -0
  87. data/spec/fixtures/external/nested_leaf.rng +7 -0
  88. data/spec/fixtures/external/nested_mid.rng +8 -0
  89. data/spec/fixtures/metanorma/3gpp.rnc +35 -0
  90. data/spec/fixtures/metanorma/3gpp.rng +105 -0
  91. data/spec/fixtures/metanorma/basicdoc.rnc +11 -0
  92. data/spec/fixtures/metanorma/bipm.rnc +148 -0
  93. data/spec/fixtures/metanorma/bipm.rng +376 -0
  94. data/spec/fixtures/metanorma/bsi.rnc +104 -0
  95. data/spec/fixtures/metanorma/bsi.rng +332 -0
  96. data/spec/fixtures/metanorma/csa.rnc +45 -0
  97. data/spec/fixtures/metanorma/csa.rng +131 -0
  98. data/spec/fixtures/metanorma/csd.rnc +43 -0
  99. data/spec/fixtures/metanorma/csd.rng +132 -0
  100. data/spec/fixtures/metanorma/gbstandard.rnc +99 -0
  101. data/spec/fixtures/metanorma/gbstandard.rng +316 -0
  102. data/spec/fixtures/metanorma/iec.rnc +49 -0
  103. data/spec/fixtures/metanorma/iec.rng +193 -0
  104. data/spec/fixtures/metanorma/ietf.rnc +275 -0
  105. data/spec/fixtures/metanorma/ietf.rng +925 -0
  106. data/spec/fixtures/metanorma/iho.rnc +58 -0
  107. data/spec/fixtures/metanorma/iho.rng +179 -0
  108. data/spec/fixtures/metanorma/isodoc.rnc +873 -0
  109. data/spec/fixtures/metanorma/isodoc.rng +2704 -0
  110. data/spec/fixtures/metanorma/isostandard-amd.rnc +43 -0
  111. data/spec/fixtures/metanorma/isostandard-amd.rng +108 -0
  112. data/spec/fixtures/metanorma/isostandard.rnc +166 -0
  113. data/spec/fixtures/metanorma/isostandard.rng +494 -0
  114. data/spec/fixtures/metanorma/itu.rnc +122 -0
  115. data/spec/fixtures/metanorma/itu.rng +377 -0
  116. data/spec/fixtures/metanorma/m3d.rnc +41 -0
  117. data/spec/fixtures/metanorma/m3d.rng +122 -0
  118. data/spec/fixtures/metanorma/mpfd.rnc +36 -0
  119. data/spec/fixtures/metanorma/mpfd.rng +95 -0
  120. data/spec/fixtures/metanorma/nist.rnc +77 -0
  121. data/spec/fixtures/metanorma/nist.rng +216 -0
  122. data/spec/fixtures/metanorma/ogc.rnc +51 -0
  123. data/spec/fixtures/metanorma/ogc.rng +151 -0
  124. data/spec/fixtures/metanorma/reqt.rnc +6 -0
  125. data/spec/fixtures/metanorma/rsd.rnc +36 -0
  126. data/spec/fixtures/metanorma/rsd.rng +95 -0
  127. data/spec/fixtures/metanorma/un.rnc +103 -0
  128. data/spec/fixtures/metanorma/un.rng +367 -0
  129. data/spec/fixtures/rnc/base.rnc +4 -0
  130. data/spec/fixtures/rnc/grammar_with_trailing.rnc +8 -0
  131. data/spec/fixtures/rnc/main_include_trailing.rnc +3 -0
  132. data/spec/fixtures/rnc/main_with_include.rnc +5 -0
  133. data/spec/fixtures/rnc/test_augment.rnc +10 -0
  134. data/spec/fixtures/rnc/test_isodoc_simple.rnc +9 -0
  135. data/spec/fixtures/rnc/top_level_include.rnc +8 -0
  136. data/spec/fixtures/spectest_external/case_10_4.7/x +3 -0
  137. data/spec/fixtures/spectest_external/case_10_4.7/y +7 -0
  138. data/spec/fixtures/spectest_external/case_11_4.7/x +3 -0
  139. data/spec/fixtures/spectest_external/case_12_4.7/x +3 -0
  140. data/spec/fixtures/spectest_external/case_13_4.7/x +3 -0
  141. data/spec/fixtures/spectest_external/case_13_4.7/y +3 -0
  142. data/spec/fixtures/spectest_external/case_14_4.7/x +7 -0
  143. data/spec/fixtures/spectest_external/case_15_4.7/x +7 -0
  144. data/spec/fixtures/spectest_external/case_16_4.7/x +5 -0
  145. data/spec/fixtures/spectest_external/case_17_4.7/x +5 -0
  146. data/spec/fixtures/spectest_external/case_18_4.7/x +7 -0
  147. data/spec/fixtures/spectest_external/case_19_4.7/level1.rng +9 -0
  148. data/spec/fixtures/spectest_external/case_19_4.7/level2.rng +7 -0
  149. data/spec/fixtures/spectest_external/case_1_4.5/sub1/x +3 -0
  150. data/spec/fixtures/spectest_external/case_1_4.5/sub3/x +3 -0
  151. data/spec/fixtures/spectest_external/case_1_4.5/x +3 -0
  152. data/spec/fixtures/spectest_external/case_20_4.6/x +3 -0
  153. data/spec/fixtures/spectest_external/case_2_4.5/x +3 -0
  154. data/spec/fixtures/spectest_external/case_3_4.6/x +3 -0
  155. data/spec/fixtures/spectest_external/case_4_4.6/x +3 -0
  156. data/spec/fixtures/spectest_external/case_5_4.6/x +1 -0
  157. data/spec/fixtures/spectest_external/case_6_4.6/x +5 -0
  158. data/spec/fixtures/spectest_external/case_7_4.6/x +1 -0
  159. data/spec/fixtures/spectest_external/case_7_4.6/y +1 -0
  160. data/spec/fixtures/spectest_external/case_8_4.7/x +7 -0
  161. data/spec/fixtures/spectest_external/case_9_4.7/x +7 -0
  162. data/spec/fixtures/spectest_external/resources.json +149 -0
  163. data/spec/rng/advanced_rnc_spec.rb +101 -0
  164. data/spec/rng/compacttest_spec.rb +197 -0
  165. data/spec/rng/datatype_declaration_spec.rb +28 -0
  166. data/spec/rng/div_spec.rb +207 -0
  167. data/spec/rng/external_ref_resolver_spec.rb +122 -0
  168. data/spec/rng/metanorma_conversion_spec.rb +159 -0
  169. data/spec/rng/namespace_declaration_spec.rb +60 -0
  170. data/spec/rng/namespace_support_spec.rb +199 -0
  171. data/spec/rng/rnc_parser_spec.rb +498 -22
  172. data/spec/rng/rnc_roundtrip_spec.rb +96 -82
  173. data/spec/rng/rng_generation_spec.rb +288 -0
  174. data/spec/rng/roundtrip_spec.rb +342 -0
  175. data/spec/rng/schema_preamble_spec.rb +145 -0
  176. data/spec/rng/schema_spec.rb +68 -64
  177. data/spec/rng/spectest_spec.rb +168 -90
  178. data/spec/rng_spec.rb +2 -2
  179. data/spec/spec_helper.rb +7 -42
  180. metadata +141 -8
@@ -0,0 +1,1622 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require 'uri'
5
+
6
+ module Rng
7
+ # Raised when a schema violates RELAX NG structural rules
8
# Error raised when a schema breaks a RELAX NG structural rule.
#
# Optionally carries the XPath and source line of the offending node; when
# either is known, #to_s (and therefore #message) is prefixed with it.
class SchemaValidationError < StandardError
  attr_reader :xpath, :line

  # @param message [String] description of the violation
  # @param xpath [String, nil] path of the offending node
  # @param line [Integer, nil] source line of the offending node
  def initialize(message, xpath: nil, line: nil)
    @xpath = xpath
    @line = line
    super(message)
  end

  # @return [String] the message, preceded by "xpath:line: " when a location is known
  def to_s
    text = super
    where = [xpath, line].compact.join(':')
    if where.empty?
      text
    else
      "#{where}: #{text}"
    end
  end
end
23
+
24
+ # Validates raw XML against RELAX NG structural rules.
25
+ # Operates on the Nokogiri XML tree BEFORE Lutaml deserialization,
26
+ # because Lutaml silently drops unmapped content.
27
+ class SchemaValidator
28
# Namespace URI of RELAX NG schema elements.
RNG_NS = 'http://relaxng.org/ns/structure/1.0'
# Namespace URI of RELAX NG compatibility annotations.
ANNOTATIONS_NS = 'http://relaxng.org/ns/compatibility/annotations/1.0'

# Elements that must not contain RELAX NG child elements.
LEAF_ELEMENTS = %w[
  empty text notAllowed ref parentRef value
].freeze
# Element names reported as obsolete and unsupported.
OBSOLETE_ELEMENTS = %w[
  not difference key keyRef
].freeze
# RELAX NG-namespaced attribute names reported as obsolete.
OBSOLETE_ATTRS = %w[
  key keyRef global
].freeze
# NOTE(review): not referenced in the visible part of the file; presumably the
# elements that need a name - confirm against the callers.
NAME_REQUIRED = %w[
  element attribute
].freeze
# Elements that must have at least one child pattern.
CONTAINER_ELEMENTS = %w[
  group choice interleave optional zeroOrMore oneOrMore list mixed define start
].freeze
# Local names accepted for the document root.
VALID_ROOT_ELEMENTS = %w[
  grammar element group choice interleave notAllowed externalRef data
].freeze
# NOTE(review): presumably the leaf elements that may not carry attributes
# (see validate_leaf_no_attrs) - confirm.
NO_ATTR_LEAF_ELEMENTS = %w[
  empty text notAllowed
].freeze

# Attribute names listed per element.
# NOTE(review): presumably consumed by validate_unknown_attrs - confirm.
KNOWN_ATTRS = {
  'element' => %w[name ns],
  'attribute' => %w[name ns],
  'ref' => %w[name],
  'parentRef' => %w[name],
  'define' => %w[name combine],
  'data' => %w[type datatypeLibrary combine],
  'value' => %w[type datatypeLibrary combine],
  'list' => %w[datatypeLibrary combine],
  'externalRef' => %w[href ns],
  'include' => %w[href],
  'param' => %w[name],
  'grammar' => %w[ns datatypeLibrary],
  'start' => %w[combine],
  'anyName' => %w[],
  'nsName' => %w[ns],
  'except' => %w[]
}.freeze
VALID_UNPREFIXED_ATTRS = %w[
  name ns type datatypeLibrary combine href key keyRef global
].freeze
GENERIC_ATTRS = %w[
  name ns type datatypeLibrary combine href
].freeze

# Elements not allowed as content in attribute
ATTR_DISALLOWED = %w[
  element attribute group interleave mixed
].freeze
# Elements not allowed in list content
LIST_DISALLOWED = %w[
  element attribute list interleave mixed
].freeze
# Elements not allowed in data/except content
DATA_EXCEPT_DISALLOWED = %w[
  element attribute text list interleave mixed group choice
].freeze
66
+
67
class << self
  # Validates +xml_input+, stopping at the first violation.
  #
  # @param xml_input [String, IO] schema source handed to Nokogiri::XML
  # @return [true] when no rule is violated
  # @raise [SchemaValidationError] on the first violation, or when the
  #   document has no root element
  def validate(xml_input)
    root = Nokogiri::XML(xml_input).root
    # report_error is an instance method and cannot be called from class
    # scope, so the missing-root case is raised directly.
    raise SchemaValidationError, 'Document has no root element' unless root

    new.validate_node(root)
    true
  end

  # Validates +xml_input+ in collect-all mode.
  #
  # @param xml_input [String, IO] schema source handed to Nokogiri::XML
  # @return [Array<SchemaValidationError>] every violation found (empty when valid)
  def validate_all(xml_input)
    root = Nokogiri::XML(xml_input).root
    return [SchemaValidationError.new('Document has no root element')] unless root

    validator = new(collect_all: true)
    begin
      validator.validate_node(root)
    rescue SchemaValidationError => e
      # A violation raised directly (not through report_error) aborts the
      # walk; keep it so the caller still sees why validation stopped.
      validator.errors << e
    end
    validator.errors
  end

  # @param xml_input [String, IO] schema source handed to Nokogiri::XML
  # @return [Boolean] false when the document has no root or violates a rule
  def valid?(xml_input)
    validate(xml_input)
  rescue SchemaValidationError
    false
  end

  # Same result as validate_all; each returned error carries its xpath and line.
  #
  # @param xml_input [String, IO] schema source handed to Nokogiri::XML
  # @return [Array<SchemaValidationError>]
  def validate_with_location(xml_input)
    validate_all(xml_input)
  end
end
107
+
108
+ attr_reader :errors
109
+
110
# @param collect_all [Boolean] when true, report_error appends violations to
#   #errors instead of raising on the first one
def initialize(collect_all: false)
  @collect_all = collect_all
  @errors = []
end
114
+
115
# Applies every structural rule to +node+, then recurses into its element
# children (annotation-namespace children are skipped).
#
# Non-element nodes are ignored. A root element is rejected only when it is
# outside the RELAX NG namespace AND its local name is not one of
# VALID_ROOT_ELEMENTS. Any other element is checked only when it is in the
# RELAX NG namespace or has no namespace at all; foreign elements are skipped
# together with their subtree.
#
# @param node [Nokogiri::XML::Node] node to validate
# @param parent_context [Symbol, nil] value produced by context_for_child for
#   the parent; forwarded to validate_content_model
# @raise [SchemaValidationError] on the first violation unless collecting
def validate_node(node, parent_context: nil)
  return unless node.is_a?(Nokogiri::XML::Element)

  ns = node.namespace&.href
  local_name = node.name
  is_root = node.parent&.document&.root == node
  xpath = node.path.delete_prefix('/')

  # For root elements, check validity first
  if is_root
    if ns != RNG_NS && !VALID_ROOT_ELEMENTS.include?(local_name)
      report_error("Invalid root element '#{local_name}'", xpath: xpath, node: node)
      return
    end
  else
    # Skip foreign elements (non-RNG namespace) for non-root elements. This
    # also covers annotation-namespace elements, so no separate check is needed.
    return unless ns == RNG_NS || (ns.nil? && !local_name.empty?)
  end

  # ---- Existing rules ----
  report_obsolete_element(local_name, xpath)
  report_invalid_root(local_name, node, xpath)
  validate_obsolete_attrs(node, xpath)
  validate_leaf_no_children(local_name, node, xpath)
  validate_required_attrs(local_name, node, xpath)
  validate_name_attr(local_name, node, xpath)
  validate_ncname_strict(node, xpath)
  validate_datatype_library(node, xpath)
  validate_href(node, xpath)
  validate_single_except(local_name, node, xpath)
  validate_container_children(local_name, node, xpath)
  validate_name_class_and_pattern(local_name, node, xpath)
  validate_content_model(node, xpath, parent_context)
  validate_context(local_name, node, xpath)
  validate_name_value_purity(local_name, node, xpath)
  validate_xmlns_restrictions(local_name, node, xpath)
  validate_name_class_except(local_name, node, xpath)
  validate_name_conflict(local_name, node, xpath)
  validate_group_content(local_name, node, xpath)
  validate_leaf_no_attrs(local_name, node, xpath)
  validate_unknown_attrs(node, xpath)
  validate_single_attribute_pattern(local_name, node, xpath)
  validate_no_duplicate_attribute_names(local_name, node, xpath)

  # ---- New rules for spectest coverage ----
  validate_except_not_empty(local_name, node, xpath)
  validate_xmlns_in_name_class(local_name, node, xpath)
  # Called once: a second identical call only duplicated its errors when
  # collecting.
  validate_xmlns_in_anyname_attribute(local_name, node, xpath)
  validate_name_not_empty(local_name, node, xpath)
  validate_grammar_structure(local_name, node, xpath)
  validate_define_combine(local_name, node, xpath)
  validate_combine_consistency(local_name, node, xpath)
  validate_attribute_name_class_overlap(local_name, node, xpath)
  validate_nsname_except_rules(local_name, node, xpath)
  validate_param_for_builtin_types(local_name, node, xpath)
  validate_data_except_strict(node, xpath)
  validate_attribute_choice_content(local_name, node, xpath)
  validate_interleave_attribute_overlap(local_name, node, xpath)
  validate_interleave_name_class_overlap(local_name, node, xpath)
  validate_list_content_strict(local_name, node, xpath)
  validate_element_attribute_overlap(local_name, node, xpath)
  validate_grammar_root_element(local_name, node, xpath)
  validate_grammar_must_have_start(local_name, node, xpath)
  validate_grammar_nesting(local_name, node, xpath)
  validate_ref_resolution(local_name, node, xpath)
  validate_recursive_ref(local_name, node, xpath)
  validate_xmlns_in_name_class_choice(local_name, node, xpath)
  validate_builtin_type(local_name, node, xpath)
  validate_datatype_library_empty(local_name, node, xpath)
  validate_start_content(local_name, node, xpath)
  validate_start_element_conflicts(local_name, node, xpath)
  validate_group_text_data(local_name, node, xpath)
  validate_data_except_content_types(local_name, node, xpath)
  validate_infinite_attribute_name_class(local_name, node, xpath)
  validate_oneOrMore_attribute_overlap(local_name, node, xpath)
  validate_oneOrMore_infinite_attribute_name(local_name, node, xpath)

  # Recurse. The child context depends only on this node, so compute it once.
  child_context = context_for_child(local_name, node)
  node.element_children.each do |child|
    next if child.namespace&.href == ANNOTATIONS_NS

    validate_node(child, parent_context: child_context)
  end
end
201
+
202
# Builds a SchemaValidationError and either raises it or, in collect-all
# mode, appends it to #errors.
#
# @param message [String] description of the violation
# @param xpath [String, nil] path of the offending node
# @param node [#line, nil] offending node; its line number is recorded when given
# @return [nil] in collect-all mode
# @raise [SchemaValidationError] when not collecting
def report_error(message, xpath: nil, node: nil)
  failure = SchemaValidationError.new(message, xpath: xpath, line: node&.line)
  raise failure unless @collect_all

  @errors << failure
  nil
end
210
+
211
+ private
212
+
213
# Element children of +node+ that are in the RELAX NG namespace, or that have
# no namespace and a non-empty name.
#
# @param node [#element_children]
# @return [Array] the matching children, in document order
def rng_ns_children(node)
  node.element_children.select do |child|
    namespace = child.namespace
    namespace.nil? ? !child.name.empty? : namespace.href == RNG_NS
  end
end
216
+
217
# Maps a parent element name to the context symbol handed to its children.
#
# @param parent_name [String] local name of the parent element
# @param _parent_node [Object] unused
# @return [Symbol, nil] :grammar, :content or :data; nil for any other parent
def context_for_child(parent_name, _parent_node)
  if %w[start grammar div include].include?(parent_name)
    :grammar
  elsif %w[element attribute].include?(parent_name)
    :content
  elsif %w[data list value].include?(parent_name)
    :data
  end
end
224
+
225
+ # ---- Helper methods ----
226
# Finds the nearest ancestor of +node+ called +name+ that is in the RELAX NG
# namespace or has no namespace.
#
# The walk stops at the first object that does not respond to #parent. That
# ends it cleanly at the document node (Nokogiri's Document does not expose
# #parent) instead of raising NoMethodError when no ancestor matches.
#
# @param node [#parent] starting node (not itself considered)
# @param name [String] local name to look for
# @return [Object, nil] the matching ancestor, or nil when there is none
def find_ancestor(node, name)
  current = node.parent
  while current.respond_to?(:parent)
    if current.name == name
      namespace = current.namespace
      return current if namespace.nil? ? !current.name.empty? : namespace.href == RNG_NS
    end

    current = current.parent
  end
  nil
end
236
+
237
# Collects the names of every <define> reachable from +grammar+ through any
# nesting of <div> and <include>.
#
# Both <div> and <include> are searched recursively, so a <define> placed in
# a <div> inside an <include> is found too. Defines whose name is missing or
# blank are ignored.
#
# @param grammar [Object] a grammar, div or include node
# @return [Array<String>] define names in document order (duplicates kept)
def collect_defines_in_grammar(grammar)
  rng_ns_children(grammar).flat_map do |child|
    case child.name
    when 'define'
      name = child['name']
      name.nil? || name.strip.empty? ? [] : [name]
    when 'div', 'include'
      collect_defines_in_grammar(child)
    else
      []
    end
  end
end
257
+
258
# Collects the `name` attribute of every <element> in +nodes+, descending
# through grouping patterns but never into <choice> (the same name may appear
# in different branches of a choice).
#
# @param nodes [Enumerable] pattern nodes to scan
# @return [Array<String>] element names in document order
def collect_element_names_non_choice(nodes)
  wrappers = %w[group interleave optional zeroOrMore oneOrMore mixed]
  nodes.flat_map do |node|
    if node.name == 'element'
      [node['name']].compact
    elsif wrappers.include?(node.name)
      collect_element_names_non_choice(rng_ns_children(node))
    else
      []
    end
  end
end
272
+
273
# Collects the `name` attribute of every <element> in +nodes+, descending
# through grouping patterns including <choice>.
#
# @param nodes [Enumerable] pattern nodes to scan
# @return [Array<String>] element names in document order
def collect_element_names_from_children(nodes)
  wrappers = %w[choice group interleave optional zeroOrMore oneOrMore mixed]
  nodes.each_with_object([]) do |node, found|
    if node.name == 'element'
      found << node['name'] if node['name']
    elsif wrappers.include?(node.name)
      found.concat(collect_element_names_from_children(rng_ns_children(node)))
    end
  end
end
285
+
286
# Collects the `name` attribute of every <attribute> in +nodes+, descending
# through grouping patterns but never into <choice> (the same name may appear
# in different branches of a choice).
#
# @param nodes [Enumerable] pattern nodes to scan
# @return [Array<String>] attribute names in document order
def collect_attribute_names_non_choice(nodes)
  wrappers = %w[group interleave optional zeroOrMore oneOrMore mixed]
  nodes.flat_map do |node|
    if node.name == 'attribute'
      [node['name']].compact
    elsif wrappers.include?(node.name)
      collect_attribute_names_non_choice(rng_ns_children(node))
    else
      []
    end
  end
end
300
+
301
# Describes the name class of an <attribute> as a hash.
#
# A `name` attribute wins; otherwise the first <name>, <anyName> or <nsName>
# child decides. A missing `ns` is recorded as ''.
#
# @param attr_node [Object] the attribute pattern node
# @return [Hash, nil] { type: :name | :anyName | :nsName, ... } or nil when no
#   name class is found
def get_attribute_name_class(attr_node)
  literal = attr_node['name']
  return { type: :name, name: literal, ns: attr_node['ns'] || '' } if literal

  name_class = rng_ns_children(attr_node).find { |child| %w[name anyName nsName].include?(child.name) }
  return nil unless name_class

  case name_class.name
  when 'name'
    { type: :name, name: name_class.text.strip, ns: name_class['ns'] || '' }
  when 'anyName'
    { type: :anyName, except: get_name_class_except(name_class) }
  else
    { type: :nsName, ns: name_class['ns'] || '', except: get_name_class_except(name_class) }
  end
end
317
+
318
# Describes the name class of an <element> as a hash.
#
# A `name` attribute wins; otherwise the first <name>, <anyName> or <nsName>
# child decides. A missing `ns` is recorded as ''.
#
# @param element_node [Object] the element pattern node
# @return [Hash, nil] { type: :name | :anyName | :nsName, ... } or nil when no
#   name class is found
def get_element_name_class(element_node)
  own_name = element_node['name']
  return { type: :name, name: own_name, ns: element_node['ns'] || '' } if own_name

  rng_ns_children(element_node).each do |candidate|
    tag = candidate.name
    if tag == 'name'
      return { type: :name, name: candidate.text.strip, ns: candidate['ns'] || '' }
    elsif tag == 'anyName'
      return { type: :anyName, except: get_name_class_except(candidate) }
    elsif tag == 'nsName'
      return { type: :nsName, ns: candidate['ns'] || '', except: get_name_class_except(candidate) }
    end
  end
  nil
end
334
+
335
# @param node [Object] a name-class node (e.g. anyName or nsName)
# @return [Object, nil] its first <except> child, or nil when it has none
def get_name_class_except(node)
  rng_ns_children(node).each do |candidate|
    return candidate if candidate.name == 'except'
  end
  nil
end
338
+
339
# Gathers the name-class description of every <attribute> in +nodes+,
# descending through grouping patterns (including <choice>).
#
# @param nodes [Enumerable] pattern nodes to scan
# @return [Array<Hash>] name-class hashes as built by get_attribute_name_class
def collect_attribute_name_classes(nodes)
  wrappers = %w[group choice interleave optional zeroOrMore oneOrMore mixed]
  nodes.flat_map do |node|
    if node.name == 'attribute'
      [get_attribute_name_class(node)].compact
    elsif wrappers.include?(node.name)
      collect_attribute_name_classes(rng_ns_children(node))
    else
      []
    end
  end
end
352
+
353
# Decides whether two name-class descriptions can match a common name.
#
# anyName pairs always overlap; an anyName against anything else is decided
# by anyName_overlaps_with?. name/name needs equal name and ns; every other
# combination of name and nsName is decided by ns equality alone.
#
# @param nc1 [Hash] name-class hash (:type, and :name / :ns / :except as applicable)
# @param nc2 [Hash] name-class hash
# @return [Boolean]
def name_classes_overlap?(nc1, nc2)
  kind1 = nc1[:type]
  kind2 = nc2[:type]

  if kind1 == :anyName || kind2 == :anyName
    return true if kind1 == kind2

    return kind1 == :anyName ? anyName_overlaps_with?(nc1, nc2) : anyName_overlaps_with?(nc2, nc1)
  end

  comparable = %i[name nsName]
  return false unless comparable.include?(kind1) && comparable.include?(kind2)

  same_ns = nc1[:ns] == nc2[:ns]
  if kind1 == :name && kind2 == :name
    nc1[:name] == nc2[:name] && same_ns
  else
    same_ns
  end
end
364
+
365
# Decides whether an anyName name class overlaps +other_nc+, honouring the
# anyName's <except>.
#
# @param anyName_nc [Hash] name-class hash of type :anyName (:except may be nil)
# @param other_nc [Hash] the other name-class hash
# @return [Boolean] true when there is no except, or when the other name
#   class is not (fully) excluded
def anyName_overlaps_with?(anyName_nc, other_nc)
  excluded = anyName_nc[:except]
  return true unless excluded

  if other_nc[:type] == :name
    !name_in_except?(other_nc[:name], other_nc[:ns], excluded)
  elsif other_nc[:type] == :nsName
    !nsName_fully_in_except?(other_nc[:ns], excluded)
  else
    true
  end
end
380
+
381
# Tells whether the qualified name (+name+, +ns+) is covered by the name
# classes listed under +except_node+.
#
# A <name> matches on text and ns (missing ns counts as ''), <anyName>
# matches everything, <nsName> matches on ns. A <choice> is searched as a
# container in its own right, so names nested at any depth of choices are
# found.
#
# @param name [String] local name to look for
# @param ns [String] namespace URI ('' for none)
# @param except_node [Object] an <except> (or nested <choice>) node
# @return [Boolean]
def name_in_except?(name, ns, except_node)
  rng_ns_children(except_node).any? do |child|
    case child.name
    when 'name' then child.text.strip == name && (child['ns'] || '') == ns
    when 'anyName' then true
    when 'nsName' then (child['ns'] || '') == ns
    when 'choice' then name_in_except?(name, ns, child)
    else false
    end
  end
end
398
+
399
# Tells whether the whole namespace +ns+ is excluded by the name classes
# listed under +except_node+.
#
# An <anyName> excludes everything; an <nsName> excludes its own namespace.
# A missing `ns` attribute is treated as '' on both sides, matching how the
# name-class hashes are built, and <choice> children are searched recursively.
#
# @param ns [String, nil] namespace URI ('' or nil for none)
# @param except_node [Object] an <except> (or nested <choice>) node
# @return [Boolean]
def nsName_fully_in_except?(ns, except_node)
  wanted = ns || ''
  rng_ns_children(except_node).any? do |child|
    case child.name
    when 'anyName' then true
    when 'nsName' then (child['ns'] || '') == wanted
    when 'choice' then nsName_fully_in_except?(wanted, child)
    else false
    end
  end
end
408
+
409
# Element name classes overlap under exactly the same rules as any other
# name classes; this simply forwards to name_classes_overlap?.
#
# @param nc1 [Hash] name-class hash
# @param nc2 [Hash] name-class hash
# @return [Boolean]
def element_name_classes_overlap?(nc1, nc2)
  name_classes_overlap?(nc1, nc2)
end
412
+
413
# Tells whether +node+ has an <element> or <ref> child, directly or nested
# inside grouping patterns.
#
# @param node [Object] pattern node to inspect
# @return [Boolean]
def contains_element_pattern?(node)
  wrappers = %w[group choice interleave optional zeroOrMore oneOrMore mixed]
  rng_ns_children(node).any? do |child|
    tag = child.name
    %w[element ref].include?(tag) || (wrappers.include?(tag) && contains_element_pattern?(child))
  end
end
423
+
424
+ # ---- Existing validation rules ----
425
# Reports +local_name+ when it is one of OBSOLETE_ELEMENTS.
#
# @param local_name [String] element name being validated
# @param xpath [String] path of the element, for the error location
def report_obsolete_element(local_name, xpath)
  if OBSOLETE_ELEMENTS.include?(local_name)
    report_error("Element '#{local_name}' is obsolete and not supported", xpath: xpath)
  end
end
430
+
431
# Reports the document root when its local name is not one of
# VALID_ROOT_ELEMENTS. Does nothing for non-root nodes.
#
# @param local_name [String] element name being validated
# @param node [Object] the element
# @param xpath [String] path of the element, for the error location
def report_invalid_root(local_name, node, xpath)
  is_document_root = node.parent&.document&.root == node
  return if !is_document_root || VALID_ROOT_ELEMENTS.include?(local_name)

  report_error("Invalid root element '#{local_name}'", xpath: xpath)
end
437
+
438
# Reports obsolete attributes on +node+.
#
# Two families are checked: attributes in the RELAX NG namespace whose name
# is in OBSOLETE_ATTRS, and un-namespaced attributes that are obsolete on a
# specific element (`name` on start, `global` on attribute, `key`/`keyRef`
# on data).
#
# @param node [Object] the element whose attributes are inspected
# @param xpath [String] path of the element, for the error location
def validate_obsolete_attrs(node, xpath)
  owner = node.name
  node.attributes.each do |name, attr|
    namespace = attr.namespace
    href = namespace&.href
    if href == RNG_NS && OBSOLETE_ATTRS.include?(name)
      report_error("Attribute '#{name}' on '#{owner}' is obsolete", xpath: xpath)
    end
    # The element-specific checks below only apply to unqualified attributes.
    next unless href.nil? && namespace&.prefix.nil?

    obsolete_here =
      case owner
      when 'start' then name == 'name'
      when 'attribute' then name == 'global'
      when 'data' then %w[key keyRef].include?(name)
      else false
      end
    report_error("Attribute '#{name}' on '#{owner}' is obsolete", xpath: xpath) if obsolete_here
  end
end
456
+
457
# Reports a leaf element (see LEAF_ELEMENTS) that has RELAX NG child elements.
#
# @param local_name [String] element name being validated
# @param node [Object] the element
# @param xpath [String] path of the element, for the error location
def validate_leaf_no_children(local_name, node, xpath)
  is_leaf = LEAF_ELEMENTS.include?(local_name)
  if is_leaf && !rng_ns_children(node).empty?
    report_error("'#{local_name}' must not have child elements", xpath: xpath)
  end
end
465
+
466
# Reports an element that lacks its mandatory attribute: `name` on define,
# ref, parentRef and param; `type` on data; `href` on externalRef and include.
#
# @param local_name [String] element name being validated
# @param node [#[]] the element
# @param xpath [String] path of the element, for the error location
def validate_required_attrs(local_name, node, xpath)
  required =
    case local_name
    when 'define', 'ref', 'parentRef', 'param' then %w[a name]
    when 'data' then %w[a type]
    when 'externalRef', 'include' then %w[an href]
    end
  return unless required

  article, attr = required
  report_error("'#{local_name}' must have #{article} '#{attr}' attribute", xpath: xpath) unless node[attr]
end
480
+
481
# Checks the syntax of the `name` attribute, when present.
#
# ref, parentRef and define names must be NCNames (no colon); element and
# attribute names may be QNames. Other elements are not checked.
#
# @param local_name [String] element name being validated
# @param node [#[]] the element
# @param xpath [String] path of the element, for the error location
def validate_name_attr(local_name, node, xpath)
  name = node['name']
  return unless name

  if %w[ref parentRef define].include?(local_name)
    validate_ncname(name, local_name, xpath)
  elsif %w[element attribute].include?(local_name)
    validate_qname(name, local_name, xpath, node)
  end
end
495
+
496
# Reports +value+ when it is not an NCName. A value containing a colon gets
# a dedicated message before the general validity check runs.
#
# @param value [String, nil] the name to check; nil or empty is ignored
# @param context [String] element name used in the error message
# @param xpath [String] path of the element, for the error location
def validate_ncname(value, context, xpath)
  return if value.nil? || value.empty?

  if value.include?(':')
    report_error("'#{context}' name '#{value}' must be an NCName (no colon)", xpath: xpath)
  end

  report_error("'#{context}' name '#{value}' is not a valid NCName", xpath: xpath) unless valid_ncname_string?(value)
end
504
+
505
# Checks a prefixed QName: the prefix and the local part must each be an
# NCName, the local part must not be empty, and the prefix must be declared
# on +node+ or one of its ancestors. Names without a colon are not checked.
#
# @param value [String, nil] the name to check; nil or empty is ignored
# @param context [String] element name used in the error messages
# @param xpath [String] path of the element, for the error location
# @param node [Object] element whose namespace declarations are consulted
def validate_qname(value, context, xpath, node)
  return if value.nil? || value.empty? || !value.include?(':')

  # Split on the first colon only; anything after it is the local part.
  prefix, _colon, local = value.partition(':')
  has_prefix = !prefix.empty?

  if has_prefix && !valid_ncname_string?(prefix)
    report_error("'#{context}' QName prefix '#{prefix}' must be an NCName", xpath: xpath)
  end

  if local.empty?
    report_error("'#{context}' QName '#{value}' must not have an empty local part", xpath: xpath)
  elsif !valid_ncname_string?(local)
    report_error("'#{context}' QName local part '#{local}' must be an NCName", xpath: xpath)
  end

  return if !has_prefix || prefix_declared?(prefix, node)

  report_error("'#{context}' QName prefix '#{prefix}' is not declared", xpath: xpath)
end
527
+
528
# Tells whether +prefix+ is bound by a namespace declaration on +node+ or on
# any of its element ancestors.
#
# The `xml` prefix is reserved and bound by definition, so it is always
# accepted even though it never appears among the declarations
# (e.g. name="xml:lang").
#
# @param prefix [String] namespace prefix to look up
# @param node [Nokogiri::XML::Element, nil] element where the lookup starts
# @return [Boolean] false when +node+ is nil or the prefix is not bound
def prefix_declared?(prefix, node)
  return false unless node
  return true if prefix == 'xml'

  current = node
  while current.is_a?(Nokogiri::XML::Element)
    return true if current.namespace_definitions.any? { |definition| definition.prefix == prefix }

    current = current.parent
  end
  false
end
540
+
541
# Tells whether +value+ is an NCName: a valid name-start character followed
# only by valid name characters.
#
# @param value [String, nil]
# @return [Boolean] false for nil or empty input
def valid_ncname_string?(value)
  return false if value.nil? || value.empty?

  head, *tail = value.codepoints
  return false unless valid_name_start_char?(head)

  tail.all? { |codepoint| valid_name_char?(codepoint) }
end
554
+
555
# Checks the first character of a <name> element used as a name class, i.e.
# a <name> whose parent is an element or attribute pattern. Only the start
# character is examined; empty names are ignored.
#
# @param node [Object] the node being validated
# @param xpath [String] path of the node, for the error location
def validate_ncname_strict(node, xpath)
  return if node.name != 'name'

  owner = node.parent
  return if !owner || !%w[element attribute].include?(owner.name)

  text = node.text.strip
  first_codepoint = text.codepoints.first
  return if first_codepoint.nil? || valid_name_start_char?(first_codepoint)

  report_error("Name '#{text}' is not a valid NCName: invalid start character", xpath: xpath)
end
574
+
575
# Tells whether codepoint +cp+ may start a name. The colon is deliberately
# not accepted, so the result is suitable for NCNames.
#
# @param cp [Integer] Unicode codepoint
# @return [Boolean]
def valid_name_start_char?(cp)
  case cp
  when 0x41..0x5A, # A-Z
       0x61..0x7A, # a-z
       0x5F, # _
       0xC0..0xD6,
       0xD8..0xF6,
       0xF8..0x2FF,
       0x370..0x37D,
       0x37F..0x1FFF,
       0x200C..0x200D,
       0x2070..0x218F,
       0x2C00..0x2FEF,
       0x3001..0xD7FF,
       0xF900..0xFDCF,
       0xFDF0..0xFFFD,
       0x10000..0xEFFFF
    true
  else
    false
  end
end
592
+
593
# Tells whether codepoint +cp+ may appear after the first character of a name.
#
# Follows the XML NameChar production: every name-start character plus
# digits, '-', '.', the middle dot, the combining marks U+0300-U+036F and
# the tie characters U+203F-U+2040.
#
# @param cp [Integer] Unicode codepoint
# @return [Boolean]
def valid_name_char?(cp)
  valid_name_start_char?(cp) ||
    cp.between?(0x30, 0x39) || # 0-9
    cp == 0x2D || # -
    cp == 0x2E || # .
    cp == 0xB7 || # middle dot
    cp.between?(0x0300, 0x036F) || # combining diacritical marks
    cp.between?(0x203F, 0x2040) # undertie, character tie
end
600
+
601
# Checks the `datatypeLibrary` attribute of +node+, when present and
# non-empty: it must parse as a URI, have a scheme followed by a non-empty
# scheme-specific part, and carry neither a fragment nor a query.
#
# A missing scheme is reported without dereferencing it, so collect-all mode
# keeps going instead of failing with NoMethodError on nil.
#
# @param node [#[]] the element
# @param xpath [String] path of the element, for the error location
def validate_datatype_library(node, xpath)
  dtl = node['datatypeLibrary']
  return if dtl.nil? || dtl.empty?

  begin
    uri = URI.parse(dtl)
  rescue URI::InvalidURIError
    report_error("datatypeLibrary '#{dtl}' is not a valid URI", xpath: xpath)
    return
  end

  scheme = uri.scheme
  if scheme.nil?
    report_error("datatypeLibrary '#{dtl}' must have a scheme", xpath: xpath)
  elsif dtl[(scheme.length + 1)..].to_s.empty?
    # e.g. "foo:" has a scheme but nothing after the colon
    report_error("datatypeLibrary '#{dtl}' must have a non-empty scheme-specific part", xpath: xpath)
  end
  return unless uri.fragment || uri.query

  report_error("datatypeLibrary '#{dtl}' must not have fragment or query", xpath: xpath)
end
623
+
624
# Reports an <externalRef> whose `href` contains a '#'. Other elements and
# missing hrefs are ignored.
#
# @param node [Object] the element
# @param xpath [String] path of the element, for the error location
def validate_href(node, xpath)
  return if node.name != 'externalRef'

  href = node['href']
  return unless href&.include?('#')

  report_error("externalRef href '#{href}' must not contain a fragment identifier", xpath: xpath)
end
632
+
633
# Reports an anyName, nsName or data element that has more than one
# <except> child.
#
# @param local_name [String] element name being validated
# @param node [Object] the element
# @param xpath [String] path of the element, for the error location
def validate_single_except(local_name, node, xpath)
  return unless %w[anyName nsName data].include?(local_name)

  excepts = rng_ns_children(node).select { |child| child.name == 'except' }
  report_error("'#{local_name}' must not have multiple 'except' children", xpath: xpath) if excepts.length > 1
end
639
+
640
# Checks the number of child patterns of a container element (see
# CONTAINER_ELEMENTS): at least one is required, and <start> may have only one.
#
# @param local_name [String] element name being validated
# @param node [Object] the element
# @param xpath [String] path of the element, for the error location
def validate_container_children(local_name, node, xpath)
  return unless CONTAINER_ELEMENTS.include?(local_name)

  child_count = rng_ns_children(node).length
  if child_count.zero?
    report_error("'#{local_name}' must have at least one child pattern", xpath: xpath)
  elsif local_name == 'start' && child_count > 1
    report_error("'start' must have exactly one child pattern", xpath: xpath)
  end
end
650
+
651
# Checks that an 'element' or 'attribute' node declares a name class and,
# for 'element' only, that it also has a pattern child.
#
# A name class is considered present when the node has a 'name' attribute,
# or has a child named name/anyName/nsName/ref, or has a 'choice' child for
# which choice_contains_name_class? is true. A pattern is considered present
# when any child is not one of name/anyName/nsName.
def validate_name_class_and_pattern(local_name, node, xpath)
  return unless %w[element attribute].include?(local_name)

  name_class_tags = %w[name anyName nsName]
  children = rng_ns_children(node)
  declares_name = children.any? do |kid|
    name_class_tags.include?(kid.name) ||
      kid.name == 'ref' ||
      (kid.name == 'choice' && choice_contains_name_class?(kid))
  end
  carries_pattern = children.any? { |kid| !name_class_tags.include?(kid.name) }

  unless node['name'] || declares_name
    report_error("'#{local_name}' must have a name class", xpath: xpath)
  end
  # Attributes need no pattern; only elements are checked further.
  return if local_name != 'element' || carries_pattern

  report_error("'#{local_name}' must have a pattern", xpath: xpath)
end
669
+
670
# Returns true when +choice_node+ is a 'choice' that has, at any depth of
# nested 'choice' children, a name/anyName/nsName child. Returns false for
# any node not named 'choice'.
def choice_contains_name_class?(choice_node)
  return false if choice_node.name != 'choice'

  rng_ns_children(choice_node).any? do |member|
    if %w[name anyName nsName].include?(member.name)
      true
    else
      member.name == 'choice' && choice_contains_name_class?(member)
    end
  end
end
681
+
682
# Dispatches +node+ to the content-model check matching its element name.
# 'except' is only checked when its parent is named 'data'; names without a
# dedicated check are ignored. The third argument is unused.
def validate_content_model(node, xpath, _parent_context)
  checks = {
    'attribute' => :validate_attribute_content,
    'list' => :validate_list_content,
    'interleave' => :validate_interleave_content,
    'mixed' => :validate_mixed_content
  }
  tag = node.name
  if checks.key?(tag)
    send(checks[tag], node, xpath)
  elsif tag == 'except' && node.parent&.name == 'data'
    validate_data_except_content(node, xpath)
  end
end
696
+
697
# Reports one error for every RELAX NG child of +node+ whose name is listed
# in ATTR_DISALLOWED.
def validate_attribute_content(node, xpath)
  forbidden = rng_ns_children(node).select { |kid| ATTR_DISALLOWED.include?(kid.name) }
  forbidden.each do |kid|
    report_error("'attribute' content must not contain '#{kid.name}' pattern",
                 xpath: xpath)
  end
end
705
+
706
# Reports one error for every RELAX NG child of +node+ whose name is listed
# in LIST_DISALLOWED.
def validate_list_content(node, xpath)
  forbidden = rng_ns_children(node).select { |kid| LIST_DISALLOWED.include?(kid.name) }
  forbidden.each do |kid|
    report_error("'list' content must not contain '#{kid.name}'",
                 xpath: xpath)
  end
end
714
+
715
# Reports one error for every RELAX NG child of +node+ whose name is listed
# in DATA_EXCEPT_DISALLOWED.
def validate_data_except_content(node, xpath)
  forbidden = rng_ns_children(node).select { |kid| DATA_EXCEPT_DISALLOWED.include?(kid.name) }
  forbidden.each do |kid|
    report_error("'data/except' content must not contain '#{kid.name}'",
                 xpath: xpath)
  end
end
723
+
724
# Checks the children of an 'interleave' node: reports an error when more
# than one child is named 'text', and another when
# collect_element_names_non_choice yields the same element name twice.
def validate_interleave_content(node, xpath)
  members = rng_ns_children(node)
  text_total = members.count { |member| member.name == 'text' }
  if text_total > 1
    report_error("'interleave' must not contain multiple 'text' patterns", xpath: xpath)
  end

  repeated = collect_element_names_non_choice(members).tally.select { |_, seen| seen > 1 }.keys
  unless repeated.empty?
    report_error("'interleave' must not contain overlapping element names: #{repeated.join(', ')}",
                 xpath: xpath)
  end
end
736
+
737
# Reports an error when +node+ has a direct RELAX NG child named 'mixed'.
def validate_mixed_content(node, xpath)
  nested = rng_ns_children(node).find { |kid| kid.name == 'mixed' }
  report_error("'mixed' must not contain nested 'mixed'", xpath: xpath) if nested
end
742
+
743
# Checks that 'define', 'start' and 'include' nodes sit under a permitted
# parent: 'define' under grammar/div/include, 'start' and 'include' under
# grammar/div. Other element names are not checked.
def validate_context(local_name, node, xpath)
  permitted_parents = {
    'define' => %w[grammar div include],
    'start' => %w[grammar div],
    'include' => %w[grammar div]
  }[local_name]
  return if permitted_parents.nil?

  parent_local = node.parent&.name
  return if permitted_parents.include?(parent_local)

  report_error("'#{local_name}' is not allowed inside '#{parent_local}'", xpath: xpath)
end
757
+
758
# Reports an error when a 'name' or 'value' node has any element children.
def validate_name_value_purity(local_name, node, xpath)
  return unless %w[name value].include?(local_name) && node.element_children.any?

  report_error("'#{local_name}' must not contain child elements", xpath: xpath)
end
765
+
766
# For 'attribute' nodes: reports an error when the 'name' attribute, after
# stripping whitespace, is 'xmlns', and another when the 'ns' attribute is
# the xmlns namespace URI (with or without a trailing slash).
def validate_xmlns_restrictions(local_name, node, xpath)
  return if local_name != 'attribute'

  if node['name']&.strip == 'xmlns'
    report_error("Attribute name 'xmlns' is not allowed", xpath: xpath)
  end

  xmlns_uris = ['http://www.w3.org/2000/xmlns', 'http://www.w3.org/2000/xmlns/']
  if xmlns_uris.include?(node['ns'])
    report_error('Attribute with xmlns namespace is not allowed', xpath: xpath)
  end
end
776
+
777
# For an 'except' node whose parent is 'anyName' or 'nsName', hands the
# parent and the except's RELAX NG children to
# check_name_class_except_children.
def validate_name_class_except(local_name, node, xpath)
  return if local_name != 'except'

  owner = node.parent
  return unless owner && %w[anyName nsName].include?(owner.name)

  check_name_class_except_children(owner, rng_ns_children(node), xpath)
end
785
+
786
# Inspects the children of a name-class 'except' (a 'choice' child is
# replaced by its own children, one level deep) and reports:
# - 'anyName' inside an 'anyName' except,
# - 'nsName' inside an 'nsName' except with an equal 'ns' value (a missing
#   'ns' counts as ''),
# - 'anyName' inside an 'nsName' except.
def check_name_class_except_children(parent, children, xpath)
  children.each do |child|
    candidates = child.name == 'choice' ? rng_ns_children(child) : [child]
    candidates.each do |candidate|
      case [parent.name, candidate.name]
      when %w[anyName anyName]
        report_error("'anyName/except' must not contain 'anyName'",
                     xpath: xpath)
      when %w[nsName nsName]
        if (parent['ns'] || '') == (candidate['ns'] || '')
          report_error("'nsName/except' must not contain 'nsName' with same namespace",
                       xpath: xpath)
        end
      when %w[nsName anyName]
        report_error("'nsName/except' containing 'anyName' results in empty name class",
                     xpath: xpath)
      end
    end
  end
end
809
+
810
# Reports an error when an 'element' or 'attribute' node has both a 'name'
# attribute and a child named 'name'.
def validate_name_conflict(local_name, node, xpath)
  return unless %w[element attribute].include?(local_name)
  return unless node['name']

  name_child = rng_ns_children(node).find { |kid| kid.name == 'name' }
  return if name_child.nil?

  report_error("'#{local_name}' cannot have both a name attribute and a name child", xpath: xpath)
end
817
+
818
# Reports one error for each direct child of a 'group' node that is named
# name, anyName or nsName.
def validate_group_content(local_name, node, xpath)
  return if local_name != 'group'

  rng_ns_children(node).map(&:name).each do |tag|
    next unless %w[name anyName nsName].include?(tag)

    report_error("'group' must not contain a name class '#{tag}'", xpath: xpath)
  end
end
826
+
827
# For element names listed in NO_ATTR_LEAF_ELEMENTS, reports a single error
# if the node has any attribute other than:
# - attributes in a namespace different from RNG_NS, and
# - 'xmlns' / 'xmlns:*' declarations.
def validate_leaf_no_attrs(local_name, node, xpath)
  return unless NO_ATTR_LEAF_ELEMENTS.include?(local_name)

  offending = node.attributes.any? do |attr_name, attr|
    attr_ns = attr.namespace&.href
    foreign = attr_ns && attr_ns != RNG_NS
    declaration = attr_name == 'xmlns' || attr_name.start_with?('xmlns:')
    !foreign && !declaration
  end
  report_error("'#{local_name}' must not have attributes", xpath: xpath) if offending
end
839
+
840
# Reports an error for each attribute of +node+ that is not recognised:
# - attributes in the RNG_NS namespace are checked with known_rng_attr?,
# - attributes with neither a namespace URI nor a prefix are checked with
#   known_unprefixed_attr?.
# The bare 'xmlns' attribute and all other attributes are skipped.
def validate_unknown_attrs(node, xpath)
  node.attributes.each do |attr_name, attr|
    next if attr_name == 'xmlns'

    attr_ns = attr.namespace&.href
    unknown =
      if attr_ns == RNG_NS
        !known_rng_attr?(node.name, attr_name)
      elsif attr_ns.nil? && attr.namespace&.prefix.nil?
        !known_unprefixed_attr?(node.name, attr_name)
      else
        false
      end
    next unless unknown

    report_error("Unknown attribute '#{attr_name}' on '#{node.name}'",
                 xpath: xpath)
  end
end
859
+
860
# True when +attr_name+ is listed for +element_name+ in KNOWN_ATTRS or
# appears in GENERIC_ATTRS.
def known_rng_attr?(element_name, attr_name)
  return true if KNOWN_ATTRS.fetch(element_name, []).include?(attr_name)

  GENERIC_ATTRS.include?(attr_name)
end
863
+
864
# True when +attr_name+ is listed for +element_name+ in KNOWN_ATTRS or
# appears in VALID_UNPREFIXED_ATTRS.
def known_unprefixed_attr?(element_name, attr_name)
  return true if KNOWN_ATTRS.fetch(element_name, []).include?(attr_name)

  VALID_UNPREFIXED_ATTRS.include?(attr_name)
end
867
+
868
# Reports an error when an 'attribute' node has more than one child that is
# not named name, anyName, nsName or choice (those four are not counted as
# patterns here).
def validate_single_attribute_pattern(local_name, node, xpath)
  return if local_name != 'attribute'

  not_counted = %w[name anyName nsName choice]
  patterns = rng_ns_children(node).reject { |kid| not_counted.include?(kid.name) }
  return unless patterns.size > 1

  report_error("'attribute' must not have multiple patterns", xpath: xpath)
end
877
+
878
# For 'element' nodes, runs check_duplicate_attrs_in_children over the
# node's RELAX NG children.
def validate_no_duplicate_attribute_names(local_name, node, xpath)
  return if local_name != 'element'

  content = rng_ns_children(node)
  check_duplicate_attrs_in_children(content, xpath)
end
883
+
884
# Reports an error when two 'attribute' nodes in +nodes+ share the same
# 'name' attribute value. Wrapper patterns (group, interleave, optional,
# zeroOrMore, oneOrMore, mixed) are checked recursively, each as its own
# scope; 'choice' is deliberately not descended into because duplicates
# across alternative branches are acceptable.
def check_duplicate_attrs_in_children(nodes, xpath)
  wrappers = %w[group interleave optional zeroOrMore oneOrMore mixed]
  occurrences = Hash.new(0)
  nodes.each do |n|
    if n.name == 'attribute'
      occurrences[n['name']] += 1 if n['name']
    elsif wrappers.include?(n.name)
      check_duplicate_attrs_in_children(rng_ns_children(n), xpath)
    end
  end

  repeated = occurrences.select { |_, seen| seen > 1 }.keys
  return if repeated.empty?

  report_error("'element' must not have duplicate attribute names: #{repeated.join(', ')}",
               xpath: xpath)
end
902
+
903
+ # ---- New validation rules for spectest coverage ----
904
# Reports an error when an 'except' node without RELAX NG children sits
# under an 'anyName', 'nsName' or 'data' parent.
def validate_except_not_empty(local_name, node, xpath)
  return if local_name != 'except' || !rng_ns_children(node).empty?

  owner = node.parent
  return unless owner && %w[anyName nsName data].include?(owner.name)

  report_error("'#{owner.name}/except' must not be empty", xpath: xpath)
end
913
+
914
# Reports an error for a 'name' node directly under an 'attribute' whose
# stripped text is 'xmlns' and whose 'ns' attribute is absent or empty.
def validate_xmlns_in_name_class(local_name, node, xpath)
  return unless local_name == 'name' && node.parent&.name == 'attribute'
  return if node.text.strip != 'xmlns'

  namespace = node['ns']
  return unless namespace.nil? || namespace == ''

  report_error("Attribute name 'xmlns' is not allowed", xpath: xpath)
end
927
+
928
# For an 'anyName' node that belongs to an attribute (its parent is
# 'attribute', or its parent is a 'choice' whose own parent is
# 'attribute'): if the anyName has an 'except' child that
# except_covers_xmlns? rejects, an error is reported. An anyName without
# any 'except' child is not reported by this method.
def validate_xmlns_in_anyname_attribute(local_name, node, xpath)
  return if local_name != 'anyName'

  owner = node.parent
  # Look through a single wrapping 'choice' to reach the attribute.
  owner = owner.parent if owner&.name == 'choice'
  return unless owner&.name == 'attribute'

  exclusion = rng_ns_children(node).find { |kid| kid.name == 'except' }
  return if exclusion.nil? || except_covers_xmlns?(exclusion)

  report_error("'anyName' in attribute does not exclude xmlns namespace names", xpath: xpath)
end
947
+
948
# Decides whether an 'except' clause excludes every xmlns-related name.
# Per the original note (RELAX NG 4.16), an attribute name class must not
# match:
#   - xmlns (bare name in the empty namespace)
#   - any name in the http://www.w3.org/2000/xmlns/ namespace (xmlns:*)
# Both must be excluded for the result to be true.
#
# The children of the except are examined; a 'choice' child is replaced by
# its own children (one level only). 'anyName' excludes both groups, an
# 'nsName' with the xmlns namespace URI excludes the second, and a 'name'
# whose text is 'xmlns' with no/empty 'ns' excludes the first.
def except_covers_xmlns?(except_node)
  xmlns_uri = 'http://www.w3.org/2000/xmlns/'
  candidates = rng_ns_children(except_node).flat_map do |child|
    child.name == 'choice' ? rng_ns_children(child).to_a : [child]
  end

  bare_excluded = candidates.any? do |nc|
    nc.name == 'anyName' ||
      (nc.name == 'name' && nc.text.strip == 'xmlns' && (nc['ns'].nil? || nc['ns'] == ''))
  end
  namespace_excluded = candidates.any? do |nc|
    nc.name == 'anyName' || (nc.name == 'nsName' && (nc['ns'] || '') == xmlns_uri)
  end

  bare_excluded && namespace_excluded
end
984
+
985
# Reports an error when an element, attribute, ref, parentRef or define
# node has a 'name' attribute that is empty or whitespace-only. A missing
# 'name' attribute is not reported here.
def validate_name_not_empty(local_name, node, xpath)
  return unless %w[element attribute ref parentRef define].include?(local_name)

  given = node['name']
  return if given.nil? || !given.strip.empty?

  report_error("'#{local_name}' name attribute must not be empty", xpath: xpath)
end
995
+
996
# For the document's root 'grammar' node: reports an error when it has
# RELAX NG children but none of them is a 'start', 'include' or 'define'.
# A grammar with no children at all is not reported by this check.
def validate_grammar_structure(local_name, node, xpath)
  return if local_name != 'grammar'
  return if node.parent&.document&.root != node

  members = rng_ns_children(node)
  return if members.empty?
  return if members.any? { |member| %w[start include define].include?(member.name) }

  report_error("'grammar' must have a 'start', 'include', or 'define' child", xpath: xpath)
end
1009
+
1010
# For a 'grammar' node: groups its direct 'define' children by 'name' and
# reports an error for every name where more than one define has a missing
# or blank 'combine' attribute.
def validate_define_combine(local_name, node, xpath)
  return if local_name != 'grammar'

  by_name = rng_ns_children(node).select { |kid| kid.name == 'define' }.group_by { |d| d['name'] }
  by_name.each do |name, group|
    next if group.length <= 1

    uncombined = group.select { |d| d['combine'].nil? || d['combine'].strip.empty? }
    next unless uncombined.size > 1

    report_error("Multiple 'define' elements with name '#{name}' without 'combine' attribute",
                 xpath: xpath)
  end
end
1024
+
1025
# For a 'grammar' node: groups its direct 'define' children by 'name' and
# reports an error for every name whose defines carry more than one
# distinct non-blank 'combine' value (compared after stripping).
def validate_combine_consistency(local_name, node, xpath)
  return if local_name != 'grammar'

  by_name = rng_ns_children(node).select { |kid| kid.name == 'define' }.group_by { |d| d['name'] }
  by_name.each do |name, group|
    next if group.length <= 1

    modes = group.map { |d| d['combine'] }.compact.map(&:strip).reject(&:empty?).uniq
    next unless modes.length > 1

    report_error("Inconsistent 'combine' values for define '#{name}': #{modes.join(', ')}",
                 xpath: xpath)
  end
end
1039
+
1040
# For 'element' nodes: gathers attribute name classes through
# collect_attribute_name_classes and reports a single error as soon as any
# pair of them overlaps according to name_classes_overlap?.
def validate_attribute_name_class_overlap(local_name, node, xpath)
  return if local_name != 'element'

  name_classes = collect_attribute_name_classes(rng_ns_children(node)).to_a
  clash = name_classes.combination(2).any? { |left, right| name_classes_overlap?(left, right) }
  report_error("'element' contains overlapping attribute name classes", xpath: xpath) if clash
end
1055
+
1056
# For an 'except' node under 'anyName' or 'nsName': when the parent is
# 'nsName', reports an error for each child (a 'choice' child is replaced
# by its own children, one level deep) that is an 'nsName' with an equal
# 'ns' value (missing 'ns' counts as '') or is an 'anyName'. Nothing is
# reported for an 'anyName' parent.
def validate_nsname_except_rules(local_name, node, xpath)
  return if local_name != 'except'

  owner = node.parent
  return unless %w[anyName nsName].include?(owner&.name)

  rng_ns_children(node).each do |child|
    candidates = child.name == 'choice' ? rng_ns_children(child) : [child]
    candidates.each do |candidate|
      next unless owner.name == 'nsName'

      if candidate.name == 'nsName'
        next unless (owner['ns'] || '') == (candidate['ns'] || '')

        report_error("'nsName/except' must not contain 'nsName' with the same namespace",
                     xpath: xpath)
      elsif candidate.name == 'anyName'
        report_error("'nsName/except' containing 'anyName' results in empty name class",
                     xpath: xpath)
      end
    end
  end
end
1076
+
1077
# Reports an error when a 'data' node has at least one 'param' child, has
# no (or an empty) 'datatypeLibrary' attribute on itself, and its 'type' is
# 'string' or 'token'.
def validate_param_for_builtin_types(local_name, node, xpath)
  return if local_name != 'data'
  return if rng_ns_children(node).none? { |kid| kid.name == 'param' }

  library = node['datatypeLibrary']
  type = node['type']
  builtin = (library.nil? || library.empty?) && %w[string token].include?(type)
  return unless builtin

  report_error("Built-in type '#{type}' does not support 'param' children", xpath: xpath)
end
1089
+
1090
# For an 'except' node whose parent is 'data': reports one error per direct
# child named 'oneOrMore'.
def validate_data_except_strict(node, xpath)
  return unless node.name == 'except' && node.parent&.name == 'data'

  repeats = rng_ns_children(node).select { |kid| kid.name == 'oneOrMore' }
  repeats.each do
    report_error("'data/except' content must not contain 'oneOrMore'", xpath: xpath)
  end
end
1098
+
1099
# For 'attribute' nodes: looks inside each direct 'choice' child and
# reports one error per alternative named 'element' or 'attribute'.
def validate_attribute_choice_content(local_name, node, xpath)
  return if local_name != 'attribute'

  choices = rng_ns_children(node).select { |kid| kid.name == 'choice' }
  choices.each do |choice|
    rng_ns_children(choice).each do |alternative|
      next unless %w[element attribute].include?(alternative.name)

      report_error("'attribute/choice' content must not contain '#{alternative.name}'", xpath: xpath)
    end
  end
end
1114
+
1115
# For 'interleave' nodes: reports an error listing every 'name' attribute
# value shared by two or more direct 'attribute' children.
def validate_interleave_attribute_overlap(local_name, node, xpath)
  return if local_name != 'interleave'

  declared = rng_ns_children(node).filter_map { |kid| kid['name'] if kid.name == 'attribute' }
  repeated = declared.group_by(&:itself).select { |_, hits| hits.size > 1 }.keys
  return if repeated.empty?

  report_error("'interleave' must not contain overlapping attribute names: #{repeated.join(', ')}",
               xpath: xpath)
end
1125
+
1126
# For 'interleave' nodes: takes the name class (get_element_name_class) of
# every direct 'element' child, dropping nil results, and reports a single
# error as soon as any pair overlaps per element_name_classes_overlap?.
def validate_interleave_name_class_overlap(local_name, node, xpath)
  return if local_name != 'interleave'

  name_classes = rng_ns_children(node).filter_map do |kid|
    get_element_name_class(kid) if kid.name == 'element'
  end
  clash = name_classes.combination(2).any? { |left, right| element_name_classes_overlap?(left, right) }
  report_error("'interleave' must not contain overlapping element name classes", xpath: xpath) if clash
end
1141
+
1142
# For 'list' nodes: reports an error for each direct 'interleave' or 'text'
# child, and for each alternative of a direct 'choice' child that is a
# list, element, attribute, text or interleave.
def validate_list_content_strict(local_name, node, xpath)
  return if local_name != 'list'

  rng_ns_children(node).each do |child|
    if %w[interleave text].include?(child.name)
      report_error("'list' content must not contain '#{child.name}'", xpath: xpath)
    elsif child.name == 'choice'
      rng_ns_children(child).each do |alternative|
        if alternative.name == 'list'
          report_error("'list/choice' content must not contain nested 'list'", xpath: xpath)
        elsif %w[element attribute text interleave].include?(alternative.name)
          report_error("'list/choice' content must not contain '#{alternative.name}'", xpath: xpath)
        end
      end
    end
  end
end
1169
+
1170
# For 'element' nodes: walks oneOrMore/zeroOrMore children, and inside
# them group/interleave children that hold more than one 'attribute'.
# Reports a single error, then stops, at the first such container where two
# attribute name classes (get_attribute_name_class, nil results dropped)
# overlap per name_classes_overlap?.
def validate_element_attribute_overlap(local_name, node, xpath)
  return if local_name != 'element'

  repeaters = rng_ns_children(node).select { |kid| %w[oneOrMore zeroOrMore].include?(kid.name) }
  repeaters.each do |repeater|
    rng_ns_children(repeater).each do |container|
      next unless %w[group interleave].include?(container.name)

      attrs = rng_ns_children(container).select { |kid| kid.name == 'attribute' }
      next if attrs.length <= 1

      name_classes = attrs.filter_map { |attr| get_attribute_name_class(attr) }
      next unless name_classes.combination(2).any? { |left, right| name_classes_overlap?(left, right) }

      report_error("Repeating #{container.name} contains overlapping attribute name classes", xpath: xpath)
      return
    end
  end
end
1196
+
1197
# For the document's root 'grammar' node: reports an error when it has both
# an 'element' and a 'start' among its direct RELAX NG children.
def validate_grammar_root_element(local_name, node, xpath)
  return if local_name != 'grammar'
  return if node.parent&.document&.root != node

  child_names = rng_ns_children(node).map(&:name)
  return unless child_names.include?('element') && child_names.include?('start')

  report_error("'grammar' must not have both 'element' and 'start' as direct children", xpath: xpath)
end
1206
+
1207
# Grammar must have a start element (unless it has include which provides one).
#
# Reports an error when the document's root 'grammar' has neither a 'start'
# nor an 'include' among its direct children. Whether the remaining
# children are defines, divs, something else, or absent makes no
# difference to the outcome, so that is not inspected. Non-root grammars
# are never reported by this check.
def validate_grammar_must_have_start(local_name, node, xpath)
  return unless local_name == 'grammar'

  kids = rng_ns_children(node)
  return if kids.any? { |c| %w[start include].include?(c.name) }
  return unless node.parent&.document&.root == node

  report_error("'grammar' must have a 'start' child", xpath: xpath)
end
1232
+
1233
# Grammar nested inside define/choice/group must have a start.
#
# Reports an error for any 'grammar' node (root or nested) that has neither
# a 'start' nor an 'include' among its direct RELAX NG children.
def validate_grammar_nesting(local_name, node, xpath)
  return unless local_name == 'grammar'

  kids = rng_ns_children(node)
  return if kids.any? { |c| %w[start include].include?(c.name) }

  report_error("'grammar' must have a 'start' or 'include' child", xpath: xpath)
end
1246
+
1247
# Every ref/parentRef must resolve to a define.
#
# Runs only for the document's root 'grammar', and is skipped entirely when
# that grammar has an 'include' child (referenced names may live in the
# included file). Each reference returned by collect_all_refs whose name is
# missing from collect_all_defines is reported, using the grammar's xpath.
def validate_ref_resolution(local_name, node, xpath)
  return if local_name != 'grammar'
  return if node.parent&.document&.root != node
  return if rng_ns_children(node).any? { |kid| kid.name == 'include' }

  defined_names = collect_all_defines(node)
  collect_all_refs(node).each do |ref|
    next if defined_names.include?(ref[:name])

    report_error("'#{ref[:type]}' name '#{ref[:name]}' has no matching 'define'",
                 xpath: xpath)
  end
end
1269
+
1270
# Detect recursive refs (self-referencing defines).
#
# Runs only for the document's root 'grammar'. Builds the define -> refs
# map with collect_define_dependencies and reports every define name for
# which has_cycle? (started with an empty visited list) returns true.
def validate_recursive_ref(local_name, node, xpath)
  return if local_name != 'grammar'
  return if node.parent&.document&.root != node

  graph = collect_define_dependencies(node)
  graph.each_key do |name|
    next unless has_cycle?(name, graph, [])

    report_error("'define' name '#{name}' is recursive", xpath: xpath)
  end
end
1282
+
1283
# xmlns in attribute name class choice.
#
# For 'attribute' nodes, delegates to check_name_class_for_xmlns, which
# inspects the node's name-class children for a bare 'xmlns' name.
def validate_xmlns_in_name_class_choice(local_name, node, xpath)
  check_name_class_for_xmlns(node, xpath) if local_name == 'attribute'
end
1290
+
1291
# Built-in type validation (type must be valid).
#
# For 'data' and 'value' nodes that have a 'type' attribute and no (or an
# empty) 'datatypeLibrary' attribute on the node itself: reports an error
# unless the type is 'string' or 'token'.
def validate_builtin_type(local_name, node, xpath)
  return unless %w[data value].include?(local_name)

  type = node['type']
  library = node['datatypeLibrary']
  return if type.nil?
  # With a non-empty library on this node the type is not a built-in one.
  return if library && !library.empty?
  return if %w[string token].include?(type)

  report_error("Unknown built-in type '#{type}'; only 'string' and 'token' are built-in types", xpath: xpath)
end
1305
+
1306
# datatypeLibrary="" with non-built-in type.
#
# For 'data' and 'value' nodes whose own 'datatypeLibrary' attribute is
# exactly the empty string: reports an error when a 'type' is given that is
# neither 'string' nor 'token'.
def validate_datatype_library_empty(local_name, node, xpath)
  return unless %w[data value].include?(local_name)
  return if node['datatypeLibrary'] != ''

  type = node['type']
  return if type.nil? || %w[string token].include?(type)

  report_error("datatypeLibrary must not be empty for non-built-in type '#{type}'", xpath: xpath)
end
1317
+
1318
# Start element content restrictions.
# start must contain only element patterns (not attribute/data/text/value/list/empty).
#
# Only the first RELAX NG child of the 'start' node is passed to
# check_start_pattern; a 'start' without children is ignored.
def validate_start_content(local_name, node, xpath)
  return if local_name != 'start'

  pattern = rng_ns_children(node).first
  check_start_pattern(pattern, xpath) if pattern
end
1329
+
1330
# Element name conflicts in start (group with duplicate element names).
#
# Passes the first RELAX NG child of a 'start' node to
# check_start_element_overlap; a 'start' without children is ignored.
def validate_start_element_conflicts(local_name, node, xpath)
  return if local_name != 'start'

  patterns = rng_ns_children(node)
  check_start_element_overlap(patterns.first, xpath) unless patterns.empty?
end
1339
+
1340
# Group must not have multiple text/data patterns.
#
# Applies to 'group' and 'interleave' nodes: reports an error when more
# than one direct child is named data, value or text (counted together).
def validate_group_text_data(local_name, node, xpath)
  return unless %w[group interleave].include?(local_name)

  textual = rng_ns_children(node).select { |kid| %w[data value text].include?(kid.name) }
  return if textual.size <= 1

  report_error("'#{local_name}' must not contain multiple data/value/text patterns", xpath: xpath)
end
1351
+
1352
# data/except must not contain empty.
#
# For an 'except' node whose parent is 'data': reports one error per direct
# child named 'empty'.
def validate_data_except_content_types(local_name, node, xpath)
  return unless local_name == 'except' && node.parent&.name == 'data'

  empties = rng_ns_children(node).select { |kid| kid.name == 'empty' }
  empties.each do
    report_error("'data/except' must not contain 'empty'", xpath: xpath)
  end
end
1361
+
1362
# Attribute with infinite name class (anyName without except, or nsName ns="").
#
# For 'attribute' nodes: inspects the hash returned by
# get_attribute_name_class (when non-nil) and reports
# - an :anyName name class whose :except entry is falsy,
# - an :nsName name class whose :ns entry is '' or nil.
# Independently of that result, every direct 'choice' child is handed to
# check_choice_for_infinite_attr_name_class.
def validate_infinite_attribute_name_class(local_name, node, xpath)
  return if local_name != 'attribute'

  name_class = get_attribute_name_class(node)
  kind = name_class && name_class[:type]
  if kind == :anyName && !name_class[:except]
    report_error("'attribute' with 'anyName' (no except) matches infinite names",
                 xpath: xpath)
  elsif kind == :nsName && (name_class[:ns].nil? || name_class[:ns] == '')
    report_error("'attribute' with 'nsName' ns='' matches all names",
                 xpath: xpath)
  end

  # A name class expressed as a choice is checked separately.
  rng_ns_children(node).select { |kid| kid.name == 'choice' }.each do |choice|
    check_choice_for_infinite_attr_name_class(choice, xpath)
  end
end
1386
+
1387
# Returns a Set of the non-blank 'name' values of 'define' nodes found
# directly under +grammar_node+, inside its 'div' children (recursively),
# and directly inside its 'include' children.
def collect_all_defines(grammar_node)
  rng_ns_children(grammar_node).each_with_object(Set.new) do |child, names|
    case child.name
    when 'define'
      label = child['name']
      names.add(label) if label && !label.strip.empty?
    when 'div'
      names.merge(collect_all_defines(child))
    when 'include'
      # Only defines placed directly inside the include are collected.
      rng_ns_children(child).select { |inner| inner.name == 'define' }.each do |inner|
        label = inner['name']
        names.add(label) if label && !label.strip.empty?
      end
    end
  end
end
1407
+
1408
# Recursively gathers every 'ref' and 'parentRef' that has a 'name'
# attribute, starting at +node+. Each hit becomes a hash
# { name: <name attribute>, type: 'ref' | 'parentRef' }.
#
# Only Nokogiri elements that are in the RNG_NS namespace, or that have no
# namespace and a non-empty name, are examined and descended into;
# anything else contributes an empty list.
def collect_all_refs(node)
  return [] unless node.is_a?(Nokogiri::XML::Element)

  ns = node.namespace&.href
  tag = node.name
  return [] unless ns == RNG_NS || (ns.nil? && !tag.empty?)

  found = []
  found << { name: node['name'], type: tag } if %w[ref parentRef].include?(tag) && node['name']
  node.element_children.each { |child| found.concat(collect_all_refs(child)) }
  found
end
1427
+
1428
# Builds a hash mapping each 'define' name to the list returned by
# collect_direct_refs for that define. Defines directly under
# +grammar_node+ and inside its 'div' children (recursively) are included;
# defines without a 'name' attribute are skipped. A later define with the
# same name replaces the earlier entry.
def collect_define_dependencies(grammar_node)
  rng_ns_children(grammar_node).each_with_object({}) do |child, graph|
    if child.name == 'define'
      label = child['name']
      graph[label] = collect_direct_refs(child) if label
    elsif child.name == 'div'
      graph.merge!(collect_define_dependencies(child))
    end
  end
end
1444
+
1445
# Recursively gathers the 'name' attribute of every 'ref' and 'parentRef'
# reachable from +node+, without descending into child elements named
# 'define' (so defines of a nested grammar are left out).
#
# Only Nokogiri elements that are in the RNG_NS namespace, or that have no
# namespace and a non-empty name, are examined and descended into;
# anything else contributes an empty list.
def collect_direct_refs(node)
  return [] unless node.is_a?(Nokogiri::XML::Element)

  ns = node.namespace&.href
  tag = node.name
  return [] unless ns == RNG_NS || (ns.nil? && !tag.empty?)

  found = []
  found << node['name'] if %w[ref parentRef].include?(tag) && node['name']
  node.element_children.each do |child|
    found.concat(collect_direct_refs(child)) unless child.name == 'define'
  end
  found
end
1466
+
1467
# Depth-first search for a reference cycle reachable from +name+.
#
# @param name [String] define name to start from
# @param deps [Hash{String=>Array<String>}] define name -> referenced names
# @param visited [Array<String>] names on the current search path; mutated
#   during the search and restored when no cycle is found
# @return [Boolean] true when following references from +name+ revisits a
#   name already on the path; false when +name+ is not a key of +deps+
def has_cycle?(name, deps, visited)
  return false unless deps.key?(name)
  return true if visited.include?(name)

  visited.push(name)
  cyclic = (deps[name] || []).any? { |dep| has_cycle?(dep, deps, visited) }
  # The path entry is only removed when the search below came back clean.
  visited.delete(name) unless cyclic
  cyclic
end
1478
+
1479
# Reports an error for each bare 'xmlns' name found among the RELAX NG
# children of +node+: a 'name' child, or a 'name' directly inside a
# 'choice' child, whose stripped text is 'xmlns' and whose 'ns' attribute
# is missing or empty. For an 'nsName' child the same check is applied to
# its 'except' child, if any. 'anyName' children are left to other rules.
def check_name_class_for_xmlns(node, xpath)
  bare_xmlns = lambda do |candidate|
    candidate.name == 'name' && candidate.text.strip == 'xmlns' && (candidate['ns'] || '') == ''
  end

  rng_ns_children(node).each do |child|
    case child.name
    when 'name', 'choice'
      candidates = child.name == 'choice' ? rng_ns_children(child) : [child]
      candidates.each do |candidate|
        report_error("Attribute name class contains 'xmlns'", xpath: xpath) if bare_xmlns.call(candidate)
      end
    when 'nsName'
      exclusion = rng_ns_children(child).find { |inner| inner.name == 'except' }
      check_name_class_for_xmlns(exclusion, xpath) if exclusion
    end
  end
end
1506
+
1507
# Reports an error when +pattern+ is an attribute, data, value, text, list
# or empty pattern. Composite patterns (group, choice, interleave,
# optional, zeroOrMore, oneOrMore, mixed) are searched recursively; any
# other pattern is accepted without descending into it.
def check_start_pattern(pattern, xpath)
  tag = pattern.name
  if %w[attribute data value text list empty].include?(tag)
    report_error("'start' must not contain '#{tag}'", xpath: xpath)
  elsif %w[group choice interleave optional zeroOrMore oneOrMore mixed].include?(tag)
    rng_ns_children(pattern).each { |inner| check_start_pattern(inner, xpath) }
  end
end
1523
+
1524
# Reports overlapping element names inside a 'start' pattern.
#
# * group      - names gathered by collect_element_names_non_choice over the
#                direct children must not repeat
# * choice     - every alternative is checked on its own
# * oneOrMore  - every child is checked, and a direct 'element' child is
#                itself reported
# * interleave - delegated to check_start_element_overlap_group
# Any other pattern name is ignored.
#
# @param pattern node whose +name+ selects the handling
# @param xpath   location forwarded to report_error
def check_start_element_overlap(pattern, xpath)
  case pattern.name
  when 'group'
    names = collect_element_names_non_choice(rng_ns_children(pattern))
    dups = names.tally.select { |_, count| count > 1 }.keys
    return if dups.empty?

    report_error("'start' group must not have overlapping element names: #{dups.join(', ')}", xpath: xpath)
  when 'choice'
    rng_ns_children(pattern).each { |branch| check_start_element_overlap(branch, xpath) }
  when 'oneOrMore'
    rng_ns_children(pattern).each do |child|
      check_start_element_overlap(child, xpath)
      report_error("'start' oneOrMore of element allows multiple matches", xpath: xpath) if child.name == 'element'
    end
  when 'interleave'
    # 'group' is already consumed by the first branch, so listing it here
    # again would be a duplicate `when` label that can never match.
    check_start_element_overlap_group(pattern, rng_ns_children(pattern), xpath)
  end
end
1547
+
1548
# Reports element names that occur more than once among +children+.
#
# Names come from collect_element_names_non_choice(children); a single error
# listing every repeated name is reported when at least one repeats.
#
# @param _parent  unused
# @param children nodes handed to collect_element_names_non_choice
# @param xpath    location forwarded to report_error
def check_start_element_overlap_group(_parent, children, xpath)
  occurrences = collect_element_names_non_choice(children).tally
  repeated = occurrences.each_with_object([]) do |(element_name, count), found|
    found << element_name if count > 1
  end

  report_error("'start' must not have overlapping element names: #{repeated.join(', ')}", xpath: xpath) unless repeated.empty?
end
1555
+
1556
# For a oneOrMore/zeroOrMore node, reports every direct group/interleave child
# that has two or more direct 'attribute' children.
#
# @param local_name name of +node+; anything other than 'oneOrMore' or
#   'zeroOrMore' makes this a no-op
# @param node       node whose children are inspected via rng_ns_children
# @param xpath      location forwarded to report_error
def validate_oneOrMore_attribute_overlap(local_name, node, xpath)
  return unless %w[oneOrMore zeroOrMore].include?(local_name)

  containers = rng_ns_children(node).select { |c| %w[group interleave].include?(c.name) }
  containers.each do |container|
    attribute_count = rng_ns_children(container).count { |c| c.name == 'attribute' }
    next if attribute_count < 2

    report_error("'#{local_name}' with '#{container.name}' containing multiple attributes creates name overlap",
                 xpath: xpath)
  end
end
1570
+
1571
# For a oneOrMore/zeroOrMore node, inspects the name class of every direct
# 'attribute' child.
#
# An attribute whose name class (as returned by get_attribute_name_class) has
# type :anyName and no :except entry is reported. Each direct 'choice' child of
# the attribute is then handed to check_choice_for_infinite_attr_in_repeat.
# Attributes for which get_attribute_name_class returns nothing are skipped.
#
# @param local_name name of +node+; anything other than 'oneOrMore' or
#   'zeroOrMore' makes this a no-op
# @param node       node whose children are inspected via rng_ns_children
# @param xpath      location forwarded to the error reporters
def validate_oneOrMore_infinite_attribute_name(local_name, node, xpath)
  return unless %w[oneOrMore zeroOrMore].include?(local_name)

  rng_ns_children(node).select { |c| c.name == 'attribute' }.each do |attribute|
    name_class = get_attribute_name_class(attribute)
    next unless name_class

    # An anyName that carries an except is not reported here.
    bare_any_name = name_class[:type] == :anyName && !name_class[:except]
    if bare_any_name
      report_error("'#{local_name}' of 'attribute' with 'anyName' creates infinite attribute matches", xpath: xpath)
    end

    rng_ns_children(attribute)
      .select { |c| c.name == 'choice' }
      .each { |choice| check_choice_for_infinite_attr_in_repeat(choice, xpath) }
  end
end
1594
+
1595
# Reports direct children of a name-class choice that are 'anyName', or
# 'nsName' whose 'ns' attribute is missing or empty (repeat-context wording).
#
# @param choice_node node whose children are inspected via rng_ns_children
# @param xpath       location forwarded to report_error
def check_choice_for_infinite_attr_in_repeat(choice_node, xpath)
  rng_ns_children(choice_node).each do |alternative|
    kind = alternative.name

    if kind == 'anyName'
      report_error("Repeating attribute with choice containing 'anyName' creates infinite matches", xpath: xpath)
    elsif kind == 'nsName' && (alternative['ns'] || '') == ''
      report_error("Repeating attribute with choice containing 'nsName' ns='' creates infinite matches",
                   xpath: xpath)
    end
  end
end
1609
+
1610
# Reports direct children of an attribute's name-class choice that are
# 'anyName', or 'nsName' whose 'ns' attribute is missing or empty.
#
# @param choice_node node whose children are inspected via rng_ns_children
# @param xpath       location forwarded to report_error
def check_choice_for_infinite_attr_name_class(choice_node, xpath)
  rng_ns_children(choice_node).each do |alternative|
    kind = alternative.name

    if kind == 'anyName'
      report_error("'attribute' choice contains 'anyName' (infinite name class)", xpath: xpath)
    elsif kind == 'nsName'
      namespace = alternative['ns'] || ''
      next unless namespace == ''

      report_error("'attribute' choice contains 'nsName' ns='' (infinite name class)", xpath: xpath)
    end
  end
end
1621
+ end
1622
+ end