rng 0.1.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/docs.yml +63 -0
  3. data/.github/workflows/release.yml +8 -3
  4. data/.gitignore +11 -0
  5. data/.rubocop.yml +10 -7
  6. data/.rubocop_todo.yml +229 -23
  7. data/CHANGELOG.md +317 -0
  8. data/CLAUDE.md +139 -0
  9. data/Gemfile +11 -12
  10. data/README.adoc +1538 -11
  11. data/Rakefile +11 -3
  12. data/docs/Gemfile +8 -0
  13. data/docs/_config.yml +23 -0
  14. data/docs/getting-started/index.adoc +75 -0
  15. data/docs/guides/error-handling.adoc +137 -0
  16. data/docs/guides/external-references.adoc +128 -0
  17. data/docs/guides/index.adoc +24 -0
  18. data/docs/guides/parsing-rnc.adoc +141 -0
  19. data/docs/guides/parsing-rng-xml.adoc +81 -0
  20. data/docs/guides/rng-to-rnc.adoc +101 -0
  21. data/docs/guides/validation.adoc +85 -0
  22. data/docs/index.adoc +52 -0
  23. data/docs/reference/api.adoc +126 -0
  24. data/docs/reference/cli.adoc +182 -0
  25. data/docs/understanding/architecture.adoc +58 -0
  26. data/docs/understanding/rng-vs-rnc.adoc +118 -0
  27. data/exe/rng +5 -0
  28. data/lib/rng/any_name.rb +10 -8
  29. data/lib/rng/attribute.rb +28 -26
  30. data/lib/rng/choice.rb +24 -24
  31. data/lib/rng/cli.rb +607 -0
  32. data/lib/rng/data.rb +10 -10
  33. data/lib/rng/datatype_declaration.rb +26 -0
  34. data/lib/rng/define.rb +44 -41
  35. data/lib/rng/div.rb +36 -0
  36. data/lib/rng/documentation.rb +9 -0
  37. data/lib/rng/element.rb +39 -37
  38. data/lib/rng/empty.rb +7 -7
  39. data/lib/rng/except.rb +25 -25
  40. data/lib/rng/external_ref.rb +8 -8
  41. data/lib/rng/external_ref_resolver.rb +602 -0
  42. data/lib/rng/foreign_attribute.rb +26 -0
  43. data/lib/rng/foreign_element.rb +33 -0
  44. data/lib/rng/grammar.rb +14 -12
  45. data/lib/rng/group.rb +26 -24
  46. data/lib/rng/include.rb +5 -6
  47. data/lib/rng/include_processor.rb +461 -0
  48. data/lib/rng/interleave.rb +23 -23
  49. data/lib/rng/list.rb +22 -22
  50. data/lib/rng/mixed.rb +23 -23
  51. data/lib/rng/name.rb +6 -7
  52. data/lib/rng/namespace_declaration.rb +47 -0
  53. data/lib/rng/namespaces.rb +15 -0
  54. data/lib/rng/not_allowed.rb +7 -7
  55. data/lib/rng/ns_name.rb +9 -9
  56. data/lib/rng/one_or_more.rb +23 -23
  57. data/lib/rng/optional.rb +23 -23
  58. data/lib/rng/param.rb +7 -8
  59. data/lib/rng/parent_ref.rb +8 -8
  60. data/lib/rng/parse_tree_processor.rb +695 -0
  61. data/lib/rng/pattern.rb +7 -7
  62. data/lib/rng/ref.rb +8 -8
  63. data/lib/rng/rnc_builder.rb +927 -0
  64. data/lib/rng/rnc_parser.rb +605 -305
  65. data/lib/rng/rnc_to_rng_converter.rb +1408 -0
  66. data/lib/rng/schema_preamble.rb +73 -0
  67. data/lib/rng/schema_validator.rb +1622 -0
  68. data/lib/rng/start.rb +27 -25
  69. data/lib/rng/test_suite_parser.rb +168 -0
  70. data/lib/rng/text.rb +11 -8
  71. data/lib/rng/to_rnc.rb +4 -35
  72. data/lib/rng/value.rb +6 -7
  73. data/lib/rng/version.rb +1 -1
  74. data/lib/rng/zero_or_more.rb +23 -23
  75. data/lib/rng.rb +68 -17
  76. data/rng.gemspec +18 -19
  77. data/scripts/extract_spectest_resources.rb +96 -0
  78. data/spec/fixtures/compacttest.xml +2511 -0
  79. data/spec/fixtures/external/circular_a.rng +7 -0
  80. data/spec/fixtures/external/circular_b.rng +7 -0
  81. data/spec/fixtures/external/circular_main.rng +7 -0
  82. data/spec/fixtures/external/external_ref_lib.rng +7 -0
  83. data/spec/fixtures/external/external_ref_main.rng +7 -0
  84. data/spec/fixtures/external/include_lib.rng +7 -0
  85. data/spec/fixtures/external/include_main.rng +3 -0
  86. data/spec/fixtures/external/nested_chain.rng +6 -0
  87. data/spec/fixtures/external/nested_leaf.rng +7 -0
  88. data/spec/fixtures/external/nested_mid.rng +8 -0
  89. data/spec/fixtures/metanorma/3gpp.rnc +35 -0
  90. data/spec/fixtures/metanorma/3gpp.rng +105 -0
  91. data/spec/fixtures/metanorma/basicdoc.rnc +11 -0
  92. data/spec/fixtures/metanorma/bipm.rnc +148 -0
  93. data/spec/fixtures/metanorma/bipm.rng +376 -0
  94. data/spec/fixtures/metanorma/bsi.rnc +104 -0
  95. data/spec/fixtures/metanorma/bsi.rng +332 -0
  96. data/spec/fixtures/metanorma/csa.rnc +45 -0
  97. data/spec/fixtures/metanorma/csa.rng +131 -0
  98. data/spec/fixtures/metanorma/csd.rnc +43 -0
  99. data/spec/fixtures/metanorma/csd.rng +132 -0
  100. data/spec/fixtures/metanorma/gbstandard.rnc +99 -0
  101. data/spec/fixtures/metanorma/gbstandard.rng +316 -0
  102. data/spec/fixtures/metanorma/iec.rnc +49 -0
  103. data/spec/fixtures/metanorma/iec.rng +193 -0
  104. data/spec/fixtures/metanorma/ietf.rnc +275 -0
  105. data/spec/fixtures/metanorma/ietf.rng +925 -0
  106. data/spec/fixtures/metanorma/iho.rnc +58 -0
  107. data/spec/fixtures/metanorma/iho.rng +179 -0
  108. data/spec/fixtures/metanorma/isodoc.rnc +873 -0
  109. data/spec/fixtures/metanorma/isodoc.rng +2704 -0
  110. data/spec/fixtures/metanorma/isostandard-amd.rnc +43 -0
  111. data/spec/fixtures/metanorma/isostandard-amd.rng +108 -0
  112. data/spec/fixtures/metanorma/isostandard.rnc +166 -0
  113. data/spec/fixtures/metanorma/isostandard.rng +494 -0
  114. data/spec/fixtures/metanorma/itu.rnc +122 -0
  115. data/spec/fixtures/metanorma/itu.rng +377 -0
  116. data/spec/fixtures/metanorma/m3d.rnc +41 -0
  117. data/spec/fixtures/metanorma/m3d.rng +122 -0
  118. data/spec/fixtures/metanorma/mpfd.rnc +36 -0
  119. data/spec/fixtures/metanorma/mpfd.rng +95 -0
  120. data/spec/fixtures/metanorma/nist.rnc +77 -0
  121. data/spec/fixtures/metanorma/nist.rng +216 -0
  122. data/spec/fixtures/metanorma/ogc.rnc +51 -0
  123. data/spec/fixtures/metanorma/ogc.rng +151 -0
  124. data/spec/fixtures/metanorma/reqt.rnc +6 -0
  125. data/spec/fixtures/metanorma/rsd.rnc +36 -0
  126. data/spec/fixtures/metanorma/rsd.rng +95 -0
  127. data/spec/fixtures/metanorma/un.rnc +103 -0
  128. data/spec/fixtures/metanorma/un.rng +367 -0
  129. data/spec/fixtures/rnc/base.rnc +4 -0
  130. data/spec/fixtures/rnc/grammar_with_trailing.rnc +8 -0
  131. data/spec/fixtures/rnc/main_include_trailing.rnc +3 -0
  132. data/spec/fixtures/rnc/main_with_include.rnc +5 -0
  133. data/spec/fixtures/rnc/test_augment.rnc +10 -0
  134. data/spec/fixtures/rnc/test_isodoc_simple.rnc +9 -0
  135. data/spec/fixtures/rnc/top_level_include.rnc +8 -0
  136. data/spec/fixtures/spectest_external/case_10_4.7/x +3 -0
  137. data/spec/fixtures/spectest_external/case_10_4.7/y +7 -0
  138. data/spec/fixtures/spectest_external/case_11_4.7/x +3 -0
  139. data/spec/fixtures/spectest_external/case_12_4.7/x +3 -0
  140. data/spec/fixtures/spectest_external/case_13_4.7/x +3 -0
  141. data/spec/fixtures/spectest_external/case_13_4.7/y +3 -0
  142. data/spec/fixtures/spectest_external/case_14_4.7/x +7 -0
  143. data/spec/fixtures/spectest_external/case_15_4.7/x +7 -0
  144. data/spec/fixtures/spectest_external/case_16_4.7/x +5 -0
  145. data/spec/fixtures/spectest_external/case_17_4.7/x +5 -0
  146. data/spec/fixtures/spectest_external/case_18_4.7/x +7 -0
  147. data/spec/fixtures/spectest_external/case_19_4.7/level1.rng +9 -0
  148. data/spec/fixtures/spectest_external/case_19_4.7/level2.rng +7 -0
  149. data/spec/fixtures/spectest_external/case_1_4.5/sub1/x +3 -0
  150. data/spec/fixtures/spectest_external/case_1_4.5/sub3/x +3 -0
  151. data/spec/fixtures/spectest_external/case_1_4.5/x +3 -0
  152. data/spec/fixtures/spectest_external/case_20_4.6/x +3 -0
  153. data/spec/fixtures/spectest_external/case_2_4.5/x +3 -0
  154. data/spec/fixtures/spectest_external/case_3_4.6/x +3 -0
  155. data/spec/fixtures/spectest_external/case_4_4.6/x +3 -0
  156. data/spec/fixtures/spectest_external/case_5_4.6/x +1 -0
  157. data/spec/fixtures/spectest_external/case_6_4.6/x +5 -0
  158. data/spec/fixtures/spectest_external/case_7_4.6/x +1 -0
  159. data/spec/fixtures/spectest_external/case_7_4.6/y +1 -0
  160. data/spec/fixtures/spectest_external/case_8_4.7/x +7 -0
  161. data/spec/fixtures/spectest_external/case_9_4.7/x +7 -0
  162. data/spec/fixtures/spectest_external/resources.json +149 -0
  163. data/spec/rng/advanced_rnc_spec.rb +101 -0
  164. data/spec/rng/compacttest_spec.rb +197 -0
  165. data/spec/rng/datatype_declaration_spec.rb +28 -0
  166. data/spec/rng/div_spec.rb +207 -0
  167. data/spec/rng/external_ref_resolver_spec.rb +122 -0
  168. data/spec/rng/metanorma_conversion_spec.rb +159 -0
  169. data/spec/rng/namespace_declaration_spec.rb +60 -0
  170. data/spec/rng/namespace_support_spec.rb +199 -0
  171. data/spec/rng/rnc_parser_spec.rb +498 -22
  172. data/spec/rng/rnc_roundtrip_spec.rb +96 -82
  173. data/spec/rng/rng_generation_spec.rb +288 -0
  174. data/spec/rng/roundtrip_spec.rb +342 -0
  175. data/spec/rng/schema_preamble_spec.rb +145 -0
  176. data/spec/rng/schema_spec.rb +68 -64
  177. data/spec/rng/spectest_spec.rb +168 -90
  178. data/spec/rng_spec.rb +2 -2
  179. data/spec/spec_helper.rb +7 -42
  180. metadata +141 -8
@@ -0,0 +1,1408 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require 'set'
5
+
6
+ module Rng
7
+ # RncToRngConverter converts RNC parse trees to RNG XML format.
8
+ #
9
+ # This class takes the parse tree output from the Parslet RNC parser and
10
+ # converts it to RNG XML using Nokogiri's XML builder. The resulting XML
11
+ # can then be deserialized into Grammar objects using Lutaml::Model.
12
+ #
13
+ # @example Convert a parse tree to RNG XML
14
+ # tree = parser.parse(rnc_content)
15
+ # converter = Rng::RncToRngConverter.new
16
+ # rng_xml = converter.convert(tree)
17
+ # grammar = Rng::Grammar.from_xml(rng_xml)
18
+ class RncToRngConverter
19
+ RNG_NAMESPACE = 'http://relaxng.org/ns/structure/1.0'
20
+
21
# Convert a parse tree to RNG XML.
#
# Emits a <grammar> element containing, in order: an optional <start>
# (taken from the explicit start pattern, or synthesized from the first
# top-level choice or the first top-level element), followed by one child
# per entry of tree[:definitions] (includes, defines, divs, standalone
# patterns and foreign elements).
#
# @param tree [Hash] The parse tree from RncParser
# @return [String] RNG XML string
# @raise [StandardError] If the preamble binds reserved namespace
#   prefixes/URIs or contains forbidden annotations
# @raise [ArgumentError] If an escaped identifier/string denotes an invalid
#   Unicode code point
def convert(tree)
  # Names already emitted as a plain <define>; a later "|=" / "&=" definition
  # of the same name is emitted with a combine attribute instead.
  defined_names = Set.new

  # Decides whether the annotations namespace (xmlns:a) must be declared
  @has_documentation = has_documentation_comments?(tree)

  # Collect prefixed namespace declarations from preamble
  @namespace_prefixes = {}
  collect_namespace_prefixes(tree[:preamble_items])

  # Element notations in preamble annotations may only annotate element/attribute patterns
  validate_preamble_element_notation_usage(tree[:preamble_items], tree[:definitions])

  definitions = tree[:definitions]

  builder = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
    grammar_attrs = { xmlns: RNG_NAMESPACE }
    if @has_documentation
      grammar_attrs[:'xmlns:a'] =
        'http://relaxng.org/ns/compatibility/annotations/1.0'
    end

    xml.grammar(grammar_attrs) do
      # Default namespace, if declared
      if tree[:namespace]
        xml.parent[:ns] =
          process_string_literal(tree[:namespace][:namespace_uri])
      end

      # Datatype library, if declared
      if tree[:datatype_library]
        xml.parent[:datatypeLibrary] =
          process_string_literal(tree[:datatype_library][:uri])
      elsif tree[:datatype_map] && !tree[:datatype_map].empty?
        # From ParseTreeProcessor - datatype_map is {prefix => uri};
        # use the first (typically only) datatype library
        xml.parent[:datatypeLibrary] = tree[:datatype_map].values.first
      end

      has_explicit_start = tree[:start] && tree[:start][:start_pattern]
      first_element = definitions&.find { |d| d.key?(:top_element) }
      first_choice = definitions&.find { |d| d.key?(:top_choice) }

      # The definition entry whose element was moved into a synthesized
      # <start>; it must not be emitted a second time in the loop below.
      start_element = nil

      if has_explicit_start
        xml.start do
          add_documentation(xml, tree[:start]) if tree[:start][:docs]
          process_pattern_list(xml, tree[:start][:start_pattern])
        end
      elsif first_choice
        # No explicit start, but a top-level choice: wrap it in <start><choice>
        xml.start do
          xml.choice do
            process_content_item(xml, first_choice[:top_choice][:first])
            first_choice[:top_choice][:choice_items]&.each do |item|
              process_content_item(xml, item)
            end
          end
        end
      elsif first_element
        # No explicit start, but top-level elements: wrap the first one in <start>
        start_element = first_element
        xml.start do
          process_content_item(xml, first_element[:top_element])
        end
      end

      # Process named patterns and remaining top-level items
      definitions&.each_with_index do |def_item, idx|
        if def_item.key?(:href)
          emit_include_directive(xml, def_item)
        elsif def_item.key?(:name)
          name = process_identifier(def_item[:name])
          operator = def_item[:operator] ? extract_string(def_item[:operator]) : '='

          if operator == '=' || !defined_names.include?(name)
            # First definition or normal definition
            emit_define(xml, def_item, name)
            defined_names.add(name)
          else
            # Augmentation of an already defined name
            emit_define(xml, def_item, name, operator == '|=' ? 'choice' : 'interleave')
          end
        elsif def_item.key?(:top_element) &&
              (has_explicit_start || (idx.positive? && !def_item.equal?(start_element)))
          # Top-level element that was not consumed by the synthesized <start>.
          # The identity check prevents emitting the start element twice when
          # it is not the first definition.
          process_content_item(xml, def_item[:top_element])
        elsif def_item.key?(:top_choice) && !has_explicit_start
          # Top-level choice already handled in start generation above, skip
        elsif def_item.key?(:div)
          # Div block for documentation and grouping
          process_div_block(xml, def_item[:div])
        elsif def_item.key?(:standalone)
          # Standalone pattern (bare wildcard, etc.)
          process_standalone_pattern(xml, def_item[:standalone])
        elsif def_item.key?(:foreign_name)
          # Foreign element annotation - emit as foreign element in RNG XML
          process_foreign_element(xml, def_item)
        end
      end
    end
  end

  builder.to_xml
end

# Emit an <include> for an include directive, with its override block
# (start, named patterns, top-level elements, divs) nested inside when the
# override has any content.
def emit_include_directive(xml, def_item)
  href = process_string_literal(def_item[:href])
  override = def_item[:override]
  override_start = override && override[:start] && override[:start][:start_pattern]
  override_patterns = override && override[:patterns]
  has_content = override_start || (override_patterns && !override_patterns.empty?)

  unless has_content
    # No override block, or an empty one
    xml.include(href: href)
    return
  end

  xml.include(href: href) do
    if override_start
      xml.start do
        process_pattern_list(xml, override_start)
      end
    end

    override_patterns&.each do |pattern_item|
      if pattern_item.key?(:name)
        # Named pattern in override: "=" replaces, "|=" / "&=" combine
        name = process_identifier(pattern_item[:name])
        operator = pattern_item[:operator] ? extract_string(pattern_item[:operator]) : '='
        combine = if operator == '='
                    nil
                  else
                    operator == '|=' ? 'choice' : 'interleave'
                  end
        emit_define(xml, pattern_item, name, combine)
      elsif pattern_item.key?(:top_element)
        process_content_item(xml, pattern_item[:top_element])
      elsif pattern_item.key?(:div)
        process_div_block(xml, pattern_item[:div])
      end
    end
  end
end
private :emit_include_directive

# Emit a <define name="..."> (optionally with combine="...") containing the
# item's documentation and pattern.
def emit_define(xml, item, name, combine = nil)
  attrs = { name: name }
  attrs[:combine] = combine if combine

  xml.define(attrs) do
    add_documentation(xml, item) if item[:docs]
    process_pattern_list(xml, item[:pattern])
  end
end
private :emit_define
209
+
210
+ private
211
+
212
# Report whether the tree holds a truthy :documentation or :docs entry.
#
# Hashes are searched recursively; arrays are searched one level deep for
# Hash entries (which are then searched recursively). Anything that is not a
# Hash yields false.
def has_documentation_comments?(tree)
  return false unless tree.is_a?(Hash)

  tree.any? do |key, value|
    next true if value && %i[documentation docs].include?(key)

    case value
    when Hash
      has_documentation_comments?(value)
    when Array
      value.any? { |entry| entry.is_a?(Hash) && has_documentation_comments?(entry) }
    else
      false
    end
  end
end
230
+
231
# Extract documentation text from a doc_comments structure.
#
# @param docs [Hash, nil] Structure whose :documentation entry is a single
#   doc line node or an array of them; each node carries its text under
#   :doc_line
# @return [String, nil] Doc lines joined with "\n", or nil when docs is nil
#   or has no :documentation entry
def extract_documentation(docs)
  return nil unless docs && docs[:documentation]

  doc_lines = docs[:documentation]
  doc_lines = [doc_lines] unless doc_lines.is_a?(Array)

  doc_lines.filter_map do |line|
    text = line[:doc_line]
    next unless text

    # Strip whitespace at the very start of the text only. \A (rather than ^)
    # keeps the match from jumping to a later line when the first line has no
    # leading whitespace.
    extract_string(text).sub(/\A\s+/, '')
  end.join("\n")
end
247
+
248
# Emit an a:documentation child for the item's doc comments.
#
# Does nothing when the item has no :docs entry or when the extracted
# documentation text is nil or empty.
def add_documentation(xml, item)
  docs = item[:docs]
  return unless docs

  text = extract_documentation(docs)
  return if text.nil? || text.empty?

  xml.send(:'a:documentation', text)
end
257
+
258
# Add annotations (foreign attributes and elements) if present.
#
# Foreign attributes are set on the builder's current parent node; foreign
# elements are appended as children, each with its own attributes and text
# content.
#
# @param xml [Nokogiri::XML::Builder] Builder positioned inside the
#   annotated element
# @param item [Hash] Parse tree node; ignored unless it has :annotations
# @param processor [#extract_annotations, nil] Annotation extractor; a new
#   ParseTreeProcessor is created when omitted
# @return [void]
def add_annotations(xml, item, processor = nil)
  return unless item[:annotations]

  extractor = processor || ParseTreeProcessor.new({})
  annotations = extractor.extract_annotations(item)

  # "prefix:name" when the node carries a namespace, plain name otherwise
  qualified_name = lambda do |node|
    node[:namespace] ? "#{node[:namespace]}:#{node[:name]}" : node[:name]
  end

  # Foreign attributes go on the parent element
  annotations[:attributes].each do |attr|
    xml.parent[qualified_name.call(attr)] = attr[:value]
  end

  # Foreign elements become children
  annotations[:elements].each do |elem|
    # The builder creates tags through method_missing and drops one trailing
    # "_" from the tag name. Appending "_" keeps names such as "text" or
    # "parent" from invoking real Builder methods, and preserves names that
    # themselves end in "_".
    xml.send("#{qualified_name.call(elem)}_") do
      elem[:attributes].each do |nested_attr|
        xml.parent[qualified_name.call(nested_attr)] = nested_attr[:value]
      end

      xml.text(elem[:content]) if elem[:content] && !elem[:content].empty?

      # TODO: nested child elements (elem[:elements]) are not emitted yet
    end
  end
end
309
+
310
# Turn a parse tree leaf into a plain string.
#
# @param obj [Object] Anything responding to #str (e.g. Parslet::Slice), a
#   String, or any other object
# @return [String] obj.str when available, the String itself, else obj.to_s
def extract_string(obj)
  return obj.str if obj.respond_to?(:str)

  obj.is_a?(String) ? obj : obj.to_s
end
324
+
325
# Lookup table from the character following a backslash to its replacement.
# Quote, backslash and the n/r/t escapes map to the literal character; the
# RELAX NG character class escapes (i, c, d, w) keep their backslash.
CHAR_ESCAPE_MAP = {
  '"' => '"',
  '\\' => '\\',
  'n' => "\n",
  'r' => "\r",
  't' => "\t"
}.merge(
  %w[i c d w].to_h { |class_char| [class_char, "\\#{class_char}"] }
).freeze
338
+
339
# Validate Unicode code point
#
# @param code_point [Integer] The Unicode code point to validate
# @param context [Symbol] Context where the character is used (:identifier or :string)
# @return [Integer] The validated code point
# @raise [ArgumentError] If the code point is a surrogate, exceeds U+10FFFF,
#   or (in :identifier context) is a whitespace character
def validate_unicode_codepoint(code_point, context = :string)
  label = "U+#{code_point.to_s(16).upcase}"

  # Surrogates (0xD800-0xDFFF) are not characters on their own
  if code_point.between?(0xD800, 0xDFFF)
    raise ArgumentError,
          "Invalid Unicode: surrogate code point #{label} is not allowed"
  end

  if code_point > 0x10FFFF
    raise ArgumentError,
          "Invalid Unicode: code point #{label} exceeds maximum (U+10FFFF)"
  end

  # [[:space:]] is Unicode-aware, so non-ASCII whitespace (e.g. U+3000) is
  # rejected in identifiers too; \s would only catch ASCII whitespace.
  if context == :identifier && [code_point].pack('U').match?(/[[:space:]]/)
    raise ArgumentError,
          "Invalid identifier: whitespace character #{label} is not allowed in identifiers"
  end

  code_point
end
369
+
370
# Process escape sequences in parsed content
#
# @param parts [Array, Hash, String, nil] The parts to process
# @param context [Symbol] Context where the text is used (:identifier or :string)
# @return [String] The processed string ('' for nil; a String is returned unchanged)
# @raise [ArgumentError] If a hex escape denotes an invalid code point
def process_escape_sequences(parts, context = :string)
  return '' unless parts
  return parts if parts.is_a?(String)

  parts = [parts] unless parts.is_a?(Array)

  parts.map do |part|
    next extract_string(part) unless part.is_a?(Hash)

    if part[:hex_escape]
      # Hex escape -> validated Unicode character
      hex = extract_string(part[:hex_escape][:hex])
      code_point = hex.to_i(16)

      # Diagnostics go to stderr so they never mix with generated output on stdout
      if ENV['RNG_DEBUG']
        $stderr.puts "DEBUG: Processing hex escape: #{hex} -> code_point: 0x#{code_point.to_s(16).upcase}"
      end

      validate_unicode_codepoint(code_point, context)
      [code_point].pack('U')
    elsif part[:char_escape]
      # Known escapes are mapped; unknown ones fall back to the bare character
      char = extract_string(part[:char_escape][:char])
      CHAR_ESCAPE_MAP[char] || char
    elsif part[:backslash_escape]
      # Backslash escape in identifier: \x -> x
      escape = part[:backslash_escape]
      if escape[:escaped_backslash]
        '\\'
      else
        extract_string(escape[:escaped_char] || escape[:escaped_keyword] || escape)
      end
    elsif part[:char]
      # Regular character from identifier/string
      extract_string(part[:char])
    else
      # Fallback - extract as string
      extract_string(part)
    end
  end.join
end
421
+
422
# Concatenate the raw pieces of a triple-quoted string.
#
# No escape processing is applied: each piece is only passed through
# extract_string. A nil/false input yields ''; a non-Array input is treated
# as a single piece.
def extract_multi_line_parts(parts)
  return '' unless parts

  if parts.is_a?(Array)
    parts.map { |piece| extract_string(piece) }.join
  else
    extract_string(parts)
  end
end
429
+
430
# Resolve an identifier node to its string form.
#
# Non-Hash nodes are stringified directly. A Hash with :identifier_parts has
# its escape sequences processed in :identifier context; otherwise the
# :identifier entry (or, failing that, the node itself) is stringified.
def process_identifier(id_node)
  return extract_string(id_node) unless id_node.is_a?(Hash)

  escaped_parts = id_node[:identifier_parts]
  return process_escape_sequences(escaped_parts, :identifier) if escaped_parts

  extract_string(id_node[:identifier] || id_node)
end
442
+
443
# Collect prefixed namespace declarations from preamble items.
#
# Stores each declared prefix => URI in @namespace_prefixes and hands
# preamble annotations to validate_preamble_annotations.
#
# @param preamble_items [Array, Hash, Object, nil] Preamble entries; a single
#   entry is accepted as well as an array. Non-Hash entries are ignored.
# @raise [StandardError] If a declaration uses the reserved xmlns prefix or
#   URI, or binds the xml prefix / XML namespace URI inconsistently
def collect_namespace_prefixes(preamble_items)
  return unless preamble_items

  # Wrap any single item (Hash or slice) so it is handled like a one-element
  # array instead of being iterated member-wise
  items = preamble_items.is_a?(Array) ? preamble_items : [preamble_items]

  # Track seen annotation attributes for duplicate detection (across all notations)
  seen_ann_attrs = {}

  items.each do |item|
    # Skip non-Hash items (e.g., slices from annotation content)
    next unless item.is_a?(Hash)

    if item[:prefixed_ns]
      prefix = process_identifier(item[:prefixed_ns][:prefix])
      uri = process_string_literal(item[:prefixed_ns][:uri])

      # TC 13: xmlns prefix is reserved
      raise StandardError, "namespace prefix 'xmlns' is reserved" if prefix == 'xmlns'

      # TC 14: xmlns URI cannot be used as a namespace URI
      if uri == 'http://www.w3.org/2000/xmlns'
        raise StandardError, "namespace URI 'http://www.w3.org/2000/xmlns' is reserved"
      end

      # TC 15: xml prefix must map to the XML namespace URI
      if prefix == 'xml' && uri != 'http://www.w3.org/XML/1998/namespace'
        raise StandardError,
              "namespace prefix 'xml' must be bound to 'http://www.w3.org/XML/1998/namespace'"
      end

      # TC 16: XML namespace URI must use xml prefix
      if uri == 'http://www.w3.org/XML/1998/namespace' && prefix != 'xml'
        raise StandardError,
              "namespace URI 'http://www.w3.org/XML/1998/namespace' must use prefix 'xml'"
      end

      @namespace_prefixes[prefix] = uri
    elsif item[:ann] && item[:ann][:ann_items]
      # Annotations wrapped in an :ann hash
      validate_preamble_annotations(item[:ann][:ann_items], seen_ann_attrs)
    elsif item[:ann_items]
      # Direct ann_items (backward compatibility)
      validate_preamble_annotations(item[:ann_items], seen_ann_attrs)
    end
  end
end
491
+
492
# Check every annotation entry of a preamble notation (TC 11, 12, 18, 70, 71).
#
# Accepts a single entry or an array of entries; entries that are not Hashes
# are skipped. Attribute entries go to validate_annotation_attribute (sharing
# seen_ann_attrs for duplicate detection), element entries to
# validate_annotation_element.
def validate_preamble_annotations(ann_items, seen_ann_attrs)
  return unless ann_items

  [ann_items].flatten(1).each do |entry|
    next unless entry.is_a?(Hash)

    attribute_node = entry[:ann_attr]
    element_node = entry[:ann_elem]

    if attribute_node
      validate_annotation_attribute(attribute_node, seen_ann_attrs)
    elsif element_node
      # TC 71: RNG namespace elements in annotations are forbidden
      validate_annotation_element(element_node)
    end
  end
end
509
+
510
# Ensure element notations in preamble annotations annotate an element or
# attribute pattern (TC 80, 81).
#
# When any preamble entry carries an :ann_elem inside item[:ann][:ann_items],
# the first entry of patterns must be a top-level element, a top-level choice
# or an attribute definition; otherwise a StandardError is raised. Nothing is
# checked when either argument is nil or patterns is empty.
def validate_preamble_element_notation_usage(preamble_items, patterns)
  return unless preamble_items && patterns

  preamble = preamble_items.is_a?(Array) ? preamble_items : [preamble_items]

  uses_element_notation = preamble.any? do |entry|
    next false unless entry.is_a?(Hash)

    notations = entry[:ann] && entry[:ann][:ann_items]
    next false unless notations

    notations = [notations] unless notations.is_a?(Array)
    notations.any? { |notation| notation.is_a?(Hash) && notation[:ann_elem] }
  end
  return unless uses_element_notation

  leading_pattern = patterns.first
  return unless leading_pattern
  return if %i[top_element top_choice attribute_def].any? { |kind| leading_pattern.key?(kind) }

  raise StandardError, 'element notation in annotation must be used with element or attribute pattern'
end
551
+
552
# Validate a single annotation attribute in preamble.
#
# Records the attribute in seen_ann_attrs (keyed by "prefix:local" or
# "local") so that later calls can detect duplicates.
#
# @param ann_attr [Hash] Annotation attribute node; its :ann_name entry holds
#   the name and optional :prefix
# @param seen_ann_attrs [Hash] Accumulator of attributes seen so far, mutated
#   by this method
# @raise [StandardError] On duplicate attributes (TC 11, 12), an unprefixed
#   xmlns attribute (TC 18), or an attribute in the RELAX NG namespace (TC 70)
def validate_annotation_attribute(ann_attr, seen_ann_attrs)
  name_node = ann_attr[:ann_name]

  # Extract prefix and local name
  prefix = name_node[:prefix] ? process_identifier(name_node[:prefix]) : nil
  local = process_identifier(name_node)

  # TC 11: Check for duplicate annotation attributes (same prefix:local)
  attr_key = prefix ? "#{prefix}:#{local}" : local
  raise StandardError, "duplicate annotation attribute '#{attr_key}'" if seen_ann_attrs.key?(attr_key)

  # nil for unprefixed attributes and for prefixes without a declared URI
  prefix_uri = prefix ? @namespace_prefixes[prefix] : nil

  # TC 12: Same local name under a different prefix bound to the same URI
  if prefix_uri
    clash = seen_ann_attrs.find do |_key, info|
      info[:uri] == prefix_uri && info[:local] == local
    end
    raise StandardError, "duplicate annotation attribute '#{attr_key}' (same as '#{clash.first}')" if clash
  end

  seen_ann_attrs[attr_key] = { uri: prefix_uri, local: local }

  # TC 18: xmlns attribute is forbidden in annotations
  raise StandardError, 'xmlns attribute is not allowed in annotations' if local == 'xmlns' && prefix.nil?

  # TC 70: RNG namespace attributes forbidden
  return unless prefix_uri == RNG_NAMESPACE

  raise StandardError, 'attributes in the RELAX NG namespace are not allowed'
end
588
+
589
# Reject a preamble annotation element that lives in the RELAX NG namespace
# (TC 71).
#
# The element name is read from ann_elem[:elem_name]; the check applies only
# when that name has a :prefix bound to RNG_NAMESPACE in @namespace_prefixes.
def validate_annotation_element(ann_elem)
  name_node = ann_elem[:elem_name]
  prefix_node = name_node[:prefix]
  prefix = prefix_node ? process_identifier(prefix_node) : nil

  # Result unused; the call is kept because processing the identifier can
  # raise on invalid escape sequences.
  process_identifier(name_node)

  if prefix && @namespace_prefixes[prefix] == RNG_NAMESPACE
    raise StandardError, 'elements in the RELAX NG namespace are not allowed'
  end
end
602
+
603
# Look up the URI bound to a namespace prefix.
#
# Unknown prefixes are returned unchanged (backward compatibility), and a
# nil/false prefix is returned as-is without consulting the table.
def resolve_namespace_prefix(prefix)
  prefix ? @namespace_prefixes.fetch(prefix, prefix) : prefix
end
610
+
611
# Process string literal with optional concatenation
#
# @param str_node [Hash, String, Object, nil] String node from parse tree
#   (a Hash can have :concatenations); a bare leaf is stringified
# @return [String] Concatenated string value ('' for nil)
def process_string_literal(str_node)
  return '' unless str_node
  return str_node if str_node.is_a?(String)
  # A bare leaf (e.g. a slice) has no parts to look up; stringify it the same
  # way process_identifier treats non-Hash nodes.
  return extract_string(str_node) unless str_node.is_a?(Hash)

  base_str = if str_node[:multi_line_parts]
               # Multi-line triple-quoted string: no escape processing
               extract_multi_line_parts(str_node[:multi_line_parts])
             elsif str_node[:string_parts]
               process_escape_sequences(str_node[:string_parts])
             elsif str_node[:string]
               extract_string(str_node[:string])
             else
               ''
             end

  concatenations = str_node[:concatenations]
  return base_str unless concatenations

  concatenations = [concatenations] unless concatenations.is_a?(Array)

  # Only Hash entries describe a concatenated segment; anything else is skipped
  segments = concatenations.filter_map do |concat|
    next unless concat.is_a?(Hash)

    if concat[:concat_multi_line_parts]
      extract_multi_line_parts(concat[:concat_multi_line_parts])
    elsif concat[:concat_string_parts]
      process_escape_sequences(concat[:concat_string_parts])
    elsif concat[:concat_string]
      extract_string(concat[:concat_string])
    end
  end

  [base_str, *segments].join
end
653
+
654
# Emit a pattern given as a pattern_list hash, a legacy array, or a single
# content item.
#
# pattern_list hash (has :first): a non-empty :interleave_items wins over
# :choice_items, which wins over :sequence_items; they produce <interleave>,
# <choice> and <group> respectively, each starting with :first. When none of
# the lists is non-empty, :first is emitted on its own.
# Legacy array: a single entry is emitted bare, several are wrapped in <group>.
# Anything else goes straight to process_content_item.
def process_pattern_list(xml, pattern)
  if pattern.is_a?(Array)
    # Legacy array format (kept for backward compatibility)
    if pattern.length == 1
      process_content_item(xml, pattern[0])
    else
      xml.group { pattern.each { |member| process_content_item(xml, member) } }
    end
  elsif pattern.is_a?(Hash) && pattern.key?(:first)
    head = pattern[:first]
    filled = ->(key) { pattern[key] && !pattern[key].empty? }

    if filled.call(:interleave_items)
      xml.interleave do
        process_content_item(xml, head)
        pattern[:interleave_items].each { |member| process_content_item(xml, member) }
      end
    elsif filled.call(:choice_items)
      xml.choice do
        process_content_item(xml, head)
        pattern[:choice_items].each { |member| process_content_item(xml, member) }
      end
    elsif filled.call(:sequence_items)
      # :first plus a non-empty list is always two or more entries, so the
      # <group> wrapper is always needed here.
      members = [head] + pattern[:sequence_items]
      xml.group { members.each { |member| process_content_item(xml, member) } }
    else
      process_content_item(xml, head)
    end
  else
    # Single item (direct hash)
    process_content_item(xml, pattern)
  end
end
704
+
705
# Emit a standalone pattern.
#
# An item with :bare_any_name (a bare `*` wildcard) becomes
# <element><anyName>…</anyName></element>, with an <except> child when the
# wildcard carries :any_name_except. Every other item is delegated to
# process_content_item.
def process_standalone_pattern(xml, item)
  return process_content_item(xml, item) unless item.key?(:bare_any_name)

  exclusions = item[:bare_any_name][:any_name_except]
  xml.element do
    xml.anyName do
      xml.except_ { process_name_except(xml, exclusions, parent_type: :any_name) } if exclusions
    end
  end
end
724
+
725
# Emit the RNG XML for one parsed content item.
#
# The keys present on item select the output, checked in this order:
#   :type and :name   -> <attribute>
#   :name             -> <element>
#   :text             -> <text/>
#   :empty            -> <empty/>
#   :not_allowed      -> <notAllowed/>
#   :list_content     -> <list>
#   :parent_pattern   -> <parentRef>
#   :external_href    -> <externalRef>
#   :group            -> <group>
#   :mixed_content    -> <mixed>
#   :ref              -> <ref>
#   :prefix and :type -> <data>
#   :value            -> <value>
#   :grammar_block    -> nested <grammar>
# An item matching none of these emits nothing. Attributes, elements and
# groups are wrapped in <zeroOrMore>/<oneOrMore>/<optional> when
# item[:occurrence] is set.
#
# @param xml [Object] XML builder receiving the output
# @param item [Hash] parsed content item
# @raise [StandardError] for a ref named '-' or an unknown occurrence marker
def process_content_item(xml, item)
  if item.key?(:type) && item.key?(:name)
    emit_attribute_item(xml, item)
  elsif item.key?(:name)
    emit_element_item(xml, item)
  elsif item.key?(:text)
    # Built as a node by hand instead of calling xml.text (presumably because
    # the builder's own #text inserts character data - TODO confirm).
    xml.parent << Nokogiri::XML::Node.new('text', xml.doc)
  elsif item.key?(:empty)
    xml.parent << Nokogiri::XML::Node.new('empty', xml.doc)
  elsif item.key?(:not_allowed)
    xml.parent << Nokogiri::XML::Node.new('notAllowed', xml.doc)
  elsif item.key?(:list_content)
    xml.list { process_list_content(xml, item[:list_content]) }
  elsif item.key?(:parent_pattern)
    xml.parentRef(name: process_identifier(item[:parent_pattern]))
  elsif item.key?(:external_href)
    xml.externalRef(href: process_string_literal(item[:external_href]))
  elsif item.key?(:group)
    with_occurrence_wrapper(xml, item[:occurrence]) do
      xml.group { process_element_content(xml, item[:group]) }
    end
  elsif item.key?(:mixed_content)
    xml.mixed { process_element_content(xml, item[:mixed_content]) }
  elsif item.key?(:ref)
    # Reference to a named pattern
    ref_name = process_identifier(item[:ref])
    raise StandardError, "subtraction operator '-' cannot be used as a pattern" if ref_name == '-'

    xml.ref(name: ref_name)
  elsif item.key?(:prefix) && item.key?(:type)
    emit_datatype_item(xml, item)
  elsif item.key?(:value)
    # Value literal (string) in element content
    xml.value(process_string_literal(item[:value]))
  elsif item.key?(:grammar_block)
    emit_inline_grammar(xml, item[:grammar_block])
  end
end

# Run the given block, wrapped in the element that matches the occurrence
# marker ('*' zeroOrMore, '+' oneOrMore, '?' optional). With no marker the
# block runs unwrapped.
#
# @raise [StandardError] for any other marker, instead of failing later with
#   an unrelated TypeError from xml.send(nil)
def with_occurrence_wrapper(xml, occurrence)
  return yield unless occurrence

  tag = case occurrence.to_s
        when '*' then 'zeroOrMore'
        when '+' then 'oneOrMore'
        when '?' then 'optional'
        else raise StandardError, "unsupported occurrence marker '#{occurrence}'"
        end
  xml.send(tag) { yield }
end

# Emit the <except> child of a wildcard name class; no-op without a clause.
def emit_name_exceptions(xml, except_clause, parent_type)
  return unless except_clause

  xml.except_ { process_name_except(xml, except_clause, parent_type: parent_type) }
end

# Emit an <attribute> for an item that has both :type and :name.
#
# The name may be an anyName wildcard, an nsName wildcard, a qualified name,
# a choice of names, or (fallback) a bare identifier. Documentation is placed
# inside <attribute> for every name form, name choices included.
def emit_attribute_item(xml, item)
  name_obj = item[:name]
  # Unwrap the extra :name level from name_class.as(:name)
  name_obj = name_obj[:name] if name_obj.is_a?(Hash) && name_obj.key?(:name)
  # Defensive: nothing to emit without a name
  return if name_obj.nil?

  attrs = {}
  kind = :qualified
  except_clause = nil
  ns_prefix = nil
  all_names = nil

  if name_obj.key?(:any_name)
    kind = :any_name
    wildcard = name_obj[:any_name]
    except_clause = wildcard[:except] if wildcard.is_a?(Hash)
  elsif name_obj.key?(:ns_name)
    kind = :ns_name
    wildcard = name_obj[:ns_name]
    # NOTE(review): the prefix text is used for ns as-is; unlike qualified
    # names it is not passed through resolve_namespace_prefix - confirm.
    ns_prefix = process_identifier(wildcard[:prefix])
    except_clause = wildcard[:except] if wildcard.is_a?(Hash)
  elsif name_obj.key?(:local_name)
    attrs[:name] = process_identifier(name_obj[:local_name])
    attrs[:ns] = resolve_namespace_prefix(process_identifier(name_obj[:prefix])) if name_obj[:prefix]
  elsif name_obj.key?(:name_choice)
    kind = :name_choice
    name_choice = name_obj[:name_choice]
    all_names = [name_choice[:local_name]]
    name_choice[:name_choice_items]&.each do |alternative|
      all_names << alternative[:local_name] if alternative[:local_name]
    end
  else
    # Fallback: name_obj might be the identifier directly
    attrs[:name] = name_obj[:identifier] ? extract_string(name_obj[:identifier]) : name_obj.to_s
  end

  with_occurrence_wrapper(xml, item[:occurrence]) do
    if kind == :qualified
      xml.attribute(attrs) do
        add_documentation(xml, item) if item[:docs]
        process_attribute_type(xml, item[:type])
      end
    else
      xml.attribute do
        add_documentation(xml, item) if item[:docs]
        case kind
        when :any_name
          xml.anyName { emit_name_exceptions(xml, except_clause, :any_name) }
        when :ns_name
          xml.nsName(ns: ns_prefix) { emit_name_exceptions(xml, except_clause, :ns_name) }
        when :name_choice
          xml.choice do
            all_names.each { |name_info| xml.name(process_identifier(name_info)) }
          end
        end
        process_attribute_type(xml, item[:type])
      end
    end
  end
end

# Emit an <element> for an item that has :name but no :type.
#
# Supports the same name forms as attributes; a choice of names is detected
# through a non-empty :name_choice_items array next to :local_name.
def emit_element_item(xml, item)
  name_obj = item[:name]
  # Unwrap the extra :name level from name_class.as(:name)
  name_obj = name_obj[:name] if name_obj.is_a?(Hash) && name_obj.key?(:name)
  # Unwrap name_choice from name_class.as(:name_choice)
  name_obj = name_obj[:name_choice] if name_obj.is_a?(Hash) && name_obj.key?(:name_choice)

  attrs = {}
  kind = :qualified
  except_clause = nil
  ns_prefix = nil
  choice_names = nil

  if name_obj.key?(:any_name)
    kind = :any_name
    wildcard = name_obj[:any_name]
    except_clause = wildcard[:except] if wildcard.is_a?(Hash)
  elsif name_obj.key?(:ns_name)
    kind = :ns_name
    wildcard = name_obj[:ns_name]
    ns_prefix = process_identifier(wildcard[:prefix])
    except_clause = wildcard[:except] if wildcard.is_a?(Hash)
  elsif name_obj.key?(:local_name)
    alternatives = name_obj[:name_choice_items]
    if name_obj.key?(:name_choice_items) && alternatives.is_a?(Array) && !alternatives.empty?
      # Choice between multiple names (e.g. name1|name2|name3)
      kind = :name_choice
      choice_names = [name_obj[:local_name]] + alternatives.map { |entry| entry[:local_name] || entry }
    else
      attrs[:name] = process_identifier(name_obj[:local_name])
      attrs[:ns] = resolve_namespace_prefix(process_identifier(name_obj[:prefix])) if name_obj[:prefix]
    end
  else
    # Fallback: name_obj might be the identifier directly
    attrs[:name] = name_obj[:identifier] ? extract_string(name_obj[:identifier]) : name_obj.to_s
  end

  # Text for one entry of a name choice
  choice_label = lambda do |name_part|
    next name_part.to_s unless name_part.is_a?(Hash)

    if name_part[:identifier_parts]
      process_identifier(name_part)
    elsif name_part[:local_name]
      process_identifier(name_part[:local_name])
    elsif name_part[:prefix]
      process_identifier(name_part)
    else
      name_part.to_s
    end
  end

  with_occurrence_wrapper(xml, item[:occurrence]) do
    if kind == :qualified
      xml.element(attrs) do
        add_documentation(xml, item) if item[:docs]
        process_element_content(xml, item[:content]) if item[:content]
      end
    else
      xml.element do
        add_documentation(xml, item) if item[:docs]
        case kind
        when :any_name
          xml.anyName { emit_name_exceptions(xml, except_clause, :any_name) }
        when :ns_name
          xml.nsName(ns: ns_prefix) { emit_name_exceptions(xml, except_clause, :ns_name) }
        when :name_choice
          xml.choice do
            choice_names.each { |name_part| xml.name(choice_label.call(name_part)) }
          end
        end
        process_element_content(xml, item[:content]) if item[:content]
      end
    end
  end
end

# Emit <data> for a datatype reference (e.g. xsd:string { maxLength = "100" }).
# The datatype library is always the XML Schema datatypes URI; item[:prefix]
# is not consulted.
def emit_datatype_item(xml, item)
  data_attrs = {
    type: process_identifier(item[:type]),
    datatypeLibrary: 'http://www.w3.org/2001/XMLSchema-datatypes'
  }
  return xml.data(data_attrs) unless item[:params]

  xml.data(data_attrs) { process_data_params(xml, item[:params]) }
end

# Emit a nested <grammar> for an inline grammar block: an optional <start>
# followed by the entries of :definitions (or :patterns). Entries with both
# :name and :pattern become <define>; others are emitted as content items.
def emit_inline_grammar(xml, grammar_data)
  inner = grammar_data[:inner_grammar] || grammar_data
  xml.grammar(xmlns: 'http://relaxng.org/ns/structure/1.0') do
    if inner[:start]
      xml.start do
        start_node = inner[:start]
        if start_node.is_a?(Hash) && start_node.key?(:start_pattern)
          process_pattern_list(xml, start_node[:start_pattern])
        end
      end
    end

    patterns = inner[:definitions] || inner[:patterns] || []
    patterns.each do |pattern|
      if pattern.is_a?(Hash) && pattern.key?(:name) && pattern.key?(:pattern)
        define_name = process_identifier(pattern[:name])
        xml.define(name: define_name) { process_pattern_list(xml, pattern[:pattern]) }
      else
        process_content_item(xml, pattern)
      end
    end
  end
end
1053
+
1054
# Emit the content of an element, group or mixed block.
#
# New structure (Hash with :first): a non-empty :interleave_items yields
# <interleave>, otherwise a non-empty :choice_items yields <choice>, otherwise
# a non-empty :sequence_items is emitted as a bare sequence after :first. With
# none of these, :first is emitted alone (each entry in turn when it is an
# Array).
# Legacy structure (Array, or Hash without :first): the first entry may carry
# :choice_items or :sequence_items itself; otherwise all entries are emitted
# in order. Any other input emits nothing.
#
# NOTE(review): in the new-structure choice branch the trailing sequence items
# are read from the first item, not from content - confirm this is intended.
def process_element_content(xml, content)
  return unless content

  if content.is_a?(Hash) && content.key?(:first)
    head = content[:first]
    present = ->(list) { list && !list.empty? }

    if present.call(content[:interleave_items])
      xml.interleave do
        process_content_item(xml, head)
        content[:interleave_items].each { |node| process_content_item(xml, node) }
      end
    elsif present.call(content[:choice_items])
      xml.choice do
        process_content_item(xml, head)
        content[:choice_items].each { |node| process_content_item(xml, node) }
        head[:sequence_items]&.each { |node| process_content_item(xml, node) }
      end
    elsif present.call(content[:sequence_items])
      process_content_item(xml, head)
      content[:sequence_items].each { |node| process_content_item(xml, node) }
    elsif head.is_a?(Array)
      head.each { |node| process_content_item(xml, node) }
    else
      process_content_item(xml, head)
    end
    return
  end

  # Legacy handling: an array of items, or a single hash treated as one item
  entries = case content
            when Array then content
            when Hash then [content]
            else return
            end

  lead = entries[0]
  if lead.is_a?(Hash)
    if lead.key?(:choice_items) && !lead[:choice_items].empty?
      xml.choice do
        process_content_item(xml, lead)
        lead[:choice_items].each { |node| process_content_item(xml, node) }
        lead[:sequence_items]&.each { |node| process_content_item(xml, node) }
      end
      return
    end

    if lead.key?(:sequence_items) && !lead[:sequence_items].empty?
      process_content_item(xml, lead)
      lead[:sequence_items].each { |node| process_content_item(xml, node) }
      return
    end
  end

  return process_content_item(xml, entries[0]) if entries.length == 1

  entries.each { |node| process_content_item(xml, node) }
end
1139
+
1140
# Emit the members of a list pattern.
#
# A Hash with :first contributes :first followed by any non-empty
# :sequence_items; anything else is treated as a single list item. Each
# member is emitted through process_list_item. Does nothing for nil/false.
def process_list_content(xml, list_content)
  return unless list_content

  unless list_content.is_a?(Hash) && list_content.key?(:first)
    # Single item
    return process_list_item(xml, list_content)
  end

  members = [list_content[:first]]
  rest = list_content[:sequence_items]
  members += rest if rest && !rest.empty?
  members.each { |member| process_list_item(xml, member) }
end
1158
+
1159
# Emit one member of a list pattern, honouring an optional occurrence marker.
#
# Supported members: :text -> <text/>, :prefix -> <data> (datatype reference),
# :ref -> <ref>. Other members emit nothing. Datatype parameters under
# :params are emitted as <param> children via process_data_params, matching
# how datatype references are handled outside lists; previously they were
# dropped.
#
# @param xml [Object] XML builder receiving the output
# @param item [Hash] parsed list member
# @raise [StandardError] for a ref named '-' or an unknown occurrence marker
#   (the latter used to surface as a TypeError from xml.send(nil))
def process_list_item(xml, item)
  occurrence = item[:occurrence]&.to_s

  emit_member = lambda do
    if item.key?(:text)
      xml.parent << Nokogiri::XML::Node.new('text', xml.doc)
    elsif item.key?(:prefix)
      # Datatype reference
      data_attrs = {
        type: process_identifier(item[:type]),
        datatypeLibrary: 'http://www.w3.org/2001/XMLSchema-datatypes'
      }
      if item[:params]
        xml.data(data_attrs) { process_data_params(xml, item[:params]) }
      else
        xml.data(data_attrs)
      end
    elsif item.key?(:ref)
      # Reference to named pattern
      ref_name = process_identifier(item[:ref])
      raise StandardError, "subtraction operator '-' cannot be used as a pattern" if ref_name == '-'

      xml.ref(name: ref_name)
    end
  end

  return emit_member.call unless occurrence

  occurrence_tag = case occurrence
                   when '*' then 'zeroOrMore'
                   when '+' then 'oneOrMore'
                   when '?' then 'optional'
                   else raise StandardError, "unsupported occurrence marker '#{occurrence}'"
                   end
  xml.send(occurrence_tag) { emit_member.call }
end
1195
+
1196
# Emit the name classes listed in a wildcard's except clause.
#
# When parent_type is given, the clause is first checked with
# validate_name_except. The clause is a single Hash or an Array of entries;
# each Hash entry becomes <name> (:local_name), <nsName> (:ns_name) or
# <anyName> (:any_name), tested in that order. Array entries are emitted one
# after another with no extra wrapper; non-Hash entries and Hashes with none
# of the keys emit nothing.
#
# @param xml [Object] XML builder receiving the output
# @param except_clause [Hash, Array] parsed except clause
# @param parent_type [Symbol, nil] :any_name or :ns_name to enable validation
def process_name_except(xml, except_clause, parent_type: nil)
  validate_name_except(except_clause, parent_type) if parent_type

  emit_entry = lambda do |entry|
    next unless entry.is_a?(Hash)

    if entry.key?(:local_name)
      xml.name(process_identifier(entry[:local_name]))
    elsif entry.key?(:ns_name)
      # Namespace-qualified wildcard
      xml.nsName(ns: process_identifier(entry[:ns_name][:prefix]))
    elsif entry.key?(:any_name)
      # Unprefixed wildcard
      xml.anyName
    end
  end

  if except_clause.is_a?(Array)
    except_clause.each { |entry| emit_entry.call(entry) }
  else
    emit_entry.call(except_clause)
  end
end
1227
+
1228
# Enforce the name-class subtraction rules for an except clause.
#
# Under an anyName parent the clause must not contain anyName; under an
# nsName parent it must contain neither anyName nor nsName. Non-Hash entries
# are ignored, and any other parent_type performs no checks.
#
# @param except_clause [Hash, Array] one entry or a list of entries
# @param parent_type [Symbol] :any_name or :ns_name
# @raise [StandardError] when a forbidden name class is present
def validate_name_except(except_clause, parent_type)
  candidates = except_clause.is_a?(Array) ? except_clause : [except_clause]
  candidates.each do |candidate|
    next unless candidate.is_a?(Hash)

    case parent_type
    when :any_name
      raise StandardError, 'anyName except must not contain anyName' if candidate.key?(:any_name)
    when :ns_name
      raise StandardError, 'nsName except must not contain anyName' if candidate.key?(:any_name)
      raise StandardError, 'nsName except must not contain nsName' if candidate.key?(:ns_name)
    end
  end
end
1243
+
1244
# Emit the content pattern of an attribute.
#
# 'text' or a Hash with :text_type gives <text/>. Otherwise a Hash is
# examined in this order: :value_choice -> <choice> of <value> elements
# (the leading :value first), :value -> a single <value>, :prefix -> <data>
# with one <param> per entry of :params when present. Any other input emits
# nothing.
def process_attribute_type(xml, type_info)
  structured = type_info.is_a?(Hash)

  if type_info == 'text' || (structured && type_info.key?(:text_type))
    xml.parent << Nokogiri::XML::Node.new('text', xml.doc)
  elsif !structured
    nil
  elsif type_info.key?(:value_choice)
    xml.choice do
      # The value written before the first | operator
      xml.value(process_string_literal(type_info[:value]))
      # The values written after each | operator
      type_info[:value_choice].each do |alternative|
        xml.value(process_string_literal(alternative[:value]))
      end
    end
  elsif type_info.key?(:value)
    xml.value(process_string_literal(type_info[:value]))
  elsif type_info.key?(:prefix)
    # Datatype reference
    data_attrs = {
      type: process_identifier(type_info[:type]),
      datatypeLibrary: 'http://www.w3.org/2001/XMLSchema-datatypes'
    }
    param_list = type_info[:params]

    if param_list
      # A single parameter may arrive unwrapped
      param_list = [param_list] unless param_list.is_a?(Array)
      xml.data(data_attrs) do
        param_list.each do |entry|
          key = process_identifier(entry[:param_name])
          text = process_string_literal(entry[:param_value])
          xml.param(text, name: key)
        end
      end
    else
      xml.data(data_attrs)
    end
  end
end
1284
+
1285
# Emit one <param name="…">value</param> per datatype parameter
# (e.g. maxLength = "100"). A single parameter may be passed unwrapped.
#
# @param xml [Object] XML builder receiving the output
# @param params [Array<Hash>, Hash] entries with :param_name and :param_value
def process_data_params(xml, params)
  entries = params.is_a?(Array) ? params : [params]
  entries.each do |entry|
    key = process_identifier(entry[:param_name])
    text = process_string_literal(entry[:param_value])
    xml.param(text, name: key)
  end
end
1294
+
1295
# Emit a <div> block.
#
# Inside the <div>, in order: an optional <start> (when
# div_block[:start][:start_pattern] is set), one <include> per entry of
# :includes, then the entries of :patterns.
#
# An include is emitted with children only when its :override holds a start
# pattern or a non-empty :patterns list; override patterns may be named
# definitions (:name), top-level elements (:top_element) or div blocks (:div).
# Div patterns may be named definitions, nested divs (:nested_div), top-level
# elements or foreign elements (:foreign_name).
#
# Named definitions become <define>; an operator other than '=' adds
# combine="choice" for '|=' and combine="interleave" for anything else.
def process_div_block(xml, div_block)
  # Emits <start> when holder[:start][:start_pattern] is present
  emit_start = lambda do |holder|
    start_pattern = holder[:start] && holder[:start][:start_pattern]
    next unless start_pattern

    xml.start { process_pattern_list(xml, start_pattern) }
  end

  # Emits <define> for a named pattern, adding combine for |= and &= forms
  emit_define = lambda do |pattern_item|
    name = process_identifier(pattern_item[:name])
    operator = pattern_item[:operator] ? extract_string(pattern_item[:operator]) : '='
    define_attrs = { name: name }
    define_attrs[:combine] = (operator == '|=' ? 'choice' : 'interleave') unless operator == '='
    xml.define(define_attrs) { process_pattern_list(xml, pattern_item[:pattern]) }
  end

  xml.div do
    emit_start.call(div_block)

    div_block[:includes]&.each do |include_item|
      href = process_string_literal(include_item[:href])
      override = include_item[:override]
      overrides_something = override && (
        (override[:start] && override[:start][:start_pattern]) ||
        (override[:patterns] && !override[:patterns].empty?)
      )

      unless overrides_something
        # No override block, or an empty one
        xml.include(href: href)
        next
      end

      xml.include(href: href) do
        emit_start.call(override)

        override[:patterns]&.each do |pattern_item|
          if pattern_item.key?(:name)
            emit_define.call(pattern_item)
          elsif pattern_item.key?(:top_element)
            process_content_item(xml, pattern_item[:top_element])
          elsif pattern_item.key?(:div)
            process_div_block(xml, pattern_item[:div])
          end
        end
      end
    end

    div_block[:patterns]&.each do |pattern_item|
      if pattern_item.key?(:name)
        emit_define.call(pattern_item)
      elsif pattern_item.key?(:nested_div)
        process_div_block(xml, pattern_item[:nested_div])
      elsif pattern_item.key?(:top_element)
        process_content_item(xml, pattern_item[:top_element])
      elsif pattern_item.key?(:foreign_name)
        # Foreign element annotation - emitted as a foreign element in RNG XML
        process_foreign_element(xml, pattern_item)
      end
    end
  end
end
1388
+
1389
# Emit a foreign element annotation (e.g. foo [] or rng:foo [ "val" ]) as an
# empty element appended to the builder's current parent.
#
# When :foreign_annotation is present, its annotation items (found under
# [:ann][:ann_items] or directly under [:ann_items]) are validated first with
# validate_preamble_annotations (TC 18, 70, 71), sharing @seen_ann_attrs
# across calls. The annotation content itself is not written to the output.
def process_foreign_element(xml, pattern_item)
  element_name = process_identifier(pattern_item[:foreign_name])

  annotation = pattern_item[:foreign_annotation]
  if annotation
    # Unwrap the optional {ann: {...}} level
    annotation = annotation[:ann] if annotation.is_a?(Hash) && annotation[:ann]
    ann_items = annotation[:ann_items] if annotation.is_a?(Hash)
    validate_preamble_annotations(ann_items, @seen_ann_attrs ||= {}) if ann_items
  end

  xml.parent.add_child("<#{element_name}/>")
end
1407
+ end
1408
+ end