rng 0.1.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/docs.yml +63 -0
  3. data/.github/workflows/release.yml +8 -3
  4. data/.gitignore +11 -0
  5. data/.rubocop.yml +10 -7
  6. data/.rubocop_todo.yml +229 -23
  7. data/CHANGELOG.md +317 -0
  8. data/CLAUDE.md +139 -0
  9. data/Gemfile +11 -12
  10. data/README.adoc +1538 -11
  11. data/Rakefile +11 -3
  12. data/docs/Gemfile +8 -0
  13. data/docs/_config.yml +23 -0
  14. data/docs/getting-started/index.adoc +75 -0
  15. data/docs/guides/error-handling.adoc +137 -0
  16. data/docs/guides/external-references.adoc +128 -0
  17. data/docs/guides/index.adoc +24 -0
  18. data/docs/guides/parsing-rnc.adoc +141 -0
  19. data/docs/guides/parsing-rng-xml.adoc +81 -0
  20. data/docs/guides/rng-to-rnc.adoc +101 -0
  21. data/docs/guides/validation.adoc +85 -0
  22. data/docs/index.adoc +52 -0
  23. data/docs/reference/api.adoc +126 -0
  24. data/docs/reference/cli.adoc +182 -0
  25. data/docs/understanding/architecture.adoc +58 -0
  26. data/docs/understanding/rng-vs-rnc.adoc +118 -0
  27. data/exe/rng +5 -0
  28. data/lib/rng/any_name.rb +10 -8
  29. data/lib/rng/attribute.rb +28 -26
  30. data/lib/rng/choice.rb +24 -24
  31. data/lib/rng/cli.rb +607 -0
  32. data/lib/rng/data.rb +10 -10
  33. data/lib/rng/datatype_declaration.rb +26 -0
  34. data/lib/rng/define.rb +44 -41
  35. data/lib/rng/div.rb +36 -0
  36. data/lib/rng/documentation.rb +9 -0
  37. data/lib/rng/element.rb +39 -37
  38. data/lib/rng/empty.rb +7 -7
  39. data/lib/rng/except.rb +25 -25
  40. data/lib/rng/external_ref.rb +8 -8
  41. data/lib/rng/external_ref_resolver.rb +602 -0
  42. data/lib/rng/foreign_attribute.rb +26 -0
  43. data/lib/rng/foreign_element.rb +33 -0
  44. data/lib/rng/grammar.rb +14 -12
  45. data/lib/rng/group.rb +26 -24
  46. data/lib/rng/include.rb +5 -6
  47. data/lib/rng/include_processor.rb +461 -0
  48. data/lib/rng/interleave.rb +23 -23
  49. data/lib/rng/list.rb +22 -22
  50. data/lib/rng/mixed.rb +23 -23
  51. data/lib/rng/name.rb +6 -7
  52. data/lib/rng/namespace_declaration.rb +47 -0
  53. data/lib/rng/namespaces.rb +15 -0
  54. data/lib/rng/not_allowed.rb +7 -7
  55. data/lib/rng/ns_name.rb +9 -9
  56. data/lib/rng/one_or_more.rb +23 -23
  57. data/lib/rng/optional.rb +23 -23
  58. data/lib/rng/param.rb +7 -8
  59. data/lib/rng/parent_ref.rb +8 -8
  60. data/lib/rng/parse_tree_processor.rb +695 -0
  61. data/lib/rng/pattern.rb +7 -7
  62. data/lib/rng/ref.rb +8 -8
  63. data/lib/rng/rnc_builder.rb +927 -0
  64. data/lib/rng/rnc_parser.rb +605 -305
  65. data/lib/rng/rnc_to_rng_converter.rb +1408 -0
  66. data/lib/rng/schema_preamble.rb +73 -0
  67. data/lib/rng/schema_validator.rb +1622 -0
  68. data/lib/rng/start.rb +27 -25
  69. data/lib/rng/test_suite_parser.rb +168 -0
  70. data/lib/rng/text.rb +11 -8
  71. data/lib/rng/to_rnc.rb +4 -35
  72. data/lib/rng/value.rb +6 -7
  73. data/lib/rng/version.rb +1 -1
  74. data/lib/rng/zero_or_more.rb +23 -23
  75. data/lib/rng.rb +68 -17
  76. data/rng.gemspec +18 -19
  77. data/scripts/extract_spectest_resources.rb +96 -0
  78. data/spec/fixtures/compacttest.xml +2511 -0
  79. data/spec/fixtures/external/circular_a.rng +7 -0
  80. data/spec/fixtures/external/circular_b.rng +7 -0
  81. data/spec/fixtures/external/circular_main.rng +7 -0
  82. data/spec/fixtures/external/external_ref_lib.rng +7 -0
  83. data/spec/fixtures/external/external_ref_main.rng +7 -0
  84. data/spec/fixtures/external/include_lib.rng +7 -0
  85. data/spec/fixtures/external/include_main.rng +3 -0
  86. data/spec/fixtures/external/nested_chain.rng +6 -0
  87. data/spec/fixtures/external/nested_leaf.rng +7 -0
  88. data/spec/fixtures/external/nested_mid.rng +8 -0
  89. data/spec/fixtures/metanorma/3gpp.rnc +35 -0
  90. data/spec/fixtures/metanorma/3gpp.rng +105 -0
  91. data/spec/fixtures/metanorma/basicdoc.rnc +11 -0
  92. data/spec/fixtures/metanorma/bipm.rnc +148 -0
  93. data/spec/fixtures/metanorma/bipm.rng +376 -0
  94. data/spec/fixtures/metanorma/bsi.rnc +104 -0
  95. data/spec/fixtures/metanorma/bsi.rng +332 -0
  96. data/spec/fixtures/metanorma/csa.rnc +45 -0
  97. data/spec/fixtures/metanorma/csa.rng +131 -0
  98. data/spec/fixtures/metanorma/csd.rnc +43 -0
  99. data/spec/fixtures/metanorma/csd.rng +132 -0
  100. data/spec/fixtures/metanorma/gbstandard.rnc +99 -0
  101. data/spec/fixtures/metanorma/gbstandard.rng +316 -0
  102. data/spec/fixtures/metanorma/iec.rnc +49 -0
  103. data/spec/fixtures/metanorma/iec.rng +193 -0
  104. data/spec/fixtures/metanorma/ietf.rnc +275 -0
  105. data/spec/fixtures/metanorma/ietf.rng +925 -0
  106. data/spec/fixtures/metanorma/iho.rnc +58 -0
  107. data/spec/fixtures/metanorma/iho.rng +179 -0
  108. data/spec/fixtures/metanorma/isodoc.rnc +873 -0
  109. data/spec/fixtures/metanorma/isodoc.rng +2704 -0
  110. data/spec/fixtures/metanorma/isostandard-amd.rnc +43 -0
  111. data/spec/fixtures/metanorma/isostandard-amd.rng +108 -0
  112. data/spec/fixtures/metanorma/isostandard.rnc +166 -0
  113. data/spec/fixtures/metanorma/isostandard.rng +494 -0
  114. data/spec/fixtures/metanorma/itu.rnc +122 -0
  115. data/spec/fixtures/metanorma/itu.rng +377 -0
  116. data/spec/fixtures/metanorma/m3d.rnc +41 -0
  117. data/spec/fixtures/metanorma/m3d.rng +122 -0
  118. data/spec/fixtures/metanorma/mpfd.rnc +36 -0
  119. data/spec/fixtures/metanorma/mpfd.rng +95 -0
  120. data/spec/fixtures/metanorma/nist.rnc +77 -0
  121. data/spec/fixtures/metanorma/nist.rng +216 -0
  122. data/spec/fixtures/metanorma/ogc.rnc +51 -0
  123. data/spec/fixtures/metanorma/ogc.rng +151 -0
  124. data/spec/fixtures/metanorma/reqt.rnc +6 -0
  125. data/spec/fixtures/metanorma/rsd.rnc +36 -0
  126. data/spec/fixtures/metanorma/rsd.rng +95 -0
  127. data/spec/fixtures/metanorma/un.rnc +103 -0
  128. data/spec/fixtures/metanorma/un.rng +367 -0
  129. data/spec/fixtures/rnc/base.rnc +4 -0
  130. data/spec/fixtures/rnc/grammar_with_trailing.rnc +8 -0
  131. data/spec/fixtures/rnc/main_include_trailing.rnc +3 -0
  132. data/spec/fixtures/rnc/main_with_include.rnc +5 -0
  133. data/spec/fixtures/rnc/test_augment.rnc +10 -0
  134. data/spec/fixtures/rnc/test_isodoc_simple.rnc +9 -0
  135. data/spec/fixtures/rnc/top_level_include.rnc +8 -0
  136. data/spec/fixtures/spectest_external/case_10_4.7/x +3 -0
  137. data/spec/fixtures/spectest_external/case_10_4.7/y +7 -0
  138. data/spec/fixtures/spectest_external/case_11_4.7/x +3 -0
  139. data/spec/fixtures/spectest_external/case_12_4.7/x +3 -0
  140. data/spec/fixtures/spectest_external/case_13_4.7/x +3 -0
  141. data/spec/fixtures/spectest_external/case_13_4.7/y +3 -0
  142. data/spec/fixtures/spectest_external/case_14_4.7/x +7 -0
  143. data/spec/fixtures/spectest_external/case_15_4.7/x +7 -0
  144. data/spec/fixtures/spectest_external/case_16_4.7/x +5 -0
  145. data/spec/fixtures/spectest_external/case_17_4.7/x +5 -0
  146. data/spec/fixtures/spectest_external/case_18_4.7/x +7 -0
  147. data/spec/fixtures/spectest_external/case_19_4.7/level1.rng +9 -0
  148. data/spec/fixtures/spectest_external/case_19_4.7/level2.rng +7 -0
  149. data/spec/fixtures/spectest_external/case_1_4.5/sub1/x +3 -0
  150. data/spec/fixtures/spectest_external/case_1_4.5/sub3/x +3 -0
  151. data/spec/fixtures/spectest_external/case_1_4.5/x +3 -0
  152. data/spec/fixtures/spectest_external/case_20_4.6/x +3 -0
  153. data/spec/fixtures/spectest_external/case_2_4.5/x +3 -0
  154. data/spec/fixtures/spectest_external/case_3_4.6/x +3 -0
  155. data/spec/fixtures/spectest_external/case_4_4.6/x +3 -0
  156. data/spec/fixtures/spectest_external/case_5_4.6/x +1 -0
  157. data/spec/fixtures/spectest_external/case_6_4.6/x +5 -0
  158. data/spec/fixtures/spectest_external/case_7_4.6/x +1 -0
  159. data/spec/fixtures/spectest_external/case_7_4.6/y +1 -0
  160. data/spec/fixtures/spectest_external/case_8_4.7/x +7 -0
  161. data/spec/fixtures/spectest_external/case_9_4.7/x +7 -0
  162. data/spec/fixtures/spectest_external/resources.json +149 -0
  163. data/spec/rng/advanced_rnc_spec.rb +101 -0
  164. data/spec/rng/compacttest_spec.rb +197 -0
  165. data/spec/rng/datatype_declaration_spec.rb +28 -0
  166. data/spec/rng/div_spec.rb +207 -0
  167. data/spec/rng/external_ref_resolver_spec.rb +122 -0
  168. data/spec/rng/metanorma_conversion_spec.rb +159 -0
  169. data/spec/rng/namespace_declaration_spec.rb +60 -0
  170. data/spec/rng/namespace_support_spec.rb +199 -0
  171. data/spec/rng/rnc_parser_spec.rb +498 -22
  172. data/spec/rng/rnc_roundtrip_spec.rb +96 -82
  173. data/spec/rng/rng_generation_spec.rb +288 -0
  174. data/spec/rng/roundtrip_spec.rb +342 -0
  175. data/spec/rng/schema_preamble_spec.rb +145 -0
  176. data/spec/rng/schema_spec.rb +68 -64
  177. data/spec/rng/spectest_spec.rb +168 -90
  178. data/spec/rng_spec.rb +2 -2
  179. data/spec/spec_helper.rb +7 -42
  180. metadata +141 -8
@@ -0,0 +1,695 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rng
4
+ # Normalizes parse tree structure into consistent grammar format
5
+ #
6
+ # Handles three different RNC file structures:
7
+ # 1. Top-level includes (Metanorma-style)
8
+ # 2. Grammar block wrapper
9
+ # 3. Flat grammar
10
+ #
11
+ # @example Basic Usage
12
+ # tree = parser.parse(rnc_content)
13
+ # processor = ParseTreeProcessor.new(tree)
14
+ # normalized = processor.normalize
15
+ # grammar_tree = normalized.grammar_tree
16
+ # namespace = normalized.namespace
17
+ #
18
+ class ParseTreeProcessor
19
+ attr_reader :tree, :namespace, :preamble, :grammar_tree
20
+
21
# Build a processor around a raw parse tree.
#
# Only stores the tree; every derived attribute starts out as nil and is
# populated later by #normalize.
#
# @param tree [Hash] Raw parse tree from Parslet parser
def initialize(tree)
  @tree = tree
  # Derived state: preamble object, legacy namespace, normalized grammar.
  @namespace = @preamble = @grammar_tree = nil
end
30
+
31
# Run the full normalization pipeline over the stored parse tree.
#
# Steps, in order: build the preamble object, read the legacy namespace,
# build the grammar tree, expand raw (unparsed) blocks inside it, then copy
# namespace/preamble metadata onto the grammar tree.
#
# @return [self] Returns self for chaining
def normalize
  @preamble = extract_preamble_section
  @namespace = extract_namespace
  # Assign first, then expand raw blocks in place on the freshly built tree.
  process_raw_overrides!(@grammar_tree = build_grammar_tree)
  add_metadata_to_grammar
  self
end
45
+
46
+ private
47
+
48
# Read the legacy namespace entry straight from the root of the parse tree.
#
# @return [Object, nil] Value stored under :namespace, or nil when absent
def extract_namespace
  @tree[:namespace]
end
54
+
55
# Turn the tree's :preamble_items into a SchemaPreamble.
#
# A single item is treated like a one-element list. Non-Hash entries are
# ignored. Items carrying one of the namespace keys are handled as namespace
# declarations; items with both :prefix and :uri as datatype declarations.
#
# @return [SchemaPreamble, nil] Preamble object, or nil when there are no
#   preamble items or nothing was added to the preamble
def extract_preamble_section
  raw_items = @tree[:preamble_items]
  return nil unless raw_items

  collected = SchemaPreamble.new
  declarations = raw_items.is_a?(Array) ? raw_items : [raw_items]

  declarations.grep(Hash).each do |decl|
    if %i[default_ns default_prefixed_ns prefixed_ns].any? { |key| decl[key] }
      process_namespace_declaration(collected, decl)
    elsif decl[:prefix] && decl[:uri]
      process_datatype_declaration(collected, decl)
    end
  end

  collected.empty? ? nil : collected
end
80
+
81
# Convert one namespace declaration from the parse tree and add it to the
# preamble.
#
# Three shapes are recognised, checked in this order:
# - :default_ns           -> default namespace = "uri"
# - :default_prefixed_ns  -> default namespace prefix = "uri"
# - :prefixed_ns          -> namespace prefix = "uri"
# Items matching none of them are ignored.
#
# @param preamble [SchemaPreamble] Preamble to add to
# @param item [Hash] Namespace declaration from parse tree
def process_namespace_declaration(preamble, item)
  declaration =
    if (unprefixed = item[:default_ns])
      NamespaceDeclaration.new(
        uri: extract_string_literal(unprefixed[:uri]),
        is_default: true
      )
    elsif (prefixed_default = item[:default_prefixed_ns])
      NamespaceDeclaration.new(
        prefix: extract_identifier(prefixed_default[:prefix]),
        uri: extract_string_literal(prefixed_default[:uri]),
        is_default: true
      )
    elsif (prefixed = item[:prefixed_ns])
      NamespaceDeclaration.new(
        prefix: extract_identifier(prefixed[:prefix]),
        uri: extract_string_literal(prefixed[:uri])
      )
    end

  preamble.add_namespace(declaration) if declaration
end
111
+
112
# Convert one datatype library declaration and add it to the preamble.
#
# @param preamble [SchemaPreamble] Preamble to add to
# @param item [Hash] Datatype declaration from parse tree (:prefix, :uri)
def process_datatype_declaration(preamble, item)
  declaration = DatatypeDeclaration.new(
    prefix: extract_identifier(item[:prefix]),
    uri: extract_string_literal(item[:uri])
  )
  preamble.add_datatype(declaration)
end
123
+
124
# Extract identifier text from an identifier node.
#
# The usual input is a Hash with :identifier_parts, each part being either
# { char: ... } or { hex_escape: { hex: ... } } (a \x{HEX} escape, decoded
# to the corresponding Unicode character). To match the leniency of the
# other extractors in this class, a bare String / slice-like identifier, a
# non-Array :identifier_parts value and non-Hash parts are accepted too
# instead of raising.
#
# @param id [Hash, String, #str, nil] Identifier node
# @return [String] Extracted identifier ('' when nothing usable is present)
def extract_identifier(id)
  return '' unless id
  return extract_parslet_string(id) if id.is_a?(String) || id.respond_to?(:str)
  return '' unless id.is_a?(Hash)

  parts = id[:identifier_parts]
  return '' unless parts

  # A lone part (Hash or slice) is treated like a one-element list.
  parts = [parts] unless parts.is_a?(Array)

  parts.map do |part|
    if !part.is_a?(Hash)
      extract_parslet_string(part)
    elsif part[:char]
      extract_parslet_string(part[:char])
    elsif part[:hex_escape]
      # Handle hex escape: \x{HEX}
      hex_str = extract_parslet_string(part[:hex_escape][:hex])
      [hex_str.to_i(16)].pack('U')
    else
      ''
    end
  end.join
end
143
+
144
# Extract the text of a string literal, including concatenated segments.
#
# The usual input is a Hash with :string_parts plus an optional
# :concatenations list whose entries carry :concat_string_parts. A literal
# that is not a Hash (bare String, slice, or parts array) is handed directly
# to extract_string_parts, which already understands those shapes. A single
# Hash under :concatenations is treated as a one-element list, and non-Hash
# concatenation entries are skipped.
#
# @param lit [Hash, String, #str, Array, nil] String literal node
# @return [String] Extracted string ('' for nil)
def extract_string_literal(lit)
  return '' unless lit
  return extract_string_parts(lit) unless lit.is_a?(Hash)

  segments = [extract_string_parts(lit[:string_parts])]

  continuations = lit[:concatenations]
  continuations = [continuations] if continuations.is_a?(Hash)

  if continuations.is_a?(Array)
    continuations.each do |concat|
      next unless concat.is_a?(Hash)

      segments << extract_string_parts(concat[:concat_string_parts])
    end
  end

  # Joining builds a new String, so the extracted parts are never mutated.
  segments.join
end
162
+
163
# Collect the documentation comment text attached to a parse tree node.
#
# Looks at node[:docs][:documentation]; a single entry is treated like a
# one-element list. Each entry contributes its :doc_line text (or an empty
# line when it has none); lines are joined with "\n" and the result is
# stripped of surrounding whitespace.
#
# @param node [Hash] Node that may contain :docs
# @return [String, nil] Documentation text, or nil when the node is not a
#   Hash or carries no documentation
def extract_documentation(node)
  docs = node.is_a?(Hash) ? node[:docs] : nil
  return nil unless docs

  entries = docs[:documentation]
  return nil unless entries

  entries = [entries] unless entries.is_a?(Array)

  lines = entries.map do |entry|
    raw = entry[:doc_line]
    raw ? extract_parslet_string(raw) : ''
  end

  lines.join("\n").strip
end
183
+
184
# Namespace URI of the RELAX NG structure vocabulary. Annotation names whose
# prefix resolves to it are rejected.
RNG_NAMESPACE = 'http://relaxng.org/ns/structure/1.0'

# Extract annotations from parse tree node
#
# node[:annotations] holds the first annotation inline plus any further ones
# under :more_annotations. Entries with :attr_value become foreign
# attributes; entries that have an :elem_content key (even a nil one) become
# foreign elements. Entries without :ann_name are ignored.
#
# NOTE(review): @namespace_prefixes is never assigned anywhere in this
# class, so the RELAX NG namespace checks only run if something else sets
# it - confirm where the prefix map is supposed to come from.
#
# @param node [Hash] Node that may contain :annotations
# @return [Hash] Hash with :attributes and :elements arrays
# @raise [StandardError] on a duplicate attribute name, an unprefixed
#   `xmlns` attribute, or a name in the RELAX NG namespace
def extract_annotations(node)
  result = { attributes: [], elements: [] }
  return result unless node.is_a?(Hash) && node[:annotations]

  # Track seen attribute names for duplicate detection (TC 11-12)
  seen_attrs = {}

  collect_annotation_items(node[:annotations]).each do |ann|
    next unless ann.is_a?(Hash) && ann[:ann_name]

    name_parts = extract_qualified_name(ann[:ann_name])

    if ann[:attr_value]
      result[:attributes] << build_annotation_attribute(ann, name_parts, seen_attrs)
    elsif ann.key?(:elem_content)
      result[:elements] << build_annotation_element(ann, name_parts)
    end
  end

  result
end

# Flatten an annotation block into a list: the inline first annotation (the
# block minus :more_annotations, when non-empty) followed by the extras.
def collect_annotation_items(ann_block)
  return [] unless ann_block.is_a?(Hash)

  first_ann = ann_block.reject { |key, _| key == :more_annotations }
  items = first_ann.empty? ? [] : [first_ann]

  more = ann_block[:more_annotations]
  return items unless more

  items.concat(more.is_a?(Array) ? more : [more])
end

# Validate one foreign attribute and return its result entry; records the
# attribute name in seen_attrs so later duplicates are detected.
def build_annotation_attribute(ann, name_parts, seen_attrs)
  value = extract_string_literal(ann[:attr_value])

  # TC 11-12: Check for duplicate annotation attributes
  attr_key = "#{name_parts[:prefix]}:#{name_parts[:local]}"
  raise StandardError, "duplicate annotation attribute '#{attr_key}'" if seen_attrs.key?(attr_key)

  seen_attrs[attr_key] = true

  # TC 18: xmlns attribute is forbidden in annotations
  if name_parts[:local] == 'xmlns' && name_parts[:prefix].nil?
    raise StandardError,
          'xmlns attribute is not allowed in annotations'
  end

  # TC 70-71: RNG namespace attributes forbidden
  ensure_not_rng_namespace(name_parts[:prefix], 'attributes')

  { name: name_parts[:local], namespace: name_parts[:prefix], value: value }
end

# Extract one foreign element (content first, then the namespace check) and
# return its result entry.
def build_annotation_element(ann, name_parts)
  content_data = extract_annotation_content(ann[:elem_content])

  # TC 70-71: RNG namespace elements forbidden
  ensure_not_rng_namespace(name_parts[:prefix], 'elements')

  {
    name: name_parts[:local],
    namespace: name_parts[:prefix],
    content: content_data[:text],
    attributes: content_data[:attributes],
    elements: content_data[:elements]
  }
end

# Raise when prefix maps to the RELAX NG namespace in @namespace_prefixes;
# does nothing when there is no prefix or no prefix map.
def ensure_not_rng_namespace(prefix, kind)
  return unless prefix && @namespace_prefixes

  ns_uri = @namespace_prefixes[prefix]
  raise StandardError, "#{kind} in the RELAX NG namespace are not allowed" if ns_uri == RNG_NAMESPACE
end
269
+
270
# Split a qualified name node into its prefix and local part.
#
# The prefix is only extracted when the node has a :prefix entry; otherwise
# it stays nil. The local part always comes from :local_name.
#
# @param qname [Hash] Qualified name from parse tree
# @return [Hash] Hash with :prefix and :local keys
def extract_qualified_name(qname)
  return { prefix: nil, local: '' } unless qname

  raw_prefix = qname[:prefix]

  {
    prefix: raw_prefix ? extract_identifier(raw_prefix) : nil,
    local: extract_identifier(qname[:local_name])
  }
end
284
+
285
# Extract annotation content (text and nested items)
#
# Content is expected as a Hash with an optional :first item and optional
# :rest item(s). Items with :text contribute to the text; items with
# :ann_name are nested annotations whose attributes/elements are merged into
# the result. Anything that is not a Hash - the content itself or an
# individual item (e.g. a bare slice) - is skipped instead of raising.
#
# Each nested item is passed to extract_annotations on its own, so that
# method's duplicate-attribute tracking starts fresh for every item.
#
# @param content [Hash, nil] Annotation content from parse tree
# @return [Hash] Hash with :text, :attributes, :elements
def extract_annotation_content(content)
  result = { text: '', attributes: [], elements: [] }
  return result unless content.is_a?(Hash)

  items = []
  items << content[:first] if content[:first]

  rest = content[:rest]
  items.concat(rest.is_a?(Array) ? rest : [rest]) if rest

  text_parts = []
  items.each do |item|
    next unless item.is_a?(Hash)

    if item[:text]
      # String literal
      text_parts << extract_string_literal(item[:text])
    elsif item[:ann_name]
      # Nested annotation item
      nested = extract_annotations({ annotations: item })
      result[:attributes].concat(nested[:attributes])
      result[:elements].concat(nested[:elements])
    end
  end

  result[:text] = text_parts.join unless text_parts.empty?
  result
end
322
+
323
# Extract string from string_parts
#
# Accepts nil (-> ''), a String (returned as is), a slice-like object
# responding to #str, a single part Hash, or an Array of parts. Each part
# may be a String, a slice, or a Hash with one of:
# - :hex_escape  -> \x{HEX}, decoded to the Unicode character
# - :char_escape -> n, r and t become the control character; the class
#                   escapes i, c, d and w keep their backslash; any other
#                   escaped character (including " and \) stands for itself
# - :char        -> plain character
# Any other part falls back to #to_s; the Hash check keeps that fallback
# reachable for parts such as nil or Integers.
#
# @param parts [Array, Hash, String, #str, nil] String parts
# @return [String] Extracted string
def extract_string_parts(parts)
  return '' unless parts
  return parts if parts.is_a?(String)
  return parts.str if parts.respond_to?(:str)

  # A lone part is treated like a one-element list.
  parts = [parts] if parts.is_a?(Hash)
  return '' unless parts.is_a?(Array)

  parts.map do |part|
    if part.is_a?(String)
      part
    elsif part.respond_to?(:str)
      part.str
    elsif !part.is_a?(Hash)
      part.to_s
    elsif part[:hex_escape]
      # Handle \x{HEX}
      hex_str = extract_parslet_string(part[:hex_escape][:hex])
      [hex_str.to_i(16)].pack('U')
    elsif part[:char_escape]
      char = extract_parslet_string(part[:char_escape][:char])
      case char
      when 'n' then "\n"
      when 'r' then "\r"
      when 't' then "\t"
      when 'i', 'c', 'd', 'w' then "\\#{char}"
      else char
      end
    elsif part[:char]
      # Regular character (plain char in string literal)
      extract_parslet_string(part[:char])
    else
      part.to_s
    end
  end.join
end
366
+
367
# Convert a parse result leaf to a plain String.
#
# Objects responding to #str (such as Parslet::Slice) yield that value;
# everything else is converted with #to_s.
#
# @param obj [Parslet::Slice, String] Object to extract
# @return [String] Extracted string
def extract_parslet_string(obj)
  return obj.str if obj.respond_to?(:str)

  obj.to_s
end
374
+
375
# Build the normalized grammar tree for whichever layout the parse tree has.
#
# Any :raw_trailing content is expanded first (this rewrites keys on @tree
# itself). The layout is then chosen in this order:
# - top-level includes -> grammar assembled from the includes
# - grammar block      -> the inner grammar
# - otherwise          -> the flat tree
#
# @return [Hash] Normalized grammar tree
def build_grammar_tree
  process_raw_trailing!(@tree) if @tree[:raw_trailing]

  return build_top_level_includes_grammar if top_level_includes?
  return build_grammar_block_grammar if grammar_block?

  build_flat_grammar
end
395
+
396
# Whether the parse tree has a :top_includes key (even with a nil value),
# i.e. uses the top-level includes layout.
#
# @return [Boolean]
def top_level_includes?
  @tree.key?(:top_includes)
end
402
+
403
# Whether the parse tree has an :inner_grammar key (even with a nil value),
# i.e. the schema is wrapped in a grammar block.
#
# @return [Boolean]
def grammar_block?
  @tree.key?(:inner_grammar)
end
409
+
410
# Assemble a grammar for a tree made of top-level includes.
#
# The result has no start pattern. :definitions lists the includes first,
# followed by any trailing definitions; :includes repeats the includes on
# their own (or is empty when there are none).
#
# @return [Hash] Grammar with :start, :includes and :definitions
def build_top_level_includes_grammar
  top_includes = @tree[:top_includes]
  trailing = @tree[:trailing_definitions]

  merged = []
  merged.concat(top_includes) if top_includes
  merged.concat(trailing) if trailing

  { start: nil, includes: top_includes || [], definitions: merged }
end
428
+
429
# Build the grammar from a tree wrapped in a grammar block.
#
# Works on a shallow copy of @tree[:inner_grammar]. Truthy :includes and
# :patterns entries are removed and, when together non-empty, stored as
# :definitions (includes first). Trailing definitions are merged in last.
#
# @return [Hash]
def build_grammar_block_grammar
  grammar = @tree[:inner_grammar].dup

  collected = []
  %i[includes patterns].each do |key|
    collected.concat(grammar.delete(key)) if grammar[key]
  end
  grammar[:definitions] = collected unless collected.empty?

  merge_trailing_definitions(grammar)
  grammar
end
446
+
447
# Build the grammar from a flat tree (no grammar block, no top-level
# includes).
#
# Works on a shallow copy of @tree. Truthy :includes and :patterns entries
# are removed and, when together non-empty, stored as :definitions
# (includes first). All other keys are carried over unchanged.
#
# @return [Hash]
def build_flat_grammar
  grammar = @tree.dup

  collected = []
  %i[includes patterns].each do |key|
    collected.concat(grammar.delete(key)) if grammar[key]
  end
  grammar[:definitions] = collected unless collected.empty?

  grammar
end
463
+
464
# Append the tree's trailing definitions to grammar[:definitions],
# creating that list when the grammar has none. Does nothing when there are
# no trailing definitions.
#
# @param grammar [Hash] Grammar to merge into (modified in place)
def merge_trailing_definitions(grammar)
  trailing = @tree[:trailing_definitions]
  return if !trailing || trailing.empty?

  (grammar[:definitions] ||= []).concat(trailing)
end
473
+
474
# Walk the tree and expand every raw (still unparsed) block in place.
#
# For each Hash node, in this order: a raw override under :override, a
# :raw_grammar block, and :raw_patterns content are parsed and replaced;
# then all values of the node are visited. Arrays are visited element by
# element. Anything else is left alone.
#
# @param node [Hash, Array] Tree node to process
def process_raw_overrides!(node)
  return node.each { |child| process_raw_overrides!(child) } if node.is_a?(Array)
  return unless node.is_a?(Hash)

  parse_and_replace_override!(node) if node[:override]&.dig(:raw_override)
  parse_and_replace_grammar!(node) if node[:raw_grammar]
  parse_and_replace_patterns!(node) if node[:raw_patterns]

  # Replacements above happen before iteration starts, so the walk below
  # also covers the freshly parsed content.
  node.each_value { |child| process_raw_overrides!(child) }
end
496
+
497
# Replace a node's raw override with its parsed form.
#
# When the raw text is blank the :override entry is removed altogether;
# otherwise it is replaced by the parsed override structure.
#
# @param node [Hash] Node containing :override with :raw_override
def parse_and_replace_override!(node)
  body = extract_raw_text(node[:override][:raw_override])
  return node.delete(:override) if body.strip.empty?

  node[:override] = parse_override_with_scope(body)
end
513
+
514
# Turn raw captured content into a single String.
#
# An Array is converted piece by piece and concatenated; a String is
# returned unchanged; anything else is converted on its own. Pieces that
# respond to #str use that value, all others use #to_s.
#
# @param raw [Array, Parslet::Slice, String] Raw content
# @return [String] Extracted text
def extract_raw_text(raw)
  to_text = ->(piece) { piece.respond_to?(:str) ? piece.str : piece.to_s }

  return raw.map(&to_text).join if raw.is_a?(Array)
  return raw if raw.is_a?(String)

  to_text.call(raw)
end
528
+
529
# Replace a node's :raw_grammar text with parsed grammar content.
#
# The raw entry is always removed. Blank text yields an empty grammar
# structure merged into the node. Otherwise the text is parsed; if the node
# has no other keys left the parsed grammar is merged directly into it,
# else it is stored under :inner_grammar.
#
# @param node [Hash] Node containing :raw_grammar
def parse_and_replace_grammar!(node)
  source = extract_raw_text(node[:raw_grammar])
  node.delete(:raw_grammar)

  return node.update(start: nil, includes: [], patterns: []) if source.strip.empty?

  parsed = parse_grammar_with_scope(source)
  # A node that held nothing but the raw grammar is the grammar itself.
  return node.update(parsed) if node.empty?

  node[:inner_grammar] = parsed
end
554
+
555
# Parse the text of an override block.
#
# The text is parsed with a fresh parser's grammar rule; only the :start
# and :patterns entries of the result are kept (includes are dropped).
# On a parse failure an empty structure is returned; a warning is printed
# only when the RNG_VERBOSE environment variable is set.
#
# @param text [String] Override block content
# @return [Hash] Parsed structure with :start and :patterns
def parse_override_with_scope(text)
  parsed = Rng::RncParser.new.grammar.parse(text.strip)

  { start: parsed[:start], patterns: parsed[:patterns] || [] }
rescue Parslet::ParseFailed => e
  warn "Warning: Failed to parse override block: #{e.message}" if ENV['RNG_VERBOSE']
  { start: nil, patterns: [] }
end
580
+
581
# Parse the text of a grammar block.
#
# The text is parsed with a fresh parser's grammar rule and the result is
# returned as is. On a parse failure an empty grammar structure is
# returned; a warning is printed only when the RNG_VERBOSE environment
# variable is set.
#
# @param text [String] Grammar block content
# @return [Hash] Parsed structure
def parse_grammar_with_scope(text)
  Rng::RncParser.new.grammar.parse(text.strip)
rescue Parslet::ParseFailed => e
  warn "Warning: Failed to parse grammar block: #{e.message}" if ENV['RNG_VERBOSE']
  { start: nil, includes: [], patterns: [] }
end
601
+
602
# Replace a node's :raw_patterns text with a parsed :patterns list.
#
# Blank text yields an empty list; otherwise the text is parsed as a
# sequence of patterns. The raw entry is removed afterwards.
#
# @param node [Hash] Node containing :raw_patterns
def parse_and_replace_patterns!(node)
  source = extract_raw_text(node[:raw_patterns])
  node[:patterns] = source.strip.empty? ? [] : parse_patterns_with_scope(source)
  node.delete(:raw_patterns)
end
621
+
622
# Parse text consisting of zero or more top-level patterns.
#
# Builds an ad-hoc rule from a fresh parser:
# (named_pattern | div_block as :div | element_def as :top_element),
# each followed by whitespace, repeated. A non-Array parse result is
# wrapped in a one-element Array. On a parse failure an empty list is
# returned; a warning is printed only when the RNG_VERBOSE environment
# variable is set.
#
# @param text [String] Patterns content
# @return [Array] Parsed patterns
def parse_patterns_with_scope(text)
  grammar = Rng::RncParser.new

  definition = grammar.named_pattern |
               grammar.div_block.as(:div) |
               grammar.element_def.as(:top_element)
  matches = (definition >> grammar.whitespace).repeat.parse(text.strip)

  matches.is_a?(Array) ? matches : [matches]
rescue Parslet::ParseFailed => e
  warn "Warning: Failed to parse patterns: #{e.message}" if ENV['RNG_VERBOSE']
  []
end
649
+
650
# Replace a node's :raw_trailing text with a parsed :trailing_definitions
# list.
#
# Blank text yields an empty list; otherwise the text is parsed as a
# sequence of patterns. The raw entry is removed afterwards.
#
# @param node [Hash] Node containing :raw_trailing
def process_raw_trailing!(node)
  source = extract_raw_text(node[:raw_trailing])
  node[:trailing_definitions] = source.strip.empty? ? [] : parse_patterns_with_scope(source)
  node.delete(:raw_trailing)
end
668
+
669
# Copy namespace and preamble metadata onto the grammar tree.
#
# The legacy namespace is stored under :namespace when present. With a
# preamble, its default namespace is stored under :default_namespace and
# also replaces :namespace in the legacy { namespace_uri: ... } form;
# non-empty namespace and datatype maps are stored under :namespace_map and
# :datatype_map.
def add_metadata_to_grammar
  @grammar_tree[:namespace] = @namespace if @namespace
  return unless @preamble

  default_uri = @preamble.default_namespace
  if default_uri
    @grammar_tree[:default_namespace] = default_uri
    # Legacy format expected by the converter
    @grammar_tree[:namespace] = { namespace_uri: default_uri }
  end

  prefixes = @preamble.namespace_map
  @grammar_tree[:namespace_map] = prefixes unless prefixes.empty?

  datatypes = @preamble.datatype_map
  @grammar_tree[:datatype_map] = datatypes unless datatypes.empty?
end
694
+ end
695
+ end