rng 0.1.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/docs.yml +63 -0
- data/.github/workflows/release.yml +8 -3
- data/.gitignore +11 -0
- data/.rubocop.yml +10 -7
- data/.rubocop_todo.yml +229 -23
- data/CHANGELOG.md +317 -0
- data/CLAUDE.md +139 -0
- data/Gemfile +11 -12
- data/README.adoc +1538 -11
- data/Rakefile +11 -3
- data/docs/Gemfile +8 -0
- data/docs/_config.yml +23 -0
- data/docs/getting-started/index.adoc +75 -0
- data/docs/guides/error-handling.adoc +137 -0
- data/docs/guides/external-references.adoc +128 -0
- data/docs/guides/index.adoc +24 -0
- data/docs/guides/parsing-rnc.adoc +141 -0
- data/docs/guides/parsing-rng-xml.adoc +81 -0
- data/docs/guides/rng-to-rnc.adoc +101 -0
- data/docs/guides/validation.adoc +85 -0
- data/docs/index.adoc +52 -0
- data/docs/reference/api.adoc +126 -0
- data/docs/reference/cli.adoc +182 -0
- data/docs/understanding/architecture.adoc +58 -0
- data/docs/understanding/rng-vs-rnc.adoc +118 -0
- data/exe/rng +5 -0
- data/lib/rng/any_name.rb +10 -8
- data/lib/rng/attribute.rb +28 -26
- data/lib/rng/choice.rb +24 -24
- data/lib/rng/cli.rb +607 -0
- data/lib/rng/data.rb +10 -10
- data/lib/rng/datatype_declaration.rb +26 -0
- data/lib/rng/define.rb +44 -41
- data/lib/rng/div.rb +36 -0
- data/lib/rng/documentation.rb +9 -0
- data/lib/rng/element.rb +39 -37
- data/lib/rng/empty.rb +7 -7
- data/lib/rng/except.rb +25 -25
- data/lib/rng/external_ref.rb +8 -8
- data/lib/rng/external_ref_resolver.rb +602 -0
- data/lib/rng/foreign_attribute.rb +26 -0
- data/lib/rng/foreign_element.rb +33 -0
- data/lib/rng/grammar.rb +14 -12
- data/lib/rng/group.rb +26 -24
- data/lib/rng/include.rb +5 -6
- data/lib/rng/include_processor.rb +461 -0
- data/lib/rng/interleave.rb +23 -23
- data/lib/rng/list.rb +22 -22
- data/lib/rng/mixed.rb +23 -23
- data/lib/rng/name.rb +6 -7
- data/lib/rng/namespace_declaration.rb +47 -0
- data/lib/rng/namespaces.rb +15 -0
- data/lib/rng/not_allowed.rb +7 -7
- data/lib/rng/ns_name.rb +9 -9
- data/lib/rng/one_or_more.rb +23 -23
- data/lib/rng/optional.rb +23 -23
- data/lib/rng/param.rb +7 -8
- data/lib/rng/parent_ref.rb +8 -8
- data/lib/rng/parse_tree_processor.rb +695 -0
- data/lib/rng/pattern.rb +7 -7
- data/lib/rng/ref.rb +8 -8
- data/lib/rng/rnc_builder.rb +927 -0
- data/lib/rng/rnc_parser.rb +605 -305
- data/lib/rng/rnc_to_rng_converter.rb +1408 -0
- data/lib/rng/schema_preamble.rb +73 -0
- data/lib/rng/schema_validator.rb +1622 -0
- data/lib/rng/start.rb +27 -25
- data/lib/rng/test_suite_parser.rb +168 -0
- data/lib/rng/text.rb +11 -8
- data/lib/rng/to_rnc.rb +4 -35
- data/lib/rng/value.rb +6 -7
- data/lib/rng/version.rb +1 -1
- data/lib/rng/zero_or_more.rb +23 -23
- data/lib/rng.rb +68 -17
- data/rng.gemspec +18 -19
- data/scripts/extract_spectest_resources.rb +96 -0
- data/spec/fixtures/compacttest.xml +2511 -0
- data/spec/fixtures/external/circular_a.rng +7 -0
- data/spec/fixtures/external/circular_b.rng +7 -0
- data/spec/fixtures/external/circular_main.rng +7 -0
- data/spec/fixtures/external/external_ref_lib.rng +7 -0
- data/spec/fixtures/external/external_ref_main.rng +7 -0
- data/spec/fixtures/external/include_lib.rng +7 -0
- data/spec/fixtures/external/include_main.rng +3 -0
- data/spec/fixtures/external/nested_chain.rng +6 -0
- data/spec/fixtures/external/nested_leaf.rng +7 -0
- data/spec/fixtures/external/nested_mid.rng +8 -0
- data/spec/fixtures/metanorma/3gpp.rnc +35 -0
- data/spec/fixtures/metanorma/3gpp.rng +105 -0
- data/spec/fixtures/metanorma/basicdoc.rnc +11 -0
- data/spec/fixtures/metanorma/bipm.rnc +148 -0
- data/spec/fixtures/metanorma/bipm.rng +376 -0
- data/spec/fixtures/metanorma/bsi.rnc +104 -0
- data/spec/fixtures/metanorma/bsi.rng +332 -0
- data/spec/fixtures/metanorma/csa.rnc +45 -0
- data/spec/fixtures/metanorma/csa.rng +131 -0
- data/spec/fixtures/metanorma/csd.rnc +43 -0
- data/spec/fixtures/metanorma/csd.rng +132 -0
- data/spec/fixtures/metanorma/gbstandard.rnc +99 -0
- data/spec/fixtures/metanorma/gbstandard.rng +316 -0
- data/spec/fixtures/metanorma/iec.rnc +49 -0
- data/spec/fixtures/metanorma/iec.rng +193 -0
- data/spec/fixtures/metanorma/ietf.rnc +275 -0
- data/spec/fixtures/metanorma/ietf.rng +925 -0
- data/spec/fixtures/metanorma/iho.rnc +58 -0
- data/spec/fixtures/metanorma/iho.rng +179 -0
- data/spec/fixtures/metanorma/isodoc.rnc +873 -0
- data/spec/fixtures/metanorma/isodoc.rng +2704 -0
- data/spec/fixtures/metanorma/isostandard-amd.rnc +43 -0
- data/spec/fixtures/metanorma/isostandard-amd.rng +108 -0
- data/spec/fixtures/metanorma/isostandard.rnc +166 -0
- data/spec/fixtures/metanorma/isostandard.rng +494 -0
- data/spec/fixtures/metanorma/itu.rnc +122 -0
- data/spec/fixtures/metanorma/itu.rng +377 -0
- data/spec/fixtures/metanorma/m3d.rnc +41 -0
- data/spec/fixtures/metanorma/m3d.rng +122 -0
- data/spec/fixtures/metanorma/mpfd.rnc +36 -0
- data/spec/fixtures/metanorma/mpfd.rng +95 -0
- data/spec/fixtures/metanorma/nist.rnc +77 -0
- data/spec/fixtures/metanorma/nist.rng +216 -0
- data/spec/fixtures/metanorma/ogc.rnc +51 -0
- data/spec/fixtures/metanorma/ogc.rng +151 -0
- data/spec/fixtures/metanorma/reqt.rnc +6 -0
- data/spec/fixtures/metanorma/rsd.rnc +36 -0
- data/spec/fixtures/metanorma/rsd.rng +95 -0
- data/spec/fixtures/metanorma/un.rnc +103 -0
- data/spec/fixtures/metanorma/un.rng +367 -0
- data/spec/fixtures/rnc/base.rnc +4 -0
- data/spec/fixtures/rnc/grammar_with_trailing.rnc +8 -0
- data/spec/fixtures/rnc/main_include_trailing.rnc +3 -0
- data/spec/fixtures/rnc/main_with_include.rnc +5 -0
- data/spec/fixtures/rnc/test_augment.rnc +10 -0
- data/spec/fixtures/rnc/test_isodoc_simple.rnc +9 -0
- data/spec/fixtures/rnc/top_level_include.rnc +8 -0
- data/spec/fixtures/spectest_external/case_10_4.7/x +3 -0
- data/spec/fixtures/spectest_external/case_10_4.7/y +7 -0
- data/spec/fixtures/spectest_external/case_11_4.7/x +3 -0
- data/spec/fixtures/spectest_external/case_12_4.7/x +3 -0
- data/spec/fixtures/spectest_external/case_13_4.7/x +3 -0
- data/spec/fixtures/spectest_external/case_13_4.7/y +3 -0
- data/spec/fixtures/spectest_external/case_14_4.7/x +7 -0
- data/spec/fixtures/spectest_external/case_15_4.7/x +7 -0
- data/spec/fixtures/spectest_external/case_16_4.7/x +5 -0
- data/spec/fixtures/spectest_external/case_17_4.7/x +5 -0
- data/spec/fixtures/spectest_external/case_18_4.7/x +7 -0
- data/spec/fixtures/spectest_external/case_19_4.7/level1.rng +9 -0
- data/spec/fixtures/spectest_external/case_19_4.7/level2.rng +7 -0
- data/spec/fixtures/spectest_external/case_1_4.5/sub1/x +3 -0
- data/spec/fixtures/spectest_external/case_1_4.5/sub3/x +3 -0
- data/spec/fixtures/spectest_external/case_1_4.5/x +3 -0
- data/spec/fixtures/spectest_external/case_20_4.6/x +3 -0
- data/spec/fixtures/spectest_external/case_2_4.5/x +3 -0
- data/spec/fixtures/spectest_external/case_3_4.6/x +3 -0
- data/spec/fixtures/spectest_external/case_4_4.6/x +3 -0
- data/spec/fixtures/spectest_external/case_5_4.6/x +1 -0
- data/spec/fixtures/spectest_external/case_6_4.6/x +5 -0
- data/spec/fixtures/spectest_external/case_7_4.6/x +1 -0
- data/spec/fixtures/spectest_external/case_7_4.6/y +1 -0
- data/spec/fixtures/spectest_external/case_8_4.7/x +7 -0
- data/spec/fixtures/spectest_external/case_9_4.7/x +7 -0
- data/spec/fixtures/spectest_external/resources.json +149 -0
- data/spec/rng/advanced_rnc_spec.rb +101 -0
- data/spec/rng/compacttest_spec.rb +197 -0
- data/spec/rng/datatype_declaration_spec.rb +28 -0
- data/spec/rng/div_spec.rb +207 -0
- data/spec/rng/external_ref_resolver_spec.rb +122 -0
- data/spec/rng/metanorma_conversion_spec.rb +159 -0
- data/spec/rng/namespace_declaration_spec.rb +60 -0
- data/spec/rng/namespace_support_spec.rb +199 -0
- data/spec/rng/rnc_parser_spec.rb +498 -22
- data/spec/rng/rnc_roundtrip_spec.rb +96 -82
- data/spec/rng/rng_generation_spec.rb +288 -0
- data/spec/rng/roundtrip_spec.rb +342 -0
- data/spec/rng/schema_preamble_spec.rb +145 -0
- data/spec/rng/schema_spec.rb +68 -64
- data/spec/rng/spectest_spec.rb +168 -90
- data/spec/rng_spec.rb +2 -2
- data/spec/spec_helper.rb +7 -42
- metadata +141 -8
|
@@ -0,0 +1,695 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Rng
|
|
4
|
+
# Normalizes parse tree structure into consistent grammar format
|
|
5
|
+
#
|
|
6
|
+
# Handles three different RNC file structures:
|
|
7
|
+
# 1. Top-level includes (Metanorma-style)
|
|
8
|
+
# 2. Grammar block wrapper
|
|
9
|
+
# 3. Flat grammar
|
|
10
|
+
#
|
|
11
|
+
# @example Basic Usage
|
|
12
|
+
# tree = parser.parse(rnc_content)
|
|
13
|
+
# processor = ParseTreeProcessor.new(tree)
|
|
14
|
+
# normalized = processor.normalize
|
|
15
|
+
# grammar_tree = normalized.grammar_tree
|
|
16
|
+
# namespace = normalized.namespace
|
|
17
|
+
#
|
|
18
|
+
class ParseTreeProcessor
|
|
19
|
+
attr_reader :tree, :namespace, :preamble, :grammar_tree
|
|
20
|
+
|
|
21
|
+
# Create a processor for a raw parse tree.
#
# Only stores the tree; the derived readers (+namespace+, +preamble+,
# +grammar_tree+) stay +nil+ until #normalize has been called.
#
# @param tree [Hash] Raw parse tree from Parslet parser
def initialize(tree)
  @tree = tree
  # Derived state, filled in by #normalize
  @namespace = @preamble = @grammar_tree = nil
end
|
|
30
|
+
|
|
31
|
+
# Run the full normalization pipeline over the stored parse tree.
#
# Steps, in order: extract the preamble, extract the legacy namespace,
# build the grammar tree, expand raw override/grammar/pattern blocks in
# that tree, then attach namespace/preamble metadata to it.
#
# @return [self] Returns self for chaining
def normalize
  @preamble = extract_preamble_section
  @namespace = extract_namespace
  # Raw blocks are expanded in place on the freshly built tree
  @grammar_tree = build_grammar_tree.tap { |built| process_raw_overrides!(built) }
  add_metadata_to_grammar
  self
end
|
|
45
|
+
|
|
46
|
+
private
|
|
47
|
+
|
|
48
|
+
# Read the legacy namespace entry stored at the root of the parse tree.
#
# @return [Object, nil] Whatever the parser stored under +:namespace+,
#   or nil when the key is absent
def extract_namespace = @tree[:namespace]
|
|
54
|
+
|
|
55
|
+
# Build a SchemaPreamble from the +:preamble_items+ entry of the parse tree.
#
# A single item is accepted as well as an Array of items. Items that are
# not Hashes (e.g. Parslet::Slice from annotation content) are ignored.
# Namespace declarations are recognised by their +:default_ns+,
# +:default_prefixed_ns+ or +:prefixed_ns+ key; an item with both +:prefix+
# and +:uri+ is treated as a datatype declaration.
#
# @return [SchemaPreamble, nil] Preamble object, or nil when there are no
#   preamble items or nothing was collected
def extract_preamble_section
  raw_items = @tree[:preamble_items]
  return nil unless raw_items

  collected = SchemaPreamble.new
  entries = raw_items.is_a?(Array) ? raw_items : [raw_items]

  entries.grep(Hash).each do |entry|
    if %i[default_ns default_prefixed_ns prefixed_ns].any? { |key| entry[key] }
      process_namespace_declaration(collected, entry)
    elsif entry[:prefix] && entry[:uri]
      process_datatype_declaration(collected, entry)
    end
  end

  collected.empty? ? nil : collected
end
|
|
80
|
+
|
|
81
|
+
# Convert one namespace declaration item into a NamespaceDeclaration and
# add it to the preamble.
#
# Three shapes are handled, checked in this order:
# - +:default_ns+          -- default namespace = "uri"         (uri, is_default)
# - +:default_prefixed_ns+ -- default namespace prefix = "uri"  (prefix, uri, is_default)
# - +:prefixed_ns+         -- namespace prefix = "uri"          (prefix, uri)
# Items with none of these keys are ignored.
#
# @param preamble [SchemaPreamble] Preamble to add to
# @param item [Hash] Namespace declaration from parse tree
# @return [Object, nil] Result of +preamble.add_namespace+, or nil when
#   the item is not a namespace declaration
def process_namespace_declaration(preamble, item)
  kind = %i[default_ns default_prefixed_ns prefixed_ns].find { |key| item[key] }
  return unless kind

  declared = item[kind]
  attrs = {}
  # The unprefixed default form carries no prefix
  attrs[:prefix] = extract_identifier(declared[:prefix]) unless kind == :default_ns
  attrs[:uri] = extract_string_literal(declared[:uri])
  # Only the two "default namespace" forms are flagged as default
  attrs[:is_default] = true unless kind == :prefixed_ns

  preamble.add_namespace(NamespaceDeclaration.new(**attrs))
end
|
|
111
|
+
|
|
112
|
+
# Convert one datatype library declaration item into a DatatypeDeclaration
# and add it to the preamble.
#
# @param preamble [SchemaPreamble] Preamble to add to
# @param item [Hash] Datatype declaration from parse tree (+:prefix+, +:uri+)
# @return [Object] Result of +preamble.add_datatype+
def process_datatype_declaration(preamble, item)
  declaration = DatatypeDeclaration.new(
    prefix: extract_identifier(item[:prefix]),
    uri: extract_string_literal(item[:uri])
  )
  preamble.add_datatype(declaration)
end
|
|
123
|
+
|
|
124
|
+
# Rebuild an identifier string from its parsed parts.
#
# Each part contributes either its plain +:char+ or, for a
# +:hex_escape+ (\x{HEX}), the character with that hexadecimal code
# point. Parts with neither key contribute nothing.
#
# @param id [Hash, nil] Identifier with :identifier_parts
# @return [String] Extracted identifier ('' when +id+ or its parts are missing)
def extract_identifier(id)
  parts = id && id[:identifier_parts]
  return '' unless parts

  decoded = parts.map do |part|
    next extract_parslet_string(part[:char]) if part[:char]
    next '' unless part[:hex_escape]

    # Hex digits -> code point -> UTF-8 character
    [extract_parslet_string(part[:hex_escape][:hex]).to_i(16)].pack('U')
  end
  decoded.join
end
|
|
143
|
+
|
|
144
|
+
# Resolve a parsed string literal, including any concatenated segments,
# into a single Ruby string.
#
# The leading segment comes from +:string_parts+; when +:concatenations+
# is an Array, each entry's +:concat_string_parts+ is appended in order.
#
# @param lit [Hash, nil] String literal with :string_parts and :concatenations
# @return [String] Extracted string ('' when +lit+ is nil/false)
def extract_string_literal(lit)
  return '' unless lit

  head = extract_string_parts(lit[:string_parts])
  tail = lit[:concatenations]
  return head unless tail.is_a?(Array)

  tail.reduce(head) { |text, extra| text + extract_string_parts(extra[:concat_string_parts]) }
end
|
|
162
|
+
|
|
163
|
+
# Collect the documentation comment attached to a parse tree node.
#
# Reads +node[:docs][:documentation]+ (a single entry or an Array of
# entries), takes each entry's +:doc_line+ (entries without one count as
# an empty line), joins the lines with newlines and strips the result.
#
# @param node [Hash] Node that may contain :docs
# @return [String, nil] Documentation text, or nil when the node is not a
#   Hash or carries no documentation
def extract_documentation(node)
  docs = node.is_a?(Hash) ? node[:docs] : nil
  return nil unless docs

  entries = docs[:documentation]
  return nil unless entries

  entries = [entries] unless entries.is_a?(Array)
  texts = entries.map { |entry| entry[:doc_line] ? extract_parslet_string(entry[:doc_line]) : '' }
  texts.join("\n").strip
end
|
|
183
|
+
|
|
184
|
+
# Namespace URI of RELAX NG itself; annotations must not use it.
RNG_NAMESPACE = 'http://relaxng.org/ns/structure/1.0'

# Extract annotations from parse tree node
#
# The +:annotations+ entry holds the first annotation inline plus any
# further ones under +:more_annotations+. Entries with an +:attr_value+
# become foreign attributes; entries with an +:elem_content+ key become
# foreign elements. Entries that are not Hashes or have no +:ann_name+
# are skipped.
#
# NOTE(review): @namespace_prefixes is not assigned anywhere in the visible
# part of this class; unless it is set elsewhere, the RELAX NG namespace
# checks below are never triggered -- confirm.
#
# @param node [Hash] Node that may contain :annotations
# @return [Hash] Hash with :attributes and :elements arrays
# @raise [StandardError] on a duplicate annotation attribute, an
#   unprefixed +xmlns+ attribute, or an attribute/element whose prefix
#   maps to the RELAX NG namespace
def extract_annotations(node)
  collected = { attributes: [], elements: [] }
  block = node.is_a?(Hash) ? node[:annotations] : nil
  return collected unless block

  # Flatten "first annotation + more_annotations" into one list
  pending = []
  if block.is_a?(Hash)
    head = block.except(:more_annotations)
    pending << head unless head.empty?

    extra = block[:more_annotations]
    pending.concat(extra.is_a?(Array) ? extra : [extra]) if extra
  end

  # TC 70-71: true when the prefix is bound to the RELAX NG namespace
  rng_prefix = lambda do |prefix|
    prefix && @namespace_prefixes && @namespace_prefixes[prefix] == RNG_NAMESPACE
  end

  # TC 11-12: attribute names already seen, for duplicate detection
  used_attr_keys = {}

  pending.each do |entry|
    next unless entry.is_a?(Hash) && entry[:ann_name]

    qname = extract_qualified_name(entry[:ann_name])
    prefix = qname[:prefix]
    local = qname[:local]

    if entry[:attr_value]
      # Foreign attribute
      literal = extract_string_literal(entry[:attr_value])

      key = "#{prefix}:#{local}"
      raise StandardError, "duplicate annotation attribute '#{key}'" if used_attr_keys.key?(key)

      used_attr_keys[key] = true

      # TC 18: xmlns attribute is forbidden in annotations
      raise StandardError, 'xmlns attribute is not allowed in annotations' if local == 'xmlns' && prefix.nil?
      raise StandardError, 'attributes in the RELAX NG namespace are not allowed' if rng_prefix.call(prefix)

      collected[:attributes] << { name: local, namespace: prefix, value: literal }
    elsif entry.key?(:elem_content)
      # Foreign element
      inner = extract_annotation_content(entry[:elem_content])
      raise StandardError, 'elements in the RELAX NG namespace are not allowed' if rng_prefix.call(prefix)

      collected[:elements] << {
        name: local,
        namespace: prefix,
        content: inner[:text],
        attributes: inner[:attributes],
        elements: inner[:elements]
      }
    end
  end

  collected
end
|
|
269
|
+
|
|
270
|
+
# Split a parsed qualified name into its prefix and local part.
#
# The prefix is only extracted when the parse tree carries a +:prefix+
# entry; otherwise it is nil. The local part comes from +:local_name+.
#
# @param qname [Hash, nil] Qualified name from parse tree
# @return [Hash] Hash with :prefix and :local keys
def extract_qualified_name(qname)
  return { prefix: nil, local: '' } unless qname

  {
    prefix: (extract_identifier(qname[:prefix]) if qname[:prefix]),
    local: extract_identifier(qname[:local_name])
  }
end
|
|
284
|
+
|
|
285
|
+
# Break the content of an annotation element into text and nested items.
#
# The content holds a +:first+ item and a +:rest+ item or list. Items with
# +:text+ are string literals whose values are concatenated; items with
# +:ann_name+ are nested annotations whose attributes and elements are
# merged into the result.
#
# @param content [Hash, nil] Annotation content from parse tree
# @return [Hash] Hash with :text, :attributes, :elements
def extract_annotation_content(content)
  parsed = { text: '', attributes: [], elements: [] }
  return parsed if content.nil?

  entries = []
  entries << content[:first] if content[:first]
  if (tail = content[:rest])
    entries.concat(tail.is_a?(Array) ? tail : [tail])
  end

  fragments = []
  entries.each do |entry|
    if entry[:text]
      fragments << extract_string_literal(entry[:text])
    elsif entry[:ann_name]
      # Nested annotation item: reuse the top-level extractor
      inner = extract_annotations({ annotations: entry })
      parsed[:attributes].concat(inner[:attributes])
      parsed[:elements].concat(inner[:elements])
    end
  end

  parsed[:text] = fragments.join unless fragments.empty?
  parsed
end
|
|
322
|
+
|
|
323
|
+
# Extract string from string_parts array
#
# Accepts nil, a String, an object responding to +str+ (e.g.
# Parslet::Slice) or an Array of parts. Each Array element may itself be a
# String, a slice, or a Hash describing one of:
# - +:hex_escape+  -- \x{HEX}, replaced by the character with that code point
# - +:char_escape+ -- \", \\, \n, \r, \t are resolved; the class escapes
#   \i, \c, \d, \w are kept as a backslash sequence; any other escaped
#   character stands for itself
# - +:char+        -- a plain character
# Any other element is converted with +to_s+ (so nil contributes nothing).
#
# @param parts [Array, String, nil] String parts
# @return [String] Extracted string ('' for nil or unsupported input)
def extract_string_parts(parts)
  return '' unless parts
  return parts if parts.is_a?(String)
  return parts.str if parts.respond_to?(:str)

  return '' unless parts.is_a?(Array)

  parts.map do |part|
    if part.is_a?(String)
      part
    elsif part.respond_to?(:str)
      part.str
    elsif !part.is_a?(Hash)
      # Guard before any part[:key] lookup: indexing nil, an Integer or a
      # Symbol with a Symbol key raises instead of reaching the fallback.
      part.to_s
    elsif part[:hex_escape]
      # Handle \x{HEX}
      hex_str = extract_parslet_string(part[:hex_escape][:hex])
      [hex_str.to_i(16)].pack('U')
    elsif part[:char_escape]
      # Handle \", \\, \n, \r, \t, and RELAX NG class escapes \i, \c, \d, \w
      char = extract_parslet_string(part[:char_escape][:char])
      case char
      when 'n' then "\n"
      when 'r' then "\r"
      when 't' then "\t"
      when 'i', 'c', 'd', 'w' then "\\#{char}"
      else char # includes \" and \\, which stand for themselves
      end
    elsif part[:char]
      # Regular character (plain char in string literal)
      extract_parslet_string(part[:char])
    else
      part.to_s
    end
  end.join
end
|
|
366
|
+
|
|
367
|
+
# Turn a parse tree leaf into a plain String.
#
# Objects that respond to +str+ (such as Parslet::Slice) yield that value;
# anything else is converted with +to_s+.
#
# @param obj [Parslet::Slice, String, Object] Object to extract
# @return [String] Extracted string
def extract_parslet_string(obj)
  return obj.str if obj.respond_to?(:str)

  obj.to_s
end
|
|
374
|
+
|
|
375
|
+
# Build the normalized grammar tree for whichever layout the parse tree has.
#
# Any +:raw_trailing+ content is expanded first, because the builders
# below read the resulting +:trailing_definitions+. The layout is then
# chosen in this order:
# - top-level includes (+:top_includes+ key present)
# - grammar block (+:inner_grammar+ key present)
# - flat grammar (everything else)
#
# @return [Hash] Normalized grammar tree
def build_grammar_tree
  process_raw_trailing!(@tree) if @tree[:raw_trailing]

  return build_top_level_includes_grammar if top_level_includes?
  return build_grammar_block_grammar if grammar_block?

  build_flat_grammar
end
|
|
395
|
+
|
|
396
|
+
# Whether the parse tree uses the top-level includes layout, i.e. has a
# +:top_includes+ key (regardless of its value).
#
# @return [Boolean]
def top_level_includes? = @tree.key?(:top_includes)
|
|
402
|
+
|
|
403
|
+
# Whether the parse tree is wrapped in a grammar block, i.e. has an
# +:inner_grammar+ key (regardless of its value).
#
# @return [Boolean]
def grammar_block? = @tree.key?(:inner_grammar)
|
|
409
|
+
|
|
410
|
+
# Build the grammar Hash for a file made of top-level includes.
#
# The result has no start pattern. +:definitions+ is a new Array holding
# the top-level includes followed by any trailing definitions;
# +:includes+ is the original +:top_includes+ value (or [] when nil).
#
# @return [Hash] Grammar with :start, :includes and :definitions
def build_top_level_includes_grammar
  top = @tree[:top_includes]
  trailing = @tree[:trailing_definitions]

  # Includes first, then whatever followed them in the file
  merged = [top, trailing].each_with_object([]) do |section, acc|
    acc.concat(section) if section
  end

  { start: nil, includes: top || [], definitions: merged }
end
|
|
428
|
+
|
|
429
|
+
# Build the grammar Hash for a file wrapped in a grammar block.
#
# Works on a shallow copy of +:inner_grammar+. When the copy has an
# +:includes+ or +:patterns+ key, their (truthy) values are removed and
# merged, includes first, into +:definitions+ -- which is only set when
# the merged list is non-empty. Trailing definitions are appended last.
#
# @return [Hash] Normalized grammar
def build_grammar_block_grammar
  inner = @tree[:inner_grammar].dup
  sections = %i[includes patterns]

  if sections.any? { |section| inner.key?(section) }
    merged = sections.each_with_object([]) do |section, acc|
      acc.concat(inner.delete(section)) if inner[section]
    end
    inner[:definitions] = merged unless merged.empty?
  end

  inner.tap { |grammar| merge_trailing_definitions(grammar) }
end
|
|
446
|
+
|
|
447
|
+
# Build the grammar Hash for a flat file (no grammar block, no top-level
# includes).
#
# Works on a shallow copy of the whole parse tree. When the copy has an
# +:includes+ or +:patterns+ key, their (truthy) values are removed and
# merged, includes first, into +:definitions+ -- which is only set when
# the merged list is non-empty.
#
# @return [Hash] Normalized grammar
def build_flat_grammar
  flat = @tree.dup
  sections = %i[includes patterns]
  return flat unless sections.any? { |section| flat.key?(section) }

  merged = sections.each_with_object([]) do |section, acc|
    acc.concat(flat.delete(section)) if flat[section]
  end
  flat[:definitions] = merged unless merged.empty?
  flat
end
|
|
463
|
+
|
|
464
|
+
# Merge trailing definitions into grammar
#
# Appends the parse tree's +:trailing_definitions+ to
# +grammar[:definitions]+. A new Array is assigned rather than extending
# the existing one in place: +grammar+ may be a shallow copy of the parse
# tree, so an in-place append would also change the tree's own
# +:definitions+ array and duplicate entries on repeated normalization.
#
# @param grammar [Hash] Grammar to merge into (its :definitions entry is replaced)
# @return [Array, nil] The merged definitions, or nil when there is
#   nothing to merge
def merge_trailing_definitions(grammar)
  trailing = @tree[:trailing_definitions]
  return unless trailing && !trailing.empty?

  grammar[:definitions] = (grammar[:definitions] || []) + trailing
end
|
|
473
|
+
|
|
474
|
+
# Process raw override and grammar blocks recursively
#
# Walks the tree depth-first. On every Hash node, raw text captured by the
# parser is expanded in place before descending, so the freshly parsed
# content is visited too:
# - +:override+ holding a +:raw_override+ -> parsed override
# - +:raw_grammar+                        -> parsed grammar block
# - +:raw_patterns+                       -> parsed pattern list
# Scalars (strings, slices, nil) are left untouched.
#
# @param node [Hash, Array, Object] Tree node to process
# @return [Hash, Array, nil] The node for Hash/Array input, nil otherwise
def process_raw_overrides!(node)
  case node
  when Hash
    # Only a Hash can carry :raw_override; a String or slice under
    # :override has no #dig and must simply be skipped.
    override = node[:override]
    parse_and_replace_override!(node) if override.is_a?(Hash) && override[:raw_override]

    # Check for raw_grammar that needs parsing (in grammar_block)
    parse_and_replace_grammar!(node) if node[:raw_grammar]

    # Check for raw_patterns that need parsing (in flat grammar)
    parse_and_replace_patterns!(node) if node[:raw_patterns]

    node.each_value { |value| process_raw_overrides!(value) }
  when Array
    node.each { |item| process_raw_overrides!(item) }
  end
end
|
|
496
|
+
|
|
497
|
+
# Replace a node's raw override text with its parsed form, in place.
#
# A blank override is removed from the node altogether; otherwise
# +node[:override]+ becomes the parsed structure.
#
# @param node [Hash] Node containing :override with :raw_override
# @return [Hash, nil] The removed override entry for a blank override,
#   else the parsed override
def parse_and_replace_override!(node)
  body = extract_raw_text(node[:override][:raw_override])

  # Empty override - remove it
  return node.delete(:override) if body.strip.empty?

  node[:override] = parse_override_with_scope(body)
end
|
|
513
|
+
|
|
514
|
+
# Flatten raw captured content into one String.
#
# A String is returned as is. An Array has each element converted (via
# +str+ when available, otherwise +to_s+) and the pieces concatenated.
# Any other object is converted the same way as a single element.
#
# @param raw [Array, Parslet::Slice, String] Raw content
# @return [String] Extracted text
def extract_raw_text(raw)
  return raw if raw.is_a?(String)

  stringify = ->(piece) { piece.respond_to?(:str) ? piece.str : piece.to_s }
  raw.is_a?(Array) ? raw.map(&stringify).join : stringify.call(raw)
end
|
|
528
|
+
|
|
529
|
+
# Replace a node's raw grammar text with its parsed form, in place.
#
# The +:raw_grammar+ key is always removed. A blank grammar becomes an
# empty structure merged into the node. Otherwise the parsed grammar is
# merged straight into the node when +:raw_grammar+ was its only key (the
# node already is the inner grammar), or stored under +:inner_grammar+
# when the node has other keys.
#
# @param node [Hash] Node containing :raw_grammar
# @return [Hash] The node (blank or merged case) or the parsed grammar
def parse_and_replace_grammar!(node)
  body = extract_raw_text(node[:raw_grammar])
  node.delete(:raw_grammar)

  # Empty grammar - use empty structure
  return node.merge!(start: nil, includes: [], patterns: []) if body.strip.empty?

  parsed = parse_grammar_with_scope(body)
  # Avoid nesting when the node held nothing but the raw grammar
  return node.merge!(parsed) if node.empty?

  node[:inner_grammar] = parsed
end
|
|
554
|
+
|
|
555
|
+
# Parse the text of an override block.
#
# The stripped text is parsed with the parser's +grammar+ rule; only the
# +:start+ and +:patterns+ entries of the result are kept (no includes).
# A parse failure is not propagated: an empty override is returned
# instead, and a warning is printed only when the RNG_VERBOSE environment
# variable is set.
#
# @param text [String] Override block content
# @return [Hash] Parsed structure with :start and :patterns
def parse_override_with_scope(text)
  parsed = Rng::RncParser.new.grammar.parse(text.strip)

  { start: parsed[:start], patterns: parsed[:patterns] || [] }
rescue Parslet::ParseFailed => error
  # Deliberate fallback; set RNG_VERBOSE=1 to see why parsing failed
  warn "Warning: Failed to parse override block: #{error.message}" if ENV['RNG_VERBOSE']
  { start: nil, patterns: [] }
end
|
|
580
|
+
|
|
581
|
+
# Parse the text of a grammar block.
#
# The stripped text is parsed with the parser's +grammar+ rule and the
# result is returned unchanged. A parse failure is not propagated: an
# empty grammar is returned instead, and a warning is printed only when
# the RNG_VERBOSE environment variable is set.
#
# @param text [String] Grammar block content
# @return [Hash] Parsed structure
def parse_grammar_with_scope(text)
  Rng::RncParser.new.grammar.parse(text.strip)
rescue Parslet::ParseFailed => error
  # Deliberate fallback; set RNG_VERBOSE=1 to see why parsing failed
  warn "Warning: Failed to parse grammar block: #{error.message}" if ENV['RNG_VERBOSE']
  { start: nil, includes: [], patterns: [] }
end
|
|
601
|
+
|
|
602
|
+
# Replace a node's raw pattern text with parsed patterns, in place.
#
# +node[:patterns]+ becomes the parsed list ([] for blank text) and the
# +:raw_patterns+ key is removed afterwards.
#
# @param node [Hash] Node containing :raw_patterns
# @return [Object] The removed raw patterns value
def parse_and_replace_patterns!(node)
  body = extract_raw_text(node[:raw_patterns])
  node[:patterns] = body.strip.empty? ? [] : parse_patterns_with_scope(body)

  node.delete(:raw_patterns)
end
|
|
621
|
+
|
|
622
|
+
# Parse text consisting of a sequence of pattern definitions.
#
# Builds an ad-hoc rule from the parser's own rules --
# (named_pattern | div_block | element_def) followed by whitespace,
# repeated -- and applies it to the stripped text. A parse failure is not
# propagated: an empty list is returned instead, and a warning is printed
# only when the RNG_VERBOSE environment variable is set.
#
# @param text [String] Patterns content
# @return [Array] Parsed patterns (a non-Array result is wrapped)
def parse_patterns_with_scope(text)
  parser = Rng::RncParser.new

  definition = parser.named_pattern |
               parser.div_block.as(:div) |
               parser.element_def.as(:top_element)
  patterns_rule = (definition >> parser.whitespace).repeat

  parsed = patterns_rule.parse(text.strip)
  parsed.is_a?(Array) ? parsed : [parsed]
rescue Parslet::ParseFailed => error
  # Deliberate fallback; set RNG_VERBOSE=1 to see why parsing failed
  warn "Warning: Failed to parse patterns: #{error.message}" if ENV['RNG_VERBOSE']
  []
end
|
|
649
|
+
|
|
650
|
+
# Replace a node's raw trailing text with parsed definitions, in place.
#
# +node[:trailing_definitions]+ becomes the parsed list ([] for blank
# text) and the +:raw_trailing+ key is removed afterwards.
#
# @param node [Hash] Node containing :raw_trailing
# @return [Object] The removed raw trailing value
def process_raw_trailing!(node)
  body = extract_raw_text(node[:raw_trailing])
  node[:trailing_definitions] = body.strip.empty? ? [] : parse_patterns_with_scope(body)

  node.delete(:raw_trailing)
end
|
|
668
|
+
|
|
669
|
+
# Attach namespace and preamble metadata to the grammar tree.
#
# Written keys:
# - +:namespace+         -- the legacy namespace, when present; replaced by
#   <tt>{ namespace_uri: ... }</tt> when the preamble has a default
#   namespace (legacy format expected by the converter)
# - +:default_namespace+ -- the preamble's default namespace, when set
# - +:namespace_map+     -- the preamble's namespace map, when non-empty
# - +:datatype_map+      -- the preamble's datatype map, when non-empty
# Without a preamble only the legacy namespace is written.
#
# @return [Object, nil] The datatype map when it was stored, otherwise nil
def add_metadata_to_grammar
  target = @grammar_tree

  # Legacy namespace (backward compatibility)
  target[:namespace] = @namespace if @namespace
  return unless @preamble

  if (default_ns = @preamble.default_namespace)
    target[:default_namespace] = default_ns
    target[:namespace] = { namespace_uri: default_ns }
  end

  ns_map = @preamble.namespace_map
  target[:namespace_map] = ns_map unless ns_map.empty?

  dt_map = @preamble.datatype_map
  target[:datatype_map] = dt_map unless dt_map.empty?
end
|
|
694
|
+
end
|
|
695
|
+
end
|