rng 0.1.2 → 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/docs.yml +63 -0
- data/.github/workflows/release.yml +8 -3
- data/.gitignore +11 -0
- data/.rubocop.yml +10 -7
- data/.rubocop_todo.yml +229 -23
- data/CHANGELOG.md +317 -0
- data/CLAUDE.md +139 -0
- data/Gemfile +11 -12
- data/README.adoc +1538 -11
- data/Rakefile +11 -3
- data/docs/Gemfile +8 -0
- data/docs/_config.yml +23 -0
- data/docs/getting-started/index.adoc +75 -0
- data/docs/guides/error-handling.adoc +137 -0
- data/docs/guides/external-references.adoc +128 -0
- data/docs/guides/index.adoc +24 -0
- data/docs/guides/parsing-rnc.adoc +141 -0
- data/docs/guides/parsing-rng-xml.adoc +81 -0
- data/docs/guides/rng-to-rnc.adoc +101 -0
- data/docs/guides/validation.adoc +85 -0
- data/docs/index.adoc +52 -0
- data/docs/reference/api.adoc +126 -0
- data/docs/reference/cli.adoc +182 -0
- data/docs/understanding/architecture.adoc +58 -0
- data/docs/understanding/rng-vs-rnc.adoc +118 -0
- data/exe/rng +5 -0
- data/lib/rng/any_name.rb +10 -8
- data/lib/rng/attribute.rb +28 -26
- data/lib/rng/choice.rb +24 -24
- data/lib/rng/cli.rb +607 -0
- data/lib/rng/data.rb +10 -10
- data/lib/rng/datatype_declaration.rb +26 -0
- data/lib/rng/define.rb +44 -41
- data/lib/rng/div.rb +36 -0
- data/lib/rng/documentation.rb +9 -0
- data/lib/rng/element.rb +39 -37
- data/lib/rng/empty.rb +7 -7
- data/lib/rng/except.rb +25 -25
- data/lib/rng/external_ref.rb +8 -8
- data/lib/rng/external_ref_resolver.rb +602 -0
- data/lib/rng/foreign_attribute.rb +26 -0
- data/lib/rng/foreign_element.rb +33 -0
- data/lib/rng/grammar.rb +14 -12
- data/lib/rng/group.rb +26 -24
- data/lib/rng/include.rb +5 -6
- data/lib/rng/include_processor.rb +461 -0
- data/lib/rng/interleave.rb +23 -23
- data/lib/rng/list.rb +22 -22
- data/lib/rng/mixed.rb +23 -23
- data/lib/rng/name.rb +6 -7
- data/lib/rng/namespace_declaration.rb +47 -0
- data/lib/rng/namespaces.rb +15 -0
- data/lib/rng/not_allowed.rb +7 -7
- data/lib/rng/ns_name.rb +9 -9
- data/lib/rng/one_or_more.rb +23 -23
- data/lib/rng/optional.rb +23 -23
- data/lib/rng/param.rb +7 -8
- data/lib/rng/parent_ref.rb +8 -8
- data/lib/rng/parse_tree_processor.rb +695 -0
- data/lib/rng/pattern.rb +7 -7
- data/lib/rng/ref.rb +8 -8
- data/lib/rng/rnc_builder.rb +927 -0
- data/lib/rng/rnc_parser.rb +605 -305
- data/lib/rng/rnc_to_rng_converter.rb +1408 -0
- data/lib/rng/schema_preamble.rb +73 -0
- data/lib/rng/schema_validator.rb +1622 -0
- data/lib/rng/start.rb +27 -25
- data/lib/rng/test_suite_parser.rb +168 -0
- data/lib/rng/text.rb +11 -8
- data/lib/rng/to_rnc.rb +4 -35
- data/lib/rng/value.rb +6 -7
- data/lib/rng/version.rb +1 -1
- data/lib/rng/zero_or_more.rb +23 -23
- data/lib/rng.rb +68 -17
- data/rng.gemspec +18 -19
- data/scripts/extract_spectest_resources.rb +96 -0
- data/spec/fixtures/compacttest.xml +2511 -0
- data/spec/fixtures/external/circular_a.rng +7 -0
- data/spec/fixtures/external/circular_b.rng +7 -0
- data/spec/fixtures/external/circular_main.rng +7 -0
- data/spec/fixtures/external/external_ref_lib.rng +7 -0
- data/spec/fixtures/external/external_ref_main.rng +7 -0
- data/spec/fixtures/external/include_lib.rng +7 -0
- data/spec/fixtures/external/include_main.rng +3 -0
- data/spec/fixtures/external/nested_chain.rng +6 -0
- data/spec/fixtures/external/nested_leaf.rng +7 -0
- data/spec/fixtures/external/nested_mid.rng +8 -0
- data/spec/fixtures/metanorma/3gpp.rnc +35 -0
- data/spec/fixtures/metanorma/3gpp.rng +105 -0
- data/spec/fixtures/metanorma/basicdoc.rnc +11 -0
- data/spec/fixtures/metanorma/bipm.rnc +148 -0
- data/spec/fixtures/metanorma/bipm.rng +376 -0
- data/spec/fixtures/metanorma/bsi.rnc +104 -0
- data/spec/fixtures/metanorma/bsi.rng +332 -0
- data/spec/fixtures/metanorma/csa.rnc +45 -0
- data/spec/fixtures/metanorma/csa.rng +131 -0
- data/spec/fixtures/metanorma/csd.rnc +43 -0
- data/spec/fixtures/metanorma/csd.rng +132 -0
- data/spec/fixtures/metanorma/gbstandard.rnc +99 -0
- data/spec/fixtures/metanorma/gbstandard.rng +316 -0
- data/spec/fixtures/metanorma/iec.rnc +49 -0
- data/spec/fixtures/metanorma/iec.rng +193 -0
- data/spec/fixtures/metanorma/ietf.rnc +275 -0
- data/spec/fixtures/metanorma/ietf.rng +925 -0
- data/spec/fixtures/metanorma/iho.rnc +58 -0
- data/spec/fixtures/metanorma/iho.rng +179 -0
- data/spec/fixtures/metanorma/isodoc.rnc +873 -0
- data/spec/fixtures/metanorma/isodoc.rng +2704 -0
- data/spec/fixtures/metanorma/isostandard-amd.rnc +43 -0
- data/spec/fixtures/metanorma/isostandard-amd.rng +108 -0
- data/spec/fixtures/metanorma/isostandard.rnc +166 -0
- data/spec/fixtures/metanorma/isostandard.rng +494 -0
- data/spec/fixtures/metanorma/itu.rnc +122 -0
- data/spec/fixtures/metanorma/itu.rng +377 -0
- data/spec/fixtures/metanorma/m3d.rnc +41 -0
- data/spec/fixtures/metanorma/m3d.rng +122 -0
- data/spec/fixtures/metanorma/mpfd.rnc +36 -0
- data/spec/fixtures/metanorma/mpfd.rng +95 -0
- data/spec/fixtures/metanorma/nist.rnc +77 -0
- data/spec/fixtures/metanorma/nist.rng +216 -0
- data/spec/fixtures/metanorma/ogc.rnc +51 -0
- data/spec/fixtures/metanorma/ogc.rng +151 -0
- data/spec/fixtures/metanorma/reqt.rnc +6 -0
- data/spec/fixtures/metanorma/rsd.rnc +36 -0
- data/spec/fixtures/metanorma/rsd.rng +95 -0
- data/spec/fixtures/metanorma/un.rnc +103 -0
- data/spec/fixtures/metanorma/un.rng +367 -0
- data/spec/fixtures/rnc/base.rnc +4 -0
- data/spec/fixtures/rnc/grammar_with_trailing.rnc +8 -0
- data/spec/fixtures/rnc/main_include_trailing.rnc +3 -0
- data/spec/fixtures/rnc/main_with_include.rnc +5 -0
- data/spec/fixtures/rnc/test_augment.rnc +10 -0
- data/spec/fixtures/rnc/test_isodoc_simple.rnc +9 -0
- data/spec/fixtures/rnc/top_level_include.rnc +8 -0
- data/spec/fixtures/spectest_external/case_10_4.7/x +3 -0
- data/spec/fixtures/spectest_external/case_10_4.7/y +7 -0
- data/spec/fixtures/spectest_external/case_11_4.7/x +3 -0
- data/spec/fixtures/spectest_external/case_12_4.7/x +3 -0
- data/spec/fixtures/spectest_external/case_13_4.7/x +3 -0
- data/spec/fixtures/spectest_external/case_13_4.7/y +3 -0
- data/spec/fixtures/spectest_external/case_14_4.7/x +7 -0
- data/spec/fixtures/spectest_external/case_15_4.7/x +7 -0
- data/spec/fixtures/spectest_external/case_16_4.7/x +5 -0
- data/spec/fixtures/spectest_external/case_17_4.7/x +5 -0
- data/spec/fixtures/spectest_external/case_18_4.7/x +7 -0
- data/spec/fixtures/spectest_external/case_19_4.7/level1.rng +9 -0
- data/spec/fixtures/spectest_external/case_19_4.7/level2.rng +7 -0
- data/spec/fixtures/spectest_external/case_1_4.5/sub1/x +3 -0
- data/spec/fixtures/spectest_external/case_1_4.5/sub3/x +3 -0
- data/spec/fixtures/spectest_external/case_1_4.5/x +3 -0
- data/spec/fixtures/spectest_external/case_20_4.6/x +3 -0
- data/spec/fixtures/spectest_external/case_2_4.5/x +3 -0
- data/spec/fixtures/spectest_external/case_3_4.6/x +3 -0
- data/spec/fixtures/spectest_external/case_4_4.6/x +3 -0
- data/spec/fixtures/spectest_external/case_5_4.6/x +1 -0
- data/spec/fixtures/spectest_external/case_6_4.6/x +5 -0
- data/spec/fixtures/spectest_external/case_7_4.6/x +1 -0
- data/spec/fixtures/spectest_external/case_7_4.6/y +1 -0
- data/spec/fixtures/spectest_external/case_8_4.7/x +7 -0
- data/spec/fixtures/spectest_external/case_9_4.7/x +7 -0
- data/spec/fixtures/spectest_external/resources.json +149 -0
- data/spec/rng/advanced_rnc_spec.rb +101 -0
- data/spec/rng/compacttest_spec.rb +197 -0
- data/spec/rng/datatype_declaration_spec.rb +28 -0
- data/spec/rng/div_spec.rb +207 -0
- data/spec/rng/external_ref_resolver_spec.rb +122 -0
- data/spec/rng/metanorma_conversion_spec.rb +159 -0
- data/spec/rng/namespace_declaration_spec.rb +60 -0
- data/spec/rng/namespace_support_spec.rb +199 -0
- data/spec/rng/rnc_parser_spec.rb +498 -22
- data/spec/rng/rnc_roundtrip_spec.rb +96 -82
- data/spec/rng/rng_generation_spec.rb +288 -0
- data/spec/rng/roundtrip_spec.rb +342 -0
- data/spec/rng/schema_preamble_spec.rb +145 -0
- data/spec/rng/schema_spec.rb +68 -64
- data/spec/rng/spectest_spec.rb +168 -90
- data/spec/rng_spec.rb +2 -2
- data/spec/spec_helper.rb +7 -42
- metadata +141 -8
data/lib/rng/rnc_parser.rb
CHANGED
|
@@ -1,393 +1,693 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require
|
|
4
|
-
require
|
|
5
|
-
|
|
3
|
+
require 'parslet'
|
|
4
|
+
require 'nokogiri'
|
|
5
|
+
require 'set'
|
|
6
6
|
|
|
7
7
|
module Rng
|
|
8
8
|
class RncParser < Parslet::Parser
|
|
9
|
+
# Returns a plain string for +obj+.
#
# - Objects responding to +str+ (e.g. Parslet::Slice) yield the result of +str+,
#   which carries no position marker.
# - A String is returned as-is (same object).
# - Anything else is converted with +to_s+.
def self.extract_string(obj)
  return obj.str if obj.respond_to?(:str)
  return obj if obj.is_a?(String)

  obj.to_s
end
|
|
20
|
+
|
|
21
|
+
# Comments
|
|
22
|
+
# Regular comment: single #
|
|
23
|
+
rule(:comment) { str('#') >> str('#').absent? >> match('[^\n]').repeat >> (str("\n") | any.absent?) }
|
|
24
|
+
rule(:comment?) { comment.maybe }
|
|
25
|
+
|
|
26
|
+
# Documentation comment: ##
|
|
27
|
+
rule(:doc_comment) { str('##') >> match('[^\n]').repeat.as(:doc_line) >> (str("\n") | any.absent?) }
|
|
28
|
+
rule(:doc_comments) { (doc_comment >> (whitespace.maybe >> doc_comment).repeat).as(:documentation) }
|
|
29
|
+
|
|
30
|
+
# Whitespace (including comments)
|
|
9
31
|
rule(:space) { match('\s').repeat(1) }
|
|
10
32
|
rule(:space?) { space.maybe }
|
|
11
33
|
rule(:newline) { (str("\r").maybe >> str("\n")).repeat(1) }
|
|
12
34
|
rule(:newline?) { newline.maybe }
|
|
13
|
-
|
|
14
|
-
rule(:
|
|
35
|
+
# Only regular comments in whitespace - doc comments are captured by pattern rules
|
|
36
|
+
rule(:whitespace) { (space | newline | comment).repeat }
|
|
37
|
+
rule(:comma) { str(',') }
|
|
15
38
|
rule(:comma?) { (whitespace >> comma >> whitespace).maybe }
|
|
16
39
|
|
|
17
|
-
|
|
18
|
-
|
|
40
|
+
# Escape sequences support
|
|
41
|
+
# Unicode code point: \x{HHHHHH} (1-6 hex digits)
|
|
42
|
+
rule(:hex_escape) do
|
|
43
|
+
str('\\x{') >> match('[0-9a-fA-F]').repeat(1, 6).as(:hex) >> str('}')
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Match a keyword that may contain hex escapes
|
|
47
|
+
# Hex escapes are resolved in pre-processing, so keywords match literally here
|
|
48
|
+
# NOTE: the case where pre-processing did not happen is not handled here; kw is matched literally via str(kw)
|
|
49
|
+
def keyword(kw)
|
|
50
|
+
str(kw)
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Character escapes for strings: \", \\, \n, \r, \t, and RELAX NG class escapes: \i, \c, \d, \w
|
|
54
|
+
rule(:char_escape) do
|
|
55
|
+
str('\\') >> match('["\\\\ntricdw]').as(:char)
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# A single identifier character. Alternatives are tried in order:
#   1. hex escape                      -> :hex_escape
#   2. escaped backslash (\\)          -> :backslash_escape / :escaped_backslash
#   3. backslash + identifier char     -> :backslash_escape / :escaped_char
#   4. plain letter, digit, '_', '.', '-' -> :char
#
# The former `match('[a-zA-Z]').as(:escaped_keyword)` alternative was removed:
# it sat behind `match('[a-zA-Z0-9_.-]')`, a superset of `[a-zA-Z]`, so it could
# never match. The resulting parse tree is unchanged.
rule(:identifier_char) do
  hex_escape.as(:hex_escape) |
    (str('\\') >> str('\\').as(:escaped_backslash)).as(:backslash_escape) |
    (str('\\') >> match('[a-zA-Z0-9_.-]').as(:escaped_char)).as(:backslash_escape) |
    match('[a-zA-Z0-9_.-]').as(:char)
end
|
|
65
|
+
|
|
66
|
+
rule(:identifier) { identifier_char.repeat(1).as(:identifier_parts) }
|
|
67
|
+
rule(:namespace_prefix) { identifier.as(:prefix) >> str(':') }
|
|
19
68
|
rule(:namespace_prefix?) { namespace_prefix.maybe }
|
|
20
69
|
rule(:qualified_name) { namespace_prefix? >> identifier.as(:local_name) }
|
|
21
70
|
|
|
22
|
-
|
|
71
|
+
# Name wildcards for anyName and nsName patterns
|
|
23
72
|
|
|
24
|
-
|
|
73
|
+
# anyName wildcard: * or * - exceptName
|
|
74
|
+
rule(:any_name_pattern) do
|
|
75
|
+
str('*') >>
|
|
76
|
+
(space >> str('-') >> space >> name_class_except).maybe.as(:except)
|
|
77
|
+
end
|
|
25
78
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
str("{") >>
|
|
31
|
-
whitespace >>
|
|
32
|
-
content.maybe.as(:content) >>
|
|
33
|
-
whitespace >>
|
|
34
|
-
str("}") >>
|
|
35
|
-
(str("*") | str("+") | str("?")).maybe.as(:occurrence)
|
|
79
|
+
# nsName wildcard: prefix:* or prefix:* - exceptName
|
|
80
|
+
rule(:ns_name_pattern) do
|
|
81
|
+
namespace_prefix >> str('*') >>
|
|
82
|
+
(space >> str('-') >> space >> name_class_except).maybe.as(:except)
|
|
36
83
|
end
|
|
37
84
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
(datatype_ref | str("text")).as(:type) >>
|
|
45
|
-
whitespace >>
|
|
46
|
-
str("}")
|
|
85
|
+
# Except clause can be a single name or multiple names in parentheses
|
|
86
|
+
rule(:name_class_except) do
|
|
87
|
+
(str('(') >> space? >> name_class >>
|
|
88
|
+
(space? >> str('|') >> space? >> name_class).repeat >>
|
|
89
|
+
space? >> str(')')) |
|
|
90
|
+
name_class
|
|
47
91
|
end
|
|
48
92
|
|
|
49
|
-
rule
|
|
50
|
-
|
|
93
|
+
# Name_class rule is useful for EBNF generation of a name_class.
|
|
94
|
+
# It parses an nsName wildcard (prefix:*), an anyName wildcard (*), a choice of qualified names (a | b), or a single qualified name.
|
|
95
|
+
# !!!!!!!!!
|
|
96
|
+
# General rule: any rule that matches characters consumes them from the input, so every rule call in the hierarchy must run to completion;
|
|
97
|
+
# otherwise backtracking to reject the pattern will not work.
|
|
98
|
+
# !!!!!!!!!
|
|
99
|
+
# Try wildcards first (more specific), then fall back to qualified names
|
|
100
|
+
rule(:name_class) do
|
|
101
|
+
ns_name_pattern.as(:ns_name) |
|
|
102
|
+
any_name_pattern.as(:any_name) |
|
|
103
|
+
(qualified_name >> (space? >> str('|') >> space? >> qualified_name).repeat(1).as(:name_choice_items)).as(:name_choice) |
|
|
104
|
+
qualified_name.as(:name)
|
|
51
105
|
end
|
|
52
106
|
|
|
53
|
-
|
|
54
|
-
rule(:
|
|
107
|
+
# Datatype library declaration (same as datatype_library but different name for clarity)
|
|
108
|
+
rule(:datatype_decl) do
|
|
109
|
+
keyword('datatypes') >> space >>
|
|
110
|
+
identifier.as(:prefix) >> space? >>
|
|
111
|
+
str('=') >> space? >>
|
|
112
|
+
string_literal.as(:uri)
|
|
113
|
+
end
|
|
55
114
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
(str(
|
|
115
|
+
# String literal with optional concatenation using ~ operator
|
|
116
|
+
# Supports escape sequences: \x{HEX}, \", \\, \n, \r, \t, and the class escapes \i, \c, \d, \w
|
|
117
|
+
# Control characters (0x00-0x1F, 0x7F) must be escaped
|
|
118
|
+
rule(:string_char) do
|
|
119
|
+
hex_escape.as(:hex_escape) |
|
|
120
|
+
char_escape.as(:char_escape) |
|
|
121
|
+
(str('\\').absent? >> str('"').absent? >>
|
|
122
|
+
match('[\u0000-\u001F\u007F]').absent? >> any).as(:char)
|
|
63
123
|
end
|
|
64
124
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
125
|
+
# String chars for single-quote strings (same escapes, different delimiter)
|
|
126
|
+
rule(:single_string_char) do
|
|
127
|
+
hex_escape.as(:hex_escape) |
|
|
128
|
+
char_escape.as(:char_escape) |
|
|
129
|
+
(str('\\').absent? >> str("'").absent? >>
|
|
130
|
+
match('[\u0000-\u001F\u007F]').absent? >> any).as(:char)
|
|
68
131
|
end
|
|
69
132
|
|
|
70
|
-
rule(:
|
|
71
|
-
|
|
133
|
+
# String literal: a triple-quoted multi-line string or a single-line quoted
# string, optionally followed by any number of `~`-concatenated segments.
#
# Captures:
#   :multi_line_parts / :string_parts  - content of the first segment
#   :concatenations                    - the following `~` segments, each holding
#                                        :concat_string_parts or :concat_multi_line_parts
rule(:string_literal) do
  # Multi-line strings: """...""" (can span multiple lines).
  # A character is content as long as """ does NOT start at this position.
  multi_line_double = str('"""') >>
                      (str('"""').absent? >> any).repeat.as(:multi_line_parts) >>
                      str('"""')
  # Multi-line strings: '''...''' (can span multiple lines)
  multi_line_single = str("'''") >>
                      (str("'''").absent? >> any).repeat.as(:multi_line_parts) >>
                      str("'''")
  # Single-line double-quote string: "..."
  double_string = str('"') >> string_char.repeat.as(:string_parts) >> str('"')
  # Single-line single-quote string: '...'
  single_string = str("'") >> single_string_char.repeat.as(:string_parts) >> str("'")

  # Concatenated segments: ~ "..." and ~ '...'
  concat_part = whitespace >> str('~') >> whitespace >>
                str('"') >> string_char.repeat.as(:concat_string_parts) >> str('"')
  single_concat_part = whitespace >> str('~') >> whitespace >>
                       str("'") >> single_string_char.repeat.as(:concat_string_parts) >> str("'")

  # Concatenated segments: ~ """...""" and ~ '''...'''
  multi_line_concat_double = whitespace >> str('~') >> whitespace >>
                             str('"""') >>
                             (str('"""').absent? >> any).repeat.as(:concat_multi_line_parts) >>
                             str('"""')
  multi_line_concat_single = whitespace >> str('~') >> whitespace >>
                             str("'''") >>
                             (str("'''").absent? >> any).repeat.as(:concat_multi_line_parts) >>
                             str("'''")

  # Triple-quoted segments must be tried before single-quoted ones: otherwise
  # `~ """abc"""` is matched as the empty segment `""` and the remaining
  # `"abc"""` is left unparsed. This mirrors the ordering of the leading
  # segment below, where the multi-line forms also come first.
  concatenations = (multi_line_concat_double | multi_line_concat_single |
                    concat_part | single_concat_part).repeat.maybe.as(:concatenations)

  # Ordered choice on the leading segment: multi-line forms first, then single-line.
  (multi_line_double >> concatenations) |
    (multi_line_single >> concatenations) |
    (double_string >> concatenations) |
    (single_string >> concatenations)
end
|
|
73
168
|
|
|
74
|
-
|
|
75
|
-
|
|
169
|
+
# Value pattern for literal values
|
|
170
|
+
rule(:value_literal) { string_literal.as(:value) }
|
|
171
|
+
|
|
172
|
+
# Mixed content pattern
|
|
173
|
+
rule(:mixed_pattern) do
|
|
174
|
+
keyword('mixed') >> whitespace >> str('{') >> whitespace >>
|
|
175
|
+
content.as(:mixed_content) >> whitespace >> str('}')
|
|
76
176
|
end
|
|
77
177
|
|
|
78
|
-
|
|
79
|
-
|
|
178
|
+
# Namespace declarations
|
|
179
|
+
# Default namespace (unprefixed): default namespace = "uri"
|
|
180
|
+
rule(:default_namespace_decl) do
|
|
181
|
+
keyword('default') >> space >> keyword('namespace') >> space? >>
|
|
182
|
+
str('=') >> space? >> string_literal.as(:uri)
|
|
80
183
|
end
|
|
81
184
|
|
|
82
|
-
|
|
83
|
-
|
|
185
|
+
# Default namespace (prefixed): default namespace prefix = "uri"
|
|
186
|
+
rule(:default_prefixed_namespace_decl) do
|
|
187
|
+
keyword('default') >> space >> keyword('namespace') >> space >>
|
|
188
|
+
identifier.as(:prefix) >> space? >>
|
|
189
|
+
str('=') >> space? >> string_literal.as(:uri)
|
|
84
190
|
end
|
|
85
191
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
(
|
|
91
|
-
|
|
192
|
+
# Prefixed namespace: namespace prefix = "uri"
|
|
193
|
+
rule(:prefixed_namespace_decl) do
|
|
194
|
+
keyword('namespace') >> space >>
|
|
195
|
+
identifier.as(:prefix) >> space? >>
|
|
196
|
+
str('=') >> space? >> string_literal.as(:uri)
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
# Any namespace declaration
|
|
200
|
+
rule(:namespace_decl) do
|
|
201
|
+
default_prefixed_namespace_decl.as(:default_prefixed_ns) |
|
|
202
|
+
default_namespace_decl.as(:default_ns) |
|
|
203
|
+
prefixed_namespace_decl.as(:prefixed_ns)
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
# Annotation element inner content (recursive for nested brackets)
|
|
207
|
+
rule(:annotation_inner_content) do
|
|
208
|
+
(
|
|
209
|
+
# Nested annotation brackets
|
|
210
|
+
(str('[') >> annotation_inner_content >> str(']')) |
|
|
211
|
+
# String literal (don't let brackets inside strings confuse us)
|
|
212
|
+
string_literal |
|
|
213
|
+
# Any char that's not a bracket, quote
|
|
214
|
+
(str('[').absent? >> str(']').absent? >> str('"').absent? >> str("'").absent? >>
|
|
215
|
+
any)
|
|
216
|
+
).repeat
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
# Annotation attribute: prefix:local = "value" or local = "value"
|
|
220
|
+
rule(:annotation_attr) do
|
|
221
|
+
(((namespace_prefix >> identifier) | identifier).as(:ann_name) >>
|
|
222
|
+
whitespace >> str('=') >> whitespace >>
|
|
223
|
+
string_literal.as(:attr_value)).as(:ann_attr)
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
# Annotation element: prefix:local [ content ] or local [ content ]
|
|
227
|
+
rule(:annotation_elem) do
|
|
228
|
+
(((namespace_prefix >> identifier) | identifier).as(:elem_name) >>
|
|
229
|
+
whitespace >> str('[') >> whitespace >>
|
|
230
|
+
annotation_inner_content.as(:inner_content) >> whitespace >>
|
|
231
|
+
str(']')).as(:ann_elem)
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
# A single annotation item (attribute or element)
|
|
235
|
+
rule(:annotation_item) do
|
|
236
|
+
annotation_elem | annotation_attr
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
# Annotation content: sequence of annotation items OR empty OR raw content (comments, etc.)
|
|
240
|
+
# Raw content matches any character that is NOT a bracket or quote
|
|
241
|
+
rule(:annotation_content) do
|
|
242
|
+
(annotation_item >> (whitespace >> annotation_item).repeat >> whitespace).as(:ann_items) |
|
|
243
|
+
(str('[').absent? >> str(']').absent? >> str('"').absent? >> str("'").absent? >> any).repeat.as(:raw_content) |
|
|
92
244
|
whitespace
|
|
93
245
|
end
|
|
94
246
|
|
|
95
|
-
|
|
247
|
+
# Single annotation: [ content ] where content can contain nested brackets, strings, etc.
|
|
248
|
+
# Appears before patterns, definitions, and within annotation elements
|
|
249
|
+
# Handles both empty [] and content-bearing [x = "y"] annotations
|
|
250
|
+
rule(:annotation) do
|
|
251
|
+
str('[') >> whitespace >>
|
|
252
|
+
(
|
|
253
|
+
(annotation_content >> whitespace >> str(']')).as(:ann) |
|
|
254
|
+
str(']').as(:ann)
|
|
255
|
+
)
|
|
256
|
+
end
|
|
96
257
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
convert_to_rng(tree)
|
|
258
|
+
# One or more annotations preceding a pattern
|
|
259
|
+
rule(:annotations) do
|
|
260
|
+
(whitespace >> annotation).repeat(1)
|
|
101
261
|
end
|
|
102
262
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
263
|
+
# Notation/annotation: [ key = "value" ] or just [ ... ]
|
|
264
|
+
# Notations are only valid when attached to patterns using >>, not as standalone preamble items
|
|
265
|
+
rule(:notation) do
|
|
266
|
+
annotation
|
|
107
267
|
end
|
|
108
268
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
process_content_item(xml, tree[:root][:start])
|
|
120
|
-
end
|
|
269
|
+
rule(:element_def) do
|
|
270
|
+
(doc_comments >> whitespace).maybe.as(:docs) >>
|
|
271
|
+
annotations.maybe.as(:annotations) >>
|
|
272
|
+
keyword('element') >> whitespace >>
|
|
273
|
+
name_class.as(:name) >>
|
|
274
|
+
whitespace >>
|
|
275
|
+
str('{') >> whitespace >>
|
|
276
|
+
content.maybe.as(:content) >> whitespace >>
|
|
277
|
+
str('}') >> (str('*') | str('+') | str('?')).maybe.as(:occurrence)
|
|
278
|
+
end
|
|
121
279
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
280
|
+
rule(:attribute_def) do
|
|
281
|
+
(doc_comments >> whitespace).maybe.as(:docs) >>
|
|
282
|
+
annotations.maybe.as(:annotations) >>
|
|
283
|
+
keyword('attribute') >> whitespace >>
|
|
284
|
+
name_class.as(:name) >>
|
|
285
|
+
whitespace >>
|
|
286
|
+
str('{') >>
|
|
287
|
+
whitespace >>
|
|
288
|
+
attribute_content.as(:type) >>
|
|
289
|
+
whitespace >>
|
|
290
|
+
str('}') >>
|
|
291
|
+
(str('*') | str('+') | str('?')).maybe.as(:occurrence)
|
|
292
|
+
end
|
|
125
293
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
294
|
+
# Attribute content. Alternatives are tried in this order:
#   1. parenthesized choice of values/refs      -> :paren_choice
#   2. datatype except (with or without prefix) -> :datatype_except
#   3. bare choice of value literals            -> :value / :value_choice
#   4. single value literal                     -> :value
#   5. datatype reference (prefix:type)
#   6. the keyword `text`                       -> :text_type
#   7. reference to a named pattern             -> :ref
#
# The `text` keyword is followed by a negative lookahead for identifier
# characters, the same shape text_def uses. Without it, a reference such as
# `textContent` is consumed as `text`, and the enclosing rule then fails on
# the leftover `Content`.
#
# NOTE(review): the except and choice forms use repeat(1), i.e. they need at
# least two values; a single-value except such as `xsd:string - ( "a" )` does
# not match here. Left unchanged - confirm whether that is intended.
rule(:attribute_content) do
  # Parenthesized choice: ( "a" | "b" | "c" ) or ( ref1 | ref2 )
  (str('(') >> whitespace >>
    (value_literal | identifier.as(:ref)) >>
    (whitespace >> str('|') >> whitespace >> (value_literal | identifier.as(:ref))).repeat(1) >>
    whitespace >> str(')')).as(:paren_choice) |
    # Datatype except: prefix:type - ( "a" | "b" ) or type - ( "a" | "b" )
    (identifier.as(:datatype_prefix) >> str(':') >> identifier.as(:datatype_type) >>
      whitespace >> str('-') >> whitespace >>
      str('(') >> whitespace >>
      value_literal >>
      (whitespace >> str('|') >> whitespace >> value_literal).repeat(1) >>
      whitespace >> str(')')).as(:datatype_except) |
    (identifier.as(:datatype_type) >>
      whitespace >> str('-') >> whitespace >>
      str('(') >> whitespace >>
      value_literal >>
      (whitespace >> str('|') >> whitespace >> value_literal).repeat(1) >>
      whitespace >> str(')')).as(:datatype_except) |
    # Non-parenthesized choice of value literals: "a" | "b" | "c"
    (value_literal >> (whitespace >> str('|') >> whitespace >> value_literal).repeat(1).as(:value_choice)) |
    value_literal |
    datatype_ref |
    # Keyword `text`, only when not followed by an identifier character
    (keyword('text') >> match('[a-zA-Z0-9_.-]').absent?).as(:text_type) |
    identifier.as(:ref)
end
|
|
136
321
|
|
|
137
|
-
|
|
322
|
+
rule(:datatype_ref) do
|
|
323
|
+
identifier.as(:prefix) >> str(':') >> identifier.as(:type) >>
|
|
324
|
+
(whitespace >> str('{') >> whitespace >>
|
|
325
|
+
param_list.as(:params) >> whitespace >> str('}')).maybe
|
|
138
326
|
end
|
|
139
327
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
attrs[:name] = item[:name][:local_name][:identifier].to_s
|
|
328
|
+
# Parameter list for datatypes (e.g., pattern = "value", minLength = "1")
|
|
329
|
+
rule(:param_list) do
|
|
330
|
+
param_item >> (whitespace >> param_item).repeat
|
|
331
|
+
end
|
|
145
332
|
|
|
146
|
-
|
|
333
|
+
# Single parameter (e.g., pattern = "value")
|
|
334
|
+
rule(:param_item) do
|
|
335
|
+
identifier.as(:param_name) >> whitespace >> str('=') >> whitespace >>
|
|
336
|
+
string_literal.as(:param_value)
|
|
337
|
+
end
|
|
147
338
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
process_content_item(xml, content_item)
|
|
152
|
-
end
|
|
153
|
-
end
|
|
154
|
-
end
|
|
339
|
+
# Word boundary - zero-width negative lookahead asserting that the next
# character cannot continue an identifier. Placed after a keyword, it
# prevents "text" from matching the start of "textarea", etc.
# The class mirrors the plain characters accepted by identifier_char,
# including '.', so a name such as "text.content" is not split after the keyword.
rule(:word_boundary) { match('[a-zA-Z0-9_.-]').absent? }
|
|
155
342
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
xml.parent.name = "zeroOrMore"
|
|
161
|
-
when "+"
|
|
162
|
-
xml.parent.name = "oneOrMore"
|
|
163
|
-
when "?"
|
|
164
|
-
xml.parent.name = "optional"
|
|
165
|
-
end
|
|
166
|
-
end
|
|
167
|
-
elsif item.key?(:attr_name)
|
|
168
|
-
# Attribute definition
|
|
169
|
-
attrs = {}
|
|
170
|
-
attrs[:name] = item[:attr_name][:local_name][:identifier].to_s
|
|
171
|
-
|
|
172
|
-
attrs[:ns] = item[:attr_name][:prefix][:identifier].to_s if item[:attr_name][:prefix]
|
|
173
|
-
|
|
174
|
-
xml.attribute(attrs) do
|
|
175
|
-
if item[:type] == "text"
|
|
176
|
-
xml.text
|
|
177
|
-
elsif item[:type].key?(:prefix)
|
|
178
|
-
xml.data(type: item[:type][:type][:identifier].to_s,
|
|
179
|
-
datatypeLibrary: "http://www.w3.org/2001/XMLSchema-datatypes")
|
|
180
|
-
end
|
|
181
|
-
end
|
|
182
|
-
elsif item.key?(:text)
|
|
183
|
-
xml.text
|
|
184
|
-
elsif item.key?(:empty)
|
|
185
|
-
xml.empty
|
|
186
|
-
elsif item.key?(:group)
|
|
187
|
-
xml.group do
|
|
188
|
-
item[:group][:items].each do |group_item|
|
|
189
|
-
process_content_item(xml, group_item)
|
|
190
|
-
end
|
|
191
|
-
end
|
|
343
|
+
# Keyword patterns with word boundaries
|
|
344
|
+
rule(:text_def) { (keyword('text') >> word_boundary).as(:text) }
|
|
345
|
+
rule(:empty_def) { (keyword('empty') >> word_boundary).as(:empty) }
|
|
346
|
+
rule(:not_allowed_def) { (keyword('notAllowed') >> word_boundary).as(:not_allowed) }
|
|
192
347
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
when "*"
|
|
197
|
-
xml.parent.name = "zeroOrMore"
|
|
198
|
-
when "+"
|
|
199
|
-
xml.parent.name = "oneOrMore"
|
|
200
|
-
when "?"
|
|
201
|
-
xml.parent.name = "optional"
|
|
202
|
-
end
|
|
203
|
-
end
|
|
204
|
-
elsif item.key?(:first) && item.key?(:rest)
|
|
205
|
-
# Choice definition
|
|
206
|
-
xml.choice do
|
|
207
|
-
process_content_item(xml, item[:first])
|
|
208
|
-
item[:rest].each do |choice_item|
|
|
209
|
-
process_content_item(xml, choice_item[:second])
|
|
210
|
-
end
|
|
211
|
-
end
|
|
212
|
-
elsif item.key?(:ref)
|
|
213
|
-
# Reference to a named pattern
|
|
214
|
-
xml.ref(name: item[:ref][:identifier].to_s)
|
|
215
|
-
end
|
|
348
|
+
rule(:list_pattern) do
|
|
349
|
+
keyword('list') >> whitespace >> str('{') >> whitespace >>
|
|
350
|
+
list_content.as(:list_content) >> whitespace >> str('}')
|
|
216
351
|
end
|
|
217
|
-
end
|
|
218
352
|
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
# Simple element pattern
|
|
223
|
-
build_element(schema.element)
|
|
224
|
-
else
|
|
225
|
-
# Grammar with named patterns
|
|
226
|
-
result = []
|
|
353
|
+
# Parent reference: the keyword `parent` followed by the name of the
# referenced pattern, captured as :parent_pattern.
#
# `whitespace` may match nothing, so the keyword is followed by a negative
# lookahead for identifier characters. Without it, a plain reference such as
# "parentNode" is read as `parent Node`.
rule(:parent_ref) do
  keyword('parent') >> match('[a-zA-Z0-9_.-]').absent? >>
    whitespace >> identifier.as(:parent_pattern)
end
|
|
227
356
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
result << ""
|
|
232
|
-
end
|
|
357
|
+
rule(:external_ref) do
|
|
358
|
+
keyword('external') >> space >> string_literal.as(:external_href)
|
|
359
|
+
end
|
|
233
360
|
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
361
|
+
# List content can be: text, datatype references, or other patterns with occurrence markers
|
|
362
|
+
rule(:list_content_item) do
|
|
363
|
+
(datatype_ref | text_def | identifier.as(:ref)) >>
|
|
364
|
+
(str('*') | str('+') | str('?')).maybe.as(:occurrence)
|
|
365
|
+
end
|
|
239
366
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
result << ""
|
|
245
|
-
end
|
|
246
|
-
end
|
|
367
|
+
rule(:list_content) do
|
|
368
|
+
list_content_item.as(:first) >>
|
|
369
|
+
(comma? >> list_content_item).repeat.as(:sequence_items).maybe
|
|
370
|
+
end
|
|
247
371
|
|
|
248
|
-
|
|
249
|
-
|
|
372
|
+
rule(:group_def) do
|
|
373
|
+
str('(') >>
|
|
374
|
+
whitespace >>
|
|
375
|
+
content.as(:group) >>
|
|
376
|
+
whitespace >>
|
|
377
|
+
str(')') >> (str('*') | str('+') | str('?')).maybe.as(:occurrence)
|
|
250
378
|
end
|
|
251
379
|
|
|
252
|
-
|
|
380
|
+
# Named pattern definition (e.g., "myPattern = element foo { text }")
|
|
381
|
+
rule(:named_pattern) do
|
|
382
|
+
(doc_comments >> whitespace).maybe.as(:docs) >>
|
|
383
|
+
annotations.maybe >>
|
|
384
|
+
identifier.as(:name) >> whitespace >>
|
|
385
|
+
(str('|=') | str('&=') | str('=')).as(:operator) >> whitespace >>
|
|
386
|
+
pattern_list.as(:pattern)
|
|
387
|
+
end
|
|
253
388
|
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
389
|
+
# Start pattern definition
|
|
390
|
+
rule(:start_def) do
|
|
391
|
+
(doc_comments >> whitespace).maybe.as(:docs) >>
|
|
392
|
+
annotations.maybe >>
|
|
393
|
+
keyword('start') >> whitespace >>
|
|
394
|
+
(str('|=') | str('&=') | str('=')).as(:operator) >> whitespace >>
|
|
395
|
+
pattern_list.as(:start_pattern)
|
|
259
396
|
end
|
|
260
397
|
|
|
261
|
-
|
|
262
|
-
|
|
398
|
+
# Pattern list - similar to content but without being wrapped in element/attribute
|
|
399
|
+
rule(:pattern_list) do
|
|
400
|
+
content_item.as(:first) >>
|
|
401
|
+
(
|
|
402
|
+
(whitespace >> str('&') >> whitespace >> content_item).repeat(1).as(:interleave_items) |
|
|
403
|
+
(whitespace >> str('|') >> whitespace >> content_item).repeat(1).as(:choice_items) |
|
|
404
|
+
(comma? >> content_item).repeat(1).as(:sequence_items)
|
|
405
|
+
).maybe
|
|
406
|
+
end
|
|
263
407
|
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
408
|
+
# A single content item, optionally preceded by annotations.
# Choice is handled at content level, not as a separate pattern here.
# Alternatives are tried in the order listed below.
rule(:content_item) do
  # Appends the optional occurrence indicator (*, + or ?) to an atom.
  quantified = ->(atom) { atom >> (str('*') | str('+') | str('?')).maybe.as(:occurrence) }

  # Datatype subtraction: identifier - ( value|identifier|choice|annotated )
  more_except = (whitespace >> str('|') >> whitespace >> datatype_except_value).repeat.as(:more_except)
  subtraction = (
    identifier.as(:datatype_name) >>
    whitespace >> str('-') >> whitespace >>
    str('(') >> whitespace >>
    datatype_except_value >>
    more_except >>
    whitespace >> str(')')
  ).as(:datatype_subtraction)

  item = [
    element_def,
    attribute_def,
    subtraction,
    quantified.call(text_def),
    quantified.call(empty_def),
    quantified.call(not_allowed_def),
    list_pattern,
    parent_ref,
    external_ref,
    group_def,
    mixed_pattern,
    grammar_block.as(:grammar_block),
    quantified.call(value_literal),
    quantified.call(datatype_ref),
    quantified.call(identifier.as(:ref))
  ].reduce(:|)

  annotations.maybe >> item
end
|
|
274
428
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
429
|
+
# Value allowed inside a datatype except clause. In order of preference:
#   1. parenthesized content followed by `>>` and an annotation
#   2. a value literal or datatype name followed by `>>` and an annotation
#   3. plain parenthesized content
#   4. a plain value literal
#   5. a plain identifier (datatype name)
rule(:datatype_except_value) do
  # Wraps `subject >> annotation` and tags it as :annotated_except_value.
  annotated = lambda do |subject|
    (subject >>
      whitespace >> str('>>') >> whitespace >>
      (foreign_element | annotation).as(:annotation)).as(:annotated_except_value)
  end

  [
    annotated.call(group_def),
    annotated.call(value_literal | identifier.as(:datatype_name)),
    group_def,
    value_literal,
    identifier.as(:datatype_name)
  ].reduce(:|)
end
|
|
285
447
|
|
|
286
|
-
|
|
287
|
-
|
|
448
|
+
# Content: a first item optionally followed by exactly one continuation kind:
#   - annotation attachment: pattern >> identifier [] or pattern >> [ content ]
#   - interleave, separated by &
#   - choice, separated by |
#   - sequence, separated by an optional comma
rule(:content) do
  joined_by = ->(symbol, atom) { (whitespace >> str(symbol) >> whitespace >> atom).repeat(1) }

  attached = (foreign_element | annotation).as(:annotation_attached)
  continuation = [
    joined_by.call('>>', attached).as(:annotation_chain),
    joined_by.call('&', content_item).as(:interleave_items),
    joined_by.call('|', content_item).as(:choice_items),
    (comma? >> content_item).repeat(1).as(:sequence_items)
  ].reduce(:|)

  content_item.as(:first) >> continuation.maybe
end
|
|
288
459
|
|
|
289
|
-
|
|
290
|
-
|
|
460
|
+
# Balanced-brace content: everything inside a {} pair, including nested {}
# pairs. Matches any run of nested pairs and non-brace characters.
rule(:balanced_braces) do
  nested_pair = str('{') >> balanced_braces >> str('}')
  non_brace = str('{').absent? >> str('}').absent? >> any

  (nested_pair | non_brace).repeat
end
|
|
291
467
|
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
choice_parts << build_pattern(node.choice)
|
|
301
|
-
end
|
|
302
|
-
content_parts << choice_parts.join(" | ")
|
|
303
|
-
end
|
|
468
|
+
# Include directive: `include "href"` with an optional `{ ... }` override.
# The override body is captured as raw text (:raw_override) to avoid
# backtracking; it is parsed with proper scoping in post-processing.
rule(:include_directive) do
  override_block = str('{') >> whitespace >>
                   balanced_braces.as(:raw_override) >>
                   whitespace >> str('}')
  target = keyword('include') >> space >> string_literal.as(:href) >> whitespace

  target >> override_block.maybe.as(:override)
end
|
|
304
476
|
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
end
|
|
312
|
-
else
|
|
313
|
-
group_parts << build_pattern(node.group)
|
|
314
|
-
end
|
|
315
|
-
content_parts << "(#{group_parts.join(", ")})"
|
|
316
|
-
end
|
|
477
|
+
# Include directive, legacy layout: `include "href"` followed by an optional
# start definition and then any number of named patterns or top-level
# elements (no enclosing braces).
rule(:include_directive_legacy) do
  definition = named_pattern | element_def.as(:top_element)
  target = keyword('include') >> space >> string_literal.as(:href) >> whitespace

  target >> start_def.maybe.as(:start) >> whitespace >> definition.repeat.as(:definitions)
end
|
|
317
483
|
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
content_parts << node.ref.name
|
|
326
|
-
end
|
|
327
|
-
end
|
|
484
|
+
# Foreign element at grammar/div level: name [annotation-content],
# e.g. foo [] or rng:foo [ "val" ] or foo [ bar [ "baz" ] ].
# These are annotation elements that appear as standalone items.
rule(:foreign_element) do
  prefixed = namespace_prefix >> identifier
  element_name = (prefixed | identifier).as(:foreign_name)

  element_name >> whitespace >> annotation.as(:foreign_annotation)
end
|
|
328
491
|
|
|
329
|
-
|
|
330
|
-
|
|
492
|
+
# Div block for documentation and grouping: `div { ... }`.
# The body is an optional start definition, then include directives, then any
# mix of named patterns, foreign elements, nested divs and top-level elements.
rule(:div_block) do
  member = named_pattern | foreign_element | div_block.as(:nested_div) | element_def.as(:top_element)
  includes = (include_directive >> whitespace).repeat.as(:includes)
  patterns = (member >> whitespace).repeat.as(:patterns)
  body = start_def.maybe.as(:start) >> whitespace >> includes >> patterns
  opening = keyword('div') >> whitespace >> str('{') >> whitespace

  opening >> body >> whitespace >> str('}')
end
|
|
331
501
|
|
|
332
|
-
|
|
333
|
-
|
|
502
|
+
# Standalone pattern: like content_item but without element_def and
# attribute_def. These patterns may appear at grammar level without being
# definitions. The final alternative is a bare `*` any-name, optionally
# followed by `- name_class`.
rule(:standalone_pattern) do
  reference = identifier.as(:ref) >> (str('*') | str('+') | str('?')).maybe.as(:occurrence)
  any_name_except = (str('-') >> space >> name_class).maybe.as(:any_name_except)
  bare_any_name = (str('*') >> any_name_except).as(:bare_any_name)

  [
    text_def, empty_def, not_allowed_def,
    list_pattern, parent_ref, external_ref, group_def, mixed_pattern,
    datatype_ref,
    value_literal,
    reference,
    bare_any_name
  ].reduce(:|)
end
|
|
512
|
+
|
|
513
|
+
# Grammar-level choice, e.g. `element foo { empty } | element bar { empty }`
# at the top level of a grammar. Requires at least one `|` alternative after
# the first branch.
rule(:grammar_choice) do
  branch = -> { element_def | standalone_pattern }
  further = (whitespace >> str('|') >> whitespace >> branch.call).repeat(1).as(:choice_items)

  branch.call.as(:first) >> further
end
|
|
520
|
+
|
|
521
|
+
# Flat grammar body: an optional start definition, then include directives,
# then any number of top-level items. Items are tried in this order: named
# pattern, foreign element, div block, grammar-level choice
# (element foo { empty } | element bar { empty }), top-level element, and
# finally a standalone pattern ('foo', 'text', 'empty', ...) as a fallback.
rule(:grammar) do
  item = [
    named_pattern,
    foreign_element,
    div_block.as(:div),
    grammar_choice.as(:top_choice),
    element_def.as(:top_element),
    standalone_pattern.as(:standalone)
  ].reduce(:|)
  includes = (include_directive >> whitespace).repeat.as(:includes)
  patterns = (item >> whitespace).repeat.as(:patterns)

  start_def.maybe.as(:start) >> whitespace >> includes >> patterns
end
|
|
334
532
|
|
|
335
|
-
|
|
336
|
-
|
|
533
|
+
# Grammar block wrapper: `grammar { ... }`.
# The body is captured as raw text (:raw_grammar) to avoid backtracking; it
# is parsed with proper scoping in post-processing.
rule(:grammar_block) do
  opening = keyword('grammar') >> whitespace >> str('{') >> whitespace

  opening >> balanced_braces.as(:raw_grammar) >> whitespace >> str('}')
end
|
|
337
540
|
|
|
338
|
-
|
|
541
|
+
# Included file - more flexible than grammar_wrapper. After an optional
# preamble it accepts, in order of preference:
#   1. a grammar block, optionally followed by trailing definitions
#   2. a flat grammar (patterns only, no wrapper)
#   3. nothing at all (an empty file)
rule(:included_file) do
  definition = named_pattern | element_def.as(:top_element)
  block_form = grammar_block.as(:inner_grammar) >>
               (whitespace >> definition).repeat.as(:trailing_definitions)
  body = block_form | grammar | str('')

  whitespace >> preamble.maybe >> whitespace >> body >> whitespace
end
|
|
340
563
|
|
|
341
|
-
|
|
342
|
-
|
|
564
|
+
# One schema preamble entry followed by whitespace: a namespace declaration,
# a datatype declaration, or whatever the `notation` rule matches.
# NOTE(review): the previous comment said annotations [ key = "value" ] are
# allowed in the preamble; `notation` is defined outside this section -
# confirm it is the intended rule for that.
rule(:preamble_item) do
  declaration = namespace_decl | datatype_decl | notation

  declaration >> whitespace
end
|
|
343
569
|
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
else
|
|
348
|
-
"text"
|
|
349
|
-
end
|
|
350
|
-
else
|
|
351
|
-
"text"
|
|
352
|
-
end
|
|
570
|
+
# Schema preamble: zero or more preamble items, captured as :preamble_items.
rule(:preamble) do
  items = preamble_item.repeat

  items.as(:preamble_items)
end
|
|
353
573
|
|
|
354
|
-
|
|
355
|
-
|
|
574
|
+
# Root rule. After an optional preamble the document body is tried from most
# specific to least specific:
#   1. a grammar block (starts with literal "grammar {"), optionally followed
#      by trailing definitions
#   2. one or more top-level includes (Metanorma-style schemas); everything
#      after them is captured as raw text (:raw_trailing)
#   3. a flat grammar (default, most flexible)
root(:grammar_wrapper)
rule(:grammar_wrapper) do
  definition = named_pattern | element_def.as(:top_element)
  block_form = grammar_block.as(:inner_grammar) >>
               (whitespace >> definition).repeat.as(:trailing_definitions)
  include_form = (include_directive >> whitespace).repeat(1).as(:top_includes) >>
                 whitespace >> any.repeat.as(:raw_trailing)
  body = block_form | include_form | grammar

  whitespace >> preamble.maybe >> whitespace >> body >> whitespace
end
|
|
593
|
+
|
|
594
|
+
# Parse an RNC file with include resolution by delegating to a fresh
# IncludeProcessor.
#
# @param file_path [String] path of the file to parse
# @param base_dir [String, nil] passed through to the processor unchanged
# @param visited_files [Set] passed through to the processor unchanged
# @return whatever IncludeProcessor#parse_file returns
def self.parse_file(file_path, base_dir = nil, visited_files = Set.new)
  processor = IncludeProcessor.new
  processor.parse_file(file_path, base_dir, visited_files)
end
|
|
357
598
|
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
599
|
+
# Decode hex escapes (\x{HHHHHH}) that occur outside string literals and
# comments, so that escaped keywords such as \x{65}l\x{00065}ment are seen
# by the grammar as plain text ("element").
#
# Copied through unchanged:
# * triple-quoted and single-line quoted literals - their escapes are left
#   for the parser, because control characters like \x{A} (newline) are
#   forbidden inside single-line quoted strings and must remain escaped;
# * comments, from '#' up to (not including) the end of the line;
# * escapes that are malformed (not 1-6 hex digits, or no closing brace) or
#   that name a code point below 0x20, above 0x10FFFF, or inside the
#   surrogate range 0xD800..0xDFFF.
#
# @param input [String] RNC source text
# @return [String] a new string with the eligible escapes replaced
def self.preprocess_hex_escapes(input)
  output = +''
  size = input.length
  pos = 0

  while pos < size
    head = input[pos, 3]
    char = input[pos]

    # Triple-quoted literal (""" or '''): copy through the closing delimiter,
    # or through the end of input when it is never closed.
    if head == '"""' || head == "'''"
      closing = input.index(head, pos + 3) || (size - 3)
      output << input[pos..(closing + 2)]
      pos = closing + 3
      next
    end

    # Single-line quoted literal: copy through the matching quote.
    if char == '"' || char == "'"
      scan = pos + 1
      while scan < size && input[scan] != char
        scan += 1 if input[scan] == '\\' && scan + 1 < size # skip escaped char
        scan += 1
      end
      output << input[pos..scan]
      pos = scan + 1
      next
    end

    # Comment: copy up to, but not including, the line terminator.
    if char == '#'
      line_end = input.index("\n", pos) || size
      output << input[pos...line_end]
      pos = line_end
      next
    end

    # Hex escape outside a literal: decode it when it is well formed and
    # names an acceptable code point.
    if head == '\\x{'
      closing = input.index('}', pos + 3)
      digits = closing && input[(pos + 3)...closing]
      if digits&.match?(/\A[0-9a-fA-F]{1,6}\z/)
        code_point = digits.to_i(16)
        # 0x20 lower bound: control characters are rejected outside strings.
        if code_point.between?(0x20, 0x10FFFF) && !code_point.between?(0xD800, 0xDFFF)
          output << [code_point].pack('U')
          pos = closing + 1
          next
        end
      end
    end

    # Ordinary character, or a backslash that does not start a valid escape.
    output << char
    pos += 1
  end

  output
end
|
|
668
|
+
|
|
669
|
+
# Parse RNC source text into a Grammar object.
#
# Steps: strip the input and decode hex escapes outside strings, run the
# Parslet parser, normalize the parse tree, convert the normalized grammar
# tree to RNG XML, then build the Grammar from that XML.
#
# @param input [String] RNC source text
# @return the result of Grammar.from_xml
def self.parse(input)
  tree = new.parse(preprocess_hex_escapes(input.strip))

  # Normalize parse tree
  normalized = ParseTreeProcessor.new(tree).normalize

  # Convert to RNG XML and Grammar object
  Grammar.from_xml(convert_to_rng(normalized.grammar_tree))
end
|
|
682
|
+
|
|
683
|
+
# Convert an RNG schema to RNC by delegating to a fresh RncBuilder.
#
# @param schema the schema object handed to RncBuilder#build
# @return whatever RncBuilder#build returns
def self.to_rnc(schema)
  builder = RncBuilder.new
  builder.build(schema)
end
|
|
687
|
+
|
|
688
|
+
# Convert a parse tree to RNG XML by delegating to a fresh RncToRngConverter.
#
# @param tree the (normalized) grammar tree to convert
# @return whatever RncToRngConverter#convert returns
def self.convert_to_rng(tree)
  converter = RncToRngConverter.new
  converter.convert(tree)
end
|
|
392
692
|
end
|
|
393
693
|
end
|