ebnf 2.0.0 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,138 @@
1
+ (* W3C EBNF for ISO/IEC 14977 : 1996 EBNF *)
2
+ (* Scoured from https://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf *)
3
+
4
+ syntax = syntax_rule, {syntax_rule} ;
5
+
6
+ syntax_rule = meta_identifier, defining_symbol, definitions_list, terminator_symbol
7
+ (* A <syntax rule> defines the sequences of
8
+ symbols represented by a <meta identifier> *);
9
+
10
+ definitions_list = single_definition, {definition_separator_symbol, definitions_list}
11
+ (* | separates alternative <single definitions> *);
12
+
13
+ single_definition = term, {',', term}
14
+ (* , separates successive <terms> *);
15
+
16
+ term = factor, ['-', exception]
17
+ (* A <term> represents any sequence of symbols that is defined by the <factor> but
18
+ not defined by the <exception> *);
19
+
20
+ exception = factor
21
+ (* A <factor> may be used as an <exception>
22
+ if it could be replaced by a <factor>
23
+ containing no <meta identifiers> *);
24
+
25
+ factor = [integer, '*'], primary
26
+ (* The <integer> specifies the number of repetitions of the <primary> *);
27
+
28
+ primary = optional_sequence
29
+ | repeated_sequence
30
+ | special_sequence
31
+ | grouped_sequence
32
+ | meta_identifier
33
+ | terminal_string
34
+ | empty
35
+ ;
36
+
37
+ optional_sequence = start_option_symbol, definitions_list, end_option_symbol
38
+ (* The brackets [ and ] enclose symbols which are optional *);
39
+
40
+ repeated_sequence = start_repeat_symbol, definitions_list, end_repeat_symbol
41
+ (* The brackets { and } enclose symbols
42
+ which may be repeated any number of times *);
43
+
44
+ grouped_sequence = '(', definitions_list, ')'
45
+ (* The brackets ( and ) allow any <definitions list> to be a <primary> *);
46
+
47
+ terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'")
48
+ | ('"', second_terminal_character, {second_terminal_character}, '"')
49
+ (* A <terminal string> represents the
50
+ <characters> between the quote symbols '_' or "_" *);
51
+
52
+ meta_identifier = letter, {meta_identifier_character}
53
+ (* A <meta identifier> is the name of a syntactic element of the language being defined *);
54
+
55
+ integer = decimal_digit, {decimal_digit} ;
56
+
57
+ special_sequence = '?', {special_sequence_character}, '?'
58
+ (* The meaning of a <special sequence> is not defined in the standard metalanguage. *);
59
+
60
+ comment = '(*', {comment_symbol}, '*)'
61
+ (* A comment is allowed anywhere outside a
62
+ <terminal string>, <meta identifier>,
63
+ <integer> or <special sequence> *);
64
+
65
+ comment_symbol = comment | commentless_symbol | other_character ;
66
+
67
+ commentless_symbol = terminal_character | meta_identifier | integer
68
+ | terminal_string | special_sequence
69
+ ;
70
+
71
+ letter = "A" | "B" | "C" | "D" | "E" | "F" | "G"
72
+ | "H" | "I" | "J" | "K" | "L" | "M" | "N"
73
+ | "O" | "P" | "Q" | "R" | "S" | "T" | "U"
74
+ | "V" | "W" | "X" | "Y" | "Z" | "a" | "b"
75
+ | "c" | "d" | "e" | "f" | "g" | "h" | "i"
76
+ | "j" | "k" | "l" | "m" | "n" | "o" | "p"
77
+ | "q" | "r" | "s" | "t" | "u" | "v" | "w"
78
+ | "x" | "y" | "z"
79
+ ;
80
+
81
+ decimal_digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
82
+
83
+ (* Extended to allow '_' *)
84
+ meta_identifier_character = letter | decimal_digit | '_' ;
85
+
86
+ first_terminal_character = terminal_character - "'" ;
87
+
88
+ second_terminal_character = terminal_character - '"' ;
89
+
90
+ special_sequence_character = terminal_character - '?' ;
91
+
92
+ terminal_character = letter
93
+ | decimal_digit
94
+ | concatenate_symbol
95
+ | defining_symbol
96
+ | definition_separator_symbol
97
+ | end_comment_symbol
98
+ | end_group_symbol
99
+ | end_option_symbol
100
+ | end_repeat_symbol
101
+ | except_symbol
102
+ | first_quote_symbol
103
+ | repetition_symbol
104
+ | second_quote_symbol
105
+ | special_sequence_symbol
106
+ | start_comment_symbol
107
+ | start_group_symbol
108
+ | start_option_symbol
109
+ | start_repeat_symbol
110
+ | terminator_symbol
111
+ | other_character
112
+ ;
113
+
114
+ other_character = ' ' | ':' | '+' | '_' | '%' | '@' | '&'
115
+ | '#' | '$' | '<' | '>' | '\' | '^' | '`'
116
+ | '~' ;
117
+
118
+ empty = ;
119
+
120
+ concatenate_symbol = ',' ;
121
+ repetition_symbol = '*' ;
122
+ except_symbol = '-' ;
123
+ first_quote_symbol = "'" ;
124
+ second_quote_symbol = '"' ;
125
+ start_comment_symbol = '(*' ;
126
+ end_comment_symbol = '*)' ;
127
+ start_group_symbol = '(' ;
128
+ end_group_symbol = ')' ;
129
+ special_sequence_symbol = '?' ;
130
+
131
+ (* Simple terminals that are often extended *)
132
+ defining_symbol = '=' | ':' ;
133
+ definition_separator_symbol = '|' | '/' | '!' ;
134
+ terminator_symbol = ';' | '.' ;
135
+ start_option_symbol = '[' ;
136
+ end_option_symbol = ']' ;
137
+ start_repeat_symbol = '{' | '(:' ;
138
+ end_repeat_symbol = '}' | ':)' ;
@@ -0,0 +1,65 @@
1
+ (
2
+ (rule syntax (star syntax_rule))
3
+ (rule syntax_rule
4
+ (seq meta_identifier defining_symbol definitions_list terminator_symbol))
5
+ (rule definitions_list
6
+ (seq single_definition (star (seq definition_separator_symbol definitions_list))))
7
+ (rule single_definition (seq term (star (seq "," term))))
8
+ (rule term (seq factor (opt (seq "-" exception))))
9
+ (rule exception (seq factor))
10
+ (rule factor (seq (opt (seq integer "*")) primary))
11
+ (rule primary
12
+ (alt optional_sequence repeated_sequence special_sequence grouped_sequence
13
+ meta_identifier terminal_string empty ))
14
+ (rule optional_sequence
15
+ (seq start_option_symbol definitions_list end_option_symbol))
16
+ (rule repeated_sequence
17
+ (seq start_repeat_symbol definitions_list end_repeat_symbol))
18
+ (rule grouped_sequence (seq "(" definitions_list ")"))
19
+ (terminals _terminals (seq))
20
+ (terminal terminal_string
21
+ (alt
22
+ (seq "'" (plus first_terminal_character) "'")
23
+ (seq "\"" (plus second_terminal_character) "\"")) )
24
+ (terminal meta_identifier (seq letter (star meta_identifier_character)))
25
+ (terminal integer (plus decimal_digit))
26
+ (terminal special_sequence (seq "?" (star special_sequence_character) "?"))
27
+ (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol))
28
+ (terminal comment_symbol (alt comment commentless_symbol other_character))
29
+ (terminal commentless_symbol
30
+ (alt terminal_character meta_identifier integer terminal_string special_sequence))
31
+ (terminal letter (range "a-zA-Z"))
32
+ (terminal decimal_digit (range "0-9"))
33
+ (terminal meta_identifier_character (alt letter decimal_digit "_"))
34
+ (terminal first_terminal_character (diff terminal_character "'"))
35
+ (terminal second_terminal_character (diff terminal_character "\""))
36
+ (terminal special_sequence_character (diff terminal_character "?"))
37
+ (terminal terminal_character
38
+ (alt letter decimal_digit concatenate_symbol defining_symbol
39
+ definition_separator_symbol end_comment_symbol end_group_symbol
40
+ end_option_symbol end_repeat_symbol except_symbol first_quote_symbol
41
+ repetition_symbol second_quote_symbol special_sequence_symbol
42
+ start_comment_symbol start_group_symbol start_option_symbol
43
+ start_repeat_symbol terminator_symbol other_character ))
44
+ (terminal other_character (alt (range ":+_%@&$<>^`~#x20#x23") "\\"))
45
+ (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20"))
46
+ (pass _pass (alt (plus gap_separator) comment))
47
+ (terminal empty (seq ""))
48
+ (terminal concatenate_symbol (seq ","))
49
+ (terminal repetition_symbol (seq "*"))
50
+ (terminal except_symbol (seq "-"))
51
+ (terminal first_quote_symbol (seq "'"))
52
+ (terminal second_quote_symbol (seq "\""))
53
+ (terminal start_comment_symbol (seq "(*"))
54
+ (terminal end_comment_symbol (seq "*)"))
55
+ (terminal start_group_symbol (seq "("))
56
+ (terminal end_group_symbol (seq ")"))
57
+ (terminal special_sequence_symbol (seq "?"))
58
+ (terminal defining_symbol (alt "=" ":"))
59
+ (terminal definition_separator_symbol (alt "|" "/" "!"))
60
+ (terminal terminator_symbol (alt ";" "."))
61
+ (terminal start_option_symbol (seq "["))
62
+ (terminal end_option_symbol (seq "]"))
63
+ (terminal start_repeat_symbol (alt "{" "(:"))
64
+ (terminal end_repeat_symbol (alt "}" ":)"))
65
+ (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string)))
@@ -243,13 +243,13 @@
243
243
 
244
244
  @terminals
245
245
 
246
- [139] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>'
246
+ [139] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20])* '>'
247
247
  [140] PNAME_NS ::= PN_PREFIX? ':'
248
248
  [141] PNAME_LN ::= PNAME_NS PN_LOCAL
249
249
  [142] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
250
250
  [143] VAR1 ::= '?' VARNAME
251
251
  [144] VAR2 ::= '$' VARNAME
252
- [145] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
252
+ [145] LANGTAG ::= '@' ([a-zA-Z])+ ('-' ([a-zA-Z0-9])+)*
253
253
  [146] INTEGER ::= [0-9]+
254
254
  [147] DECIMAL ::= [0-9]* '.' [0-9]+
255
255
  [148] DOUBLE ::= [0-9]+ '.' [0-9]* EXPONENT
@@ -269,7 +269,7 @@
269
269
  [161] NIL ::= '(' WS* ')'
270
270
  [162] WS ::= #x20 | #x9 | #xD | #xA
271
271
  [163] ANON ::= '[' WS* ']'
272
- [164] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6]
272
+ [164] PN_CHARS_BASE ::= [A-Za-z] | [#x00C0-#x00D6]
273
273
  | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D]
274
274
  | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F]
275
275
  | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF]
@@ -283,6 +283,6 @@
283
283
  [169] PN_LOCAL ::= ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
284
284
  [170] PLX ::= PERCENT | PN_LOCAL_ESC
285
285
  [171] PERCENT ::= '%' HEX HEX
286
- [172] HEX ::= [0-9] | [A-F] | [a-f]
286
+ [172] HEX ::= [0-9A-Fa-f]
287
287
  [173] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '='
288
288
  | '/' | '?' | '#' | '@' | '%' )
@@ -282,7 +282,9 @@
282
282
  (rule iri "136" (alt IRIREF PrefixedName))
283
283
  (rule PrefixedName "137" (alt PNAME_LN PNAME_NS))
284
284
  (rule BlankNode "138" (alt BLANK_NODE_LABEL ANON))
285
- (terminal IRIREF "139" (seq "<" (range "^#x00-#x20<>\"{}|^`] | UCHAR)* '>'")))
285
+ (terminals _terminals (seq))
286
+ (terminal IRIREF "139"
287
+ (seq "<" (star (diff (range "^<>\"{}|^`\\") (range "#x00-#x20"))) ">"))
286
288
  (terminal PNAME_NS "140" (seq (opt PN_PREFIX) ":"))
287
289
  (terminal PNAME_LN "141" (seq PNAME_NS PN_LOCAL))
288
290
  (terminal BLANK_NODE_LABEL "142"
@@ -310,17 +312,16 @@
310
312
  (terminal STRING_LITERAL2 "157"
311
313
  (seq "\"" (star (alt (range "^#x22#x5C#xA#xD") ECHAR)) "\""))
312
314
  (terminal STRING_LITERAL_LONG1 "158"
313
- (seq "'''" (seq (opt (alt "'" "''")) (range "^'] | ECHAR ))* \"'''\""))))
315
+ (seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR))) "'''"))
314
316
  (terminal STRING_LITERAL_LONG2 "159"
315
- (seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"] | ECHAR ))* '\"\"\"'"))))
316
- (terminal ECHAR "160" (seq "\\" (range "tbnrf\"'")))
317
+ (seq "\"\"\"" (star (seq (opt (alt "\"" "\"\"")) (alt (range "^\"\\") ECHAR))) "\"\"\""))
318
+ (terminal ECHAR "160" (seq "\\" (range "tbnrf\\\"'")))
317
319
  (terminal NIL "161" (seq "(" (star WS) ")"))
318
320
  (terminal WS "162" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA")))
319
321
  (terminal ANON "163" (seq "[" (star WS) "]"))
320
322
  (terminal PN_CHARS_BASE "164"
321
323
  (alt
322
- (range "A-Z")
323
- (range "a-z")
324
+ (range "A-Za-z")
324
325
  (range "#x00C0-#x00D6")
325
326
  (range "#x00D8-#x00F6")
326
327
  (range "#x00F8-#x02FF")
@@ -355,7 +356,7 @@
355
356
  (seq (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS))))
356
357
  (terminal PLX "170" (alt PERCENT PN_LOCAL_ESC))
357
358
  (terminal PERCENT "171" (seq "%" HEX HEX))
358
- (terminal HEX "172" (alt (range "0-9") (range "A-F") (range "a-f")))
359
+ (terminal HEX "172" (range "0-9A-Fa-f"))
359
360
  (terminal PN_LOCAL_ESC "173"
360
361
  (seq "\\"
361
362
  (alt "_" "~" "." "-" "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=" "/" "?" "#"
@@ -26,11 +26,11 @@
26
26
 
27
27
  @terminals
28
28
 
29
- [18] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>'
29
+ [18] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'
30
30
  [139s] PNAME_NS ::= PN_PREFIX? ":"
31
31
  [140s] PNAME_LN ::= PNAME_NS PN_LOCAL
32
32
  [141s] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
33
- [144s] LANGTAG ::= "@" [a-zA-Z]+ ( "-" [a-zA-Z0-9]+ )*
33
+ [144s] LANGTAG ::= "@" ([a-zA-Z])+ ( "-" ([a-zA-Z0-9])+ )*
34
34
  [19] INTEGER ::= [+-]? [0-9]+
35
35
  [20] DECIMAL ::= [+-]? ( ([0-9])* '.' ([0-9])+ )
36
36
  [21] DOUBLE ::= [+-]? ( [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT )
@@ -65,6 +65,6 @@
65
65
  [168s] PN_LOCAL ::= ( PN_CHARS_U | ':' | [0-9] | PLX ) ( ( PN_CHARS | '.' | ':' | PLX )* ( PN_CHARS | ':' | PLX ) ) ?
66
66
  [169s] PLX ::= PERCENT | PN_LOCAL_ESC
67
67
  [170s] PERCENT ::= '%' HEX HEX
68
- [171s] HEX ::= [0-9] | [A-F] | [a-f]
68
+ [171s] HEX ::= [0-9A-Fa-f]
69
69
  [172s] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '='
70
70
  | '/' | '?' | '#' | '@' | '%' )
@@ -4,6 +4,8 @@
4
4
  (rule directive "3" (alt prefixID base sparqlPrefix sparqlBase))
5
5
  (rule prefixID "4" (seq "@prefix" PNAME_NS IRIREF "."))
6
6
  (rule base "5" (seq "@base" IRIREF "."))
7
+ (rule sparqlPrefix "28s" (seq SPARQL_PREFIX PNAME_NS IRIREF))
8
+ (rule sparqlBase "29s" (seq SPARQL_BASE IRIREF))
7
9
  (rule triples "6"
8
10
  (alt
9
11
  (seq subject predicateObjectList)
@@ -19,10 +21,23 @@
19
21
  (rule blankNodePropertyList "14" (seq "[" predicateObjectList "]"))
20
22
  (rule collection "15" (seq "(" (star object) ")"))
21
23
  (rule NumericLiteral "16" (alt INTEGER DECIMAL DOUBLE))
24
+ (rule RDFLiteral "128s" (seq String (opt (alt LANGTAG (seq "^^" iri)))))
25
+ (rule BooleanLiteral "133s" (alt "true" "false"))
22
26
  (rule String "17"
23
27
  (alt STRING_LITERAL_QUOTE STRING_LITERAL_SINGLE_QUOTE
24
28
  STRING_LITERAL_LONG_SINGLE_QUOTE STRING_LITERAL_LONG_QUOTE ))
25
- (terminal IRIREF "18" (seq "<" (range "^#x00-#x20<>\"{}|^`] | UCHAR)* '>'")))
29
+ (rule iri "135s" (alt IRIREF PrefixedName))
30
+ (rule PrefixedName "136s" (alt PNAME_LN PNAME_NS))
31
+ (rule BlankNode "137s" (alt BLANK_NODE_LABEL ANON))
32
+ (terminals _terminals (seq))
33
+ (terminal IRIREF "18"
34
+ (seq "<" (star (alt (diff (range "^<>\"{}|^`\\") (range "#x00-#x20")) UCHAR)) ">"))
35
+ (terminal PNAME_NS "139s" (seq (opt PN_PREFIX) ":"))
36
+ (terminal PNAME_LN "140s" (seq PNAME_NS PN_LOCAL))
37
+ (terminal BLANK_NODE_LABEL "141s"
38
+ (seq "_:" (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS))))
39
+ (terminal LANGTAG "144s"
40
+ (seq "@" (plus (range "a-zA-Z")) (star (seq "-" (plus (range "a-zA-Z0-9"))))))
26
41
  (terminal INTEGER "19" (seq (opt (range "+-")) (plus (range "0-9"))))
27
42
  (terminal DECIMAL "20"
28
43
  (seq (opt (range "+-")) (seq (star (range "0-9")) "." (plus (range "0-9")))))
@@ -33,34 +48,21 @@
33
48
  (seq (plus (range "0-9")) "." (star (range "0-9")) EXPONENT)
34
49
  (seq "." (plus (range "0-9")) EXPONENT)
35
50
  (seq (plus (range "0-9")) EXPONENT)) ))
51
+ (terminal EXPONENT "154s" (seq (range "eE") (opt (range "+-")) (plus (range "0-9"))))
36
52
  (terminal STRING_LITERAL_QUOTE "22"
37
53
  (seq "\"" (star (alt (range "^#x22#x5C#xA#xD") ECHAR UCHAR)) "\""))
38
54
  (terminal STRING_LITERAL_SINGLE_QUOTE "23"
39
55
  (seq "'" (star (alt (range "^#x27#x5C#xA#xD") ECHAR UCHAR)) "'"))
40
56
  (terminal STRING_LITERAL_LONG_SINGLE_QUOTE "24"
41
- (seq "'''" (seq (opt (alt "'" "''")) (range "^'] | ECHAR | UCHAR ))* \"'''\""))))
57
+ (seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR UCHAR))) "'''"))
42
58
  (terminal STRING_LITERAL_LONG_QUOTE "25"
43
- (seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"] | ECHAR | UCHAR ))* '\"\"\"'"))))
59
+ (seq "\"\"\"" (star (seq (opt (alt "\"" "\"\"")) (alt (range "^\"\\") ECHAR UCHAR))) "\"\"\""))
44
60
  (terminal UCHAR "26"
45
- (alt (seq "u" HEX HEX HEX HEX) (seq "U" HEX HEX HEX HEX HEX HEX HEX HEX)))
46
- (rule sparqlPrefix "28s" (seq SPARQL_PREFIX PNAME_NS IRIREF))
61
+ (alt (seq "\\u" HEX HEX HEX HEX) (seq "\\U" HEX HEX HEX HEX HEX HEX HEX HEX)))
62
+ (terminal ECHAR "159s" (seq "\\" (range "tbnrf\\\"'")))
47
63
  (terminal SPARQL_PREFIX "28t"
48
64
  (seq (range "Pp") (range "Rr") (range "Ee") (range "Ff") (range "Ii") (range "Xx")))
49
65
  (terminal SPARQL_BASE "29t" (seq (range "Bb") (range "Aa") (range "Ss") (range "Ee")))
50
- (rule sparqlBase "29s" (seq SPARQL_BASE IRIREF))
51
- (rule RDFLiteral "128s" (seq String (opt (alt LANGTAG (seq "^^" iri)))))
52
- (rule BooleanLiteral "133s" (alt "true" "false"))
53
- (rule iri "135s" (alt IRIREF PrefixedName))
54
- (rule PrefixedName "136s" (alt PNAME_LN PNAME_NS))
55
- (rule BlankNode "137s" (alt BLANK_NODE_LABEL ANON))
56
- (terminal PNAME_NS "139s" (seq (opt PN_PREFIX) ":"))
57
- (terminal PNAME_LN "140s" (seq PNAME_NS PN_LOCAL))
58
- (terminal BLANK_NODE_LABEL "141s"
59
- (seq "_:" (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS))))
60
- (terminal LANGTAG "144s"
61
- (seq "@" (plus (range "a-zA-Z")) (star (seq "-" (plus (range "a-zA-Z0-9"))))))
62
- (terminal EXPONENT "154s" (seq (range "eE") (opt (range "+-")) (plus (range "0-9"))))
63
- (terminal ECHAR "159s" (seq "\\" (range "tbnrf\"'")))
64
66
  (terminal WS "161s" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA")))
65
67
  (terminal ANON "162s" (seq "[" (star WS) "]"))
66
68
  (terminal PN_CHARS_BASE "163s"
@@ -94,7 +96,7 @@
94
96
  (opt (seq (star (alt PN_CHARS "." ":" PLX)) (alt PN_CHARS ":" PLX)))) )
95
97
  (terminal PLX "169s" (alt PERCENT PN_LOCAL_ESC))
96
98
  (terminal PERCENT "170s" (seq "%" HEX HEX))
97
- (terminal HEX "171s" (alt (range "0-9") (range "A-F") (range "a-f")))
99
+ (terminal HEX "171s" (range "0-9A-Fa-f"))
98
100
  (terminal PN_LOCAL_ESC "172s"
99
101
  (seq "\\"
100
102
  (alt "_" "~" "." "-" "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=" "/" "?" "#"
@@ -1,7 +1,10 @@
1
1
  module EBNF
2
+ autoload :ABNF, "ebnf/abnf"
2
3
  autoload :Base, "ebnf/base"
3
4
  autoload :BNF, "ebnf/bnf"
5
+ autoload :ISOEBNF, "ebnf/isoebnf"
4
6
  autoload :LL1, "ebnf/ll1"
7
+ autoload :Native, "ebnf/native"
5
8
  autoload :Parser, "ebnf/parser"
6
9
  autoload :PEG, "ebnf/peg"
7
10
  autoload :Rule, "ebnf/rule"
@@ -0,0 +1,301 @@
1
+ require_relative 'abnf/core'
2
+ require_relative 'abnf/meta'
3
+ require 'logger'
4
+
5
+ # ABNF parser
6
+ # Parses ABNF into an array of {EBNF::Rule}.
7
+ module EBNF
8
+ class ABNF
9
+ include EBNF::PEG::Parser
10
+
11
+ # Regular expressions for both "Core" and ABNF-specific terminals.
12
+ ALPHA = %r{[\x41-\x5A\x61-\x7A]}
13
+ VCHAR = %r{[\x20-\x7E]}
14
+ WSP = %r{[\x20\x09]}
15
+ CRLF = %r{\x0D?\x0A}
16
+ COMMENT = %r{;(?:#{WSP}|#{VCHAR})*#{CRLF}}
17
+ C_NL = %r{#{COMMENT}|#{CRLF}}
18
+ C_WSP = %r{#{WSP}|(?:#{C_NL}#{WSP})}
19
+
20
+ ##
21
+ # Hash of generated {EBNF::Rule} objects by symbol
22
+ #
23
+ # @return [Hash{Symbol => EBNF::Rule}]
24
+ attr_reader :parsed_rules
25
+
26
+ ##
27
+ # The following ABNF grammar rules are treated as terminals.
28
+
29
+ # `rulename ::= ALPHA (ALPHA | DIGIT | "-")*`
30
+ terminal(:rulename, /#{ALPHA}(?:#{ALPHA}|[0-9-])*/) do |value|
31
+ value.to_sym
32
+ end
33
+
34
+ # `defined_as ::= c_wsp* ("=" | "=/") c_wsp*`
35
+ terminal(:defined_as, /#{C_WSP}*=\/?#{C_WSP}*/) {|value| value.strip}
36
+
37
+ # `quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE`
38
+ terminal(:quoted_string, /"[\x20-\x21\x23-\x7E]*"/) do |value|
39
+ value[1..-2]
40
+ end
41
+
42
# `bin_val ::= "b" BIT+ (("." BIT+)+ | ("-" BIT+))?`
#
# Converts the matched text into an expression:
# * dotted segments become the result of `hex_or_string` (a String or a `[:seq, ...]`),
# * a dashed pair becomes `[:range, "#xN-#xM"]`,
# * a single value becomes `[:hex, "#xN"]`.
terminal(:bin_val, /b[01]+(?:(?:(?:\.[01]+)+)|(?:-[01]+))?/) do |value|
  bits = value[1..-1] # drop the leading "b" marker
  if bits.include?('.')
    # Interpret segments in binary creating a sequence of hex characters or a string
    hex_or_string(bits.split('.').map { |b| b.to_i(2).chr(Encoding::UTF_8) })
  elsif bits.include?('-')
    # Interpret as a range
    [:range, bits.split('-').map { |b| "#x%x" % b.to_i(2) }.join("-")]
  else
    # Interpret as a single HEX character
    [:hex, "#x%x" % bits.to_i(2)]
  end
end
55
+
56
# `dec_val ::= "d" DIGIT+ (("." DIGIT+)+ | ("-" DIGIT+))?`
#
# Converts the matched text into an expression:
# * dotted segments become the result of `hex_or_string` (a String or a `[:seq, ...]`),
# * a dashed pair becomes `[:range, "#xN-#xM"]`,
# * a single value becomes `[:hex, "#xN"]`.
terminal(:dec_val, /d[0-9]+(?:(?:(?:\.[0-9]+)+)|(?:-[0-9]+))?/) do |value|
  digits = value[1..-1] # drop the leading "d" marker
  if digits.include?('.')
    # Interpret segments in decimal creating a sequence of hex characters or a string
    chars = digits.split('.').map { |segment| segment.to_i.chr(Encoding::UTF_8) }
    hex_or_string(chars)
  elsif digits.include?('-')
    # Interpret as a range
    bounds = digits.split('-').map { |segment| "#x%x" % segment.to_i }
    [:range, bounds.join("-")]
  else
    # Interpret as a single HEX character
    [:hex, "#x%x" % digits.to_i]
  end
end
69
+
70
# `hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))?`
#
# Converts the matched text into an expression:
# * dotted segments become the result of `hex_or_string` (a String or a `[:seq, ...]`),
# * a dashed pair becomes `[:range, "#xN-#xM"]`,
# * a single value becomes `[:hex, "#x..."]`, keeping the digits exactly as written.
terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/i) do |value|
  digits = value[1..-1] # drop the leading "x" marker
  if digits.include?('.')
    # Interpret segments in hexadecimal creating a sequence of hex characters or a string
    hex_or_string(digits.split('.').map { |h| h.to_i(16).chr(Encoding::UTF_8) })
  elsif digits.include?('-')
    # Interpret as a range
    [:range, digits.split('-').map { |h| "#x%x" % h.to_i(16) }.join("-")]
  else
    # Interpret as a single HEX character
    [:hex, "#x#{digits}"]
  end
end
83
+
84
+ # `c_wsp ::= WSP | (c_nl WSP)`
85
+ terminal(:c_wsp, C_WSP)
86
+
87
+ # `c_nl ::= comment | CRLF`
88
+ terminal(:c_nl, C_NL)
89
+
90
+ # `DIGIT ::= [#x30-#x39]`
91
+ terminal(:DIGIT, /\d/)
92
+
93
+ # ## Non-terminal productions
94
+
95
+ # The `start_production` on `:rule` allows the parser to present the value as a single Hash, rather than an array of individual hashes.
96
+ start_production(:rule, as_hash: true)
97
+
98
+ # `rule ::= rulename defined_as elements c_nl`
99
+ production(:rule) do |value|
100
+ # value contains an expression.
101
+ # Invoke callback
102
+ sym = value[:rulename]
103
+ elements = value[:elements]
104
+
105
+ if value[:defined_as] == "=/"
106
+ # append to rule alternate
107
+ rule = parsed_rules.fetch(sym) {raise "No existing rule found for #{sym}"}
108
+ rule.expr = [:alt, rule.expr] unless rule.alt?
109
+ if elements.is_a?(Array) && elements.first == :alt
110
+ # append alternatives to rule
111
+ rule.expr.concat(elements[1..-1])
112
+ else
113
+ # add elements as last alternative
114
+ rule.expr.push(elements)
115
+ end
116
+ else
117
+ # There shouldn't be an existing rule
118
+ raise "Redefining rule #{sym}" if parsed_rules.has_key?(sym)
119
+ parsed_rules[sym] = EBNF::Rule.new(sym.to_sym, nil, elements)
120
+ end
121
+ progress(:rule, level: 2) {parsed_rules[sym].to_sxp}
122
+ sym
123
+ end
124
+
125
+ # `elements ::= alternation c_wsp*`
126
+ production(:elements) do |value|
127
+ value.first[:alternation]
128
+ end
129
+
130
+ # `alternation ::= concatenation (c_wsp* "/" c_wsp* concatenation)*`
131
+ production(:alternation) do |value|
132
+ unless value.last[:_alternation_1].empty?
133
+ [:alt, value.first[:concatenation]] + value.last[:_alternation_1]
134
+ else
135
+ value.first[:concatenation]
136
+ end
137
+ end
138
+
139
# The `_alternation_2` rule comes from the expanded PEG grammar and serves as an opportunity to customize the values presented to the `alternation` rule.
#
# It returns the `:concatenation` entry of the last matched component unchanged.
# (An earlier conditional that computed a flattened form had its result discarded, so it was removed as dead code.)
production(:_alternation_2) do |value|
  value.last[:concatenation]
end
148
+
149
+ # `concatenation::= repetition (c_wsp+ repetition)*`
150
+ production(:concatenation) do |value|
151
+ unless value.last[:_concatenation_1].empty?
152
+ [:seq, value.first[:repetition]] + value.last[:_concatenation_1]
153
+ else
154
+ value.first[:repetition]
155
+ end
156
+ end
157
+ start_production(:_concatenation_2, as_hash: true)
158
+ production(:_concatenation_2) do |value|
159
+ value[:repetition]
160
+ end
161
+
162
+ # `repetition ::= repeat? element`
163
+ production(:repetition) do |value|
164
+ rept = value.first[:_repetition_1]
165
+ elt = value.last[:element]
166
+ case rept
167
+ when [0, '*'] then [:star, elt]
168
+ when [1, '*'] then [:plus, elt]
169
+ when nil then elt
170
+ else
171
+ [:rept, rept.first, rept.last, elt]
172
+ end
173
+ end
174
+
175
+ # `repeat ::= DIGIT+ | (DIGIT* "*" DIGIT*)`
176
+ production(:repeat) do |value|
177
+ if value.is_a?(Integer)
178
+ [value, value]
179
+ else
180
+ [value.first, value.last]
181
+ end
182
+ end
183
+ start_production(:_repeat_1, as_hash: true)
184
+ production(:_repeat_1) {|value| value.values}
185
+ production(:_repeat_2) {|value| value.join("").to_i}
186
+ production(:_repeat_3) {|value| value.join("").to_i}
187
+ production(:_repeat_4) {|value| value.length > 0 ? value.join("").to_i : '*'}
188
+
189
+ # `element ::= rulename | group | option | char_val | num_val | prose_val`
190
+ production(:element) do |value|
191
+ value
192
+ end
193
+
194
+ # `group ::= "(" c_wsp* alternation c_wsp* ")"`
195
+ start_production(:group, as_hash: true)
196
+ production(:group) do |value|
197
+ value[:alternation]
198
+ end
199
+
200
+ # `option ::= "[" c_wsp* alternation c_wsp* "]"`
201
+ start_production(:option, as_hash: true)
202
+ production(:option) do |value|
203
+ [:opt, value[:alternation]]
204
+ end
205
+
206
+ # `case_insensitive_string ::= "%i"? quoted_string`
207
+ production(:case_insensitive_string) do |value|
208
+ str = value.last[:quoted_string]
209
+ if str.match?(/[[:alpha:]]/)
210
+ # Only need to use case-insensitive if there are alphabetic characters in the string.
211
+ [:istr, value.last[:quoted_string]]
212
+ else
213
+ value.last[:quoted_string]
214
+ end
215
+ end
216
+
217
+ # `case_sensitive_string ::= "%s" quoted_string`
218
+ production(:case_sensitive_string) do |value|
219
+ value.last[:quoted_string]
220
+ end
221
+
222
+ # `num_val ::= "%" (bin_val | dec_val | hex_val)`
223
+ production(:num_val) do |value|
224
+ value.last[:_num_val_1]
225
+ end
226
+
227
+ # ## Parser invocation.
228
+ # Instantiates the parser and immediately parses the input into `parsed_rules`.
229
+ #
230
+ # @param [#read, #to_s] input
231
+ # @param [Hash{Symbol => Object}] options
232
+ # @option options [Integer] :level
233
+ # Trace level. 0(debug), 1(info), 2(warn), 3(error).
234
+ # @return [EBNF::ABNF]
235
+ def initialize(input, **options)
236
+ # If the `level` option is set, instantiate a logger for collecting trace information.
237
+ if options.has_key?(:level)
238
+ options[:logger] = Logger.new(STDERR)
239
+ options[:logger].level = options[:level]
240
+ options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
241
+ end
242
+
243
+ # Read input, if necessary, which will be used in a Scanner.
244
+ @input = input.respond_to?(:read) ? input.read : input.to_s
245
+
246
+ @parsed_rules = {}
247
+
248
+ # Parses into `@parsed_rules`
249
+ parse(@input,
250
+ :rulelist, # Starting rule
251
+ ABNFMeta::RULES, # PEG rules
252
+ whitespace: '', # No implicit whitespace
253
+ **options)
254
+ rescue EBNF::PEG::Parser::Error => e
255
+ raise SyntaxError, e.message
256
+ end
257
+
258
+ ##
259
+ # The AST includes the parsed rules along with built-in rules for ABNF used within the parsed grammar.
260
+ #
261
+ # @return [Array<EBNF::Rule>]
262
+ def ast
263
+ # Add built-in rules for standard ABNF rules referenced but not defined in the parsed grammar
264
+ parsed_rules.values.map(&:symbols).flatten.uniq.each do |sym|
265
+ rule = ABNFCore::RULES.detect {|r| r.sym == sym}
266
+ parsed_rules[sym] ||= rule if rule
267
+ end
268
+
269
+ parsed_rules.values
270
+ end
271
+
272
+ private
273
# Generate a combination of seq and string to represent a sequence of characters
#
# Consecutive characters matching `VCHAR` are merged into a single String; every other
# character becomes its own `[:hex, "#xN"]` entry. When the whole input collapses into
# exactly one String, that String is returned bare; otherwise the parts are wrapped in
# `[:seq, ...]` (an empty input yields `[:seq]`).
#
# @param [Array<String>] characters
# @return [String,Array]
def hex_or_string(characters)
  # Group adjacent characters by whether they are directly representable in a string
  parts = characters.chunk { |c| VCHAR.match?(c) }.flat_map do |printable, run|
    if printable
      [run.join]
    else
      run.map { |c| [:hex, "#x%x" % c.codepoints.first] }
    end
  end

  # Either return the sequence, or a string
  if parts.length == 1 && parts.first.is_a?(String)
    parts.first
  else
    [:seq, *parts]
  end
end
300
+ end
301
+ end