ebnf 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,138 @@
1
+ (* W3C EBNF for ISO/IEC 14977 : 1996 EBNF *)
2
+ (* Scoured from https://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf *)
3
+
4
+ syntax = syntax_rule, {syntax_rule} ;
5
+
6
+ syntax_rule = meta_identifier, defining_symbol, definitions_list, terminator_symbol
7
+ (* A <syntax rule> defines the sequences of
8
+ symbols represented by a <meta identifier> *);
9
+
10
+ definitions_list = single_definition, {definition_separator_symbol, definitions_list}
11
+ (* | separates alternative <single definitions> *);
12
+
13
+ single_definition = term, {',', term}
14
+ (* , separates successive <terms> *);
15
+
16
+ term = factor, ['-', exception]
17
+ (* A <term> represents any sequence of symbols that is defined by the <factor> but
18
+ not defined by the <exception> *);
19
+
20
+ exception = factor
21
+ (* A <factor> may be used as an <exception>
22
+ if it could be replaced by a <factor>
23
+ containing no <meta identifiers> *);
24
+
25
+ factor = [integer, '*'], primary
26
+ (* The <integer> specifies the number of repetitions of the <primary> *);
27
+
28
+ primary = optional_sequence
29
+ | repeated_sequence
30
+ | special_sequence
31
+ | grouped_sequence
32
+ | meta_identifier
33
+ | terminal_string
34
+ | empty
35
+ ;
36
+
37
+ optional_sequence = start_option_symbol, definitions_list, end_option_symbol
38
+ (* The brackets [ and ] enclose symbols which are optional *);
39
+
40
+ repeated_sequence = start_repeat_symbol, definitions_list, end_repeat_symbol
41
+ (* The brackets { and } enclose symbols
42
+ which may be repeated any number of times *);
43
+
44
+ grouped_sequence = '(', definitions_list, ')'
45
+ (* The brackets ( and ) allow any <definitions list> to be a <primary> *);
46
+
47
+ terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'")
48
+ | ('"', second_terminal_character, {second_terminal_character}, '"')
49
+ (* A <terminal string> represents the
50
+ <characters> between the quote symbols '_' or "_" *);
51
+
52
+ meta_identifier = letter, {meta_identifier_character}
53
+ (* A <meta identifier> is the name of a syntactic element of the language being defined *);
54
+
55
+ integer = decimal_digit, {decimal_digit} ;
56
+
57
+ special_sequence = '?', {special_sequence_character}, '?'
58
+ (* The meaning of a <special sequence> is not defined in the standard metalanguage. *);
59
+
60
+ comment = '(*', {comment_symbol}, '*)'
61
+ (* A comment is allowed anywhere outside a
62
+ <terminal string>, <meta identifier>,
63
+ <integer> or <special sequence> *);
64
+
65
+ comment_symbol = comment | commentless_symbol | other_character ;
66
+
67
+ commentless_symbol = terminal_character | meta_identifier | integer
68
+ | terminal_string | special_sequence
69
+ ;
70
+
71
+ letter = "A" | "B" | "C" | "D" | "E" | "F" | "G"
72
+ | "H" | "I" | "J" | "K" | "L" | "M" | "N"
73
+ | "O" | "P" | "Q" | "R" | "S" | "T" | "U"
74
+ | "V" | "W" | "X" | "Y" | "Z" | "a" | "b"
75
+ | "c" | "d" | "e" | "f" | "g" | "h" | "i"
76
+ | "j" | "k" | "l" | "m" | "n" | "o" | "p"
77
+ | "q" | "r" | "s" | "t" | "u" | "v" | "w"
78
+ | "x" | "y" | "z"
79
+ ;
80
+
81
+ decimal_digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
82
+
83
+ (* Extended to allow '_' *)
84
+ meta_identifier_character = letter | decimal_digit | '_' ;
85
+
86
+ first_terminal_character = terminal_character - "'" ;
87
+
88
+ second_terminal_character = terminal_character - '"' ;
89
+
90
+ special_sequence_character = terminal_character - '?' ;
91
+
92
+ terminal_character = letter
93
+ | decimal_digit
94
+ | concatenate_symbol
95
+ | defining_symbol
96
+ | definition_separator_symbol
97
+ | end_comment_symbol
98
+ | end_group_symbol
99
+ | end_option_symbol
100
+ | end_repeat_symbol
101
+ | except_symbol
102
+ | first_quote_symbol
103
+ | repetition_symbol
104
+ | second_quote_symbol
105
+ | special_sequence_symbol
106
+ | start_comment_symbol
107
+ | start_group_symbol
108
+ | start_option_symbol
109
+ | start_repeat_symbol
110
+ | terminator_symbol
111
+ | other_character
112
+ ;
113
+
114
+ other_character = ' ' | ':' | '+' | '_' | '%' | '@' | '&'
115
+ | '#' | '$' | '<' | '>' | '\' | '^' | '`'
116
+ | '~' ;
117
+
118
+ empty = ;
119
+
120
+ concatenate_symbol = ',' ;
121
+ repetition_symbol = '*' ;
122
+ except_symbol = '-' ;
123
+ first_quote_symbol = "'" ;
124
+ second_quote_symbol = '"' ;
125
+ start_comment_symbol = '(*' ;
126
+ end_comment_symbol = '*)' ;
127
+ start_group_symbol = '(' ;
128
+ end_group_symbol = ')' ;
129
+ special_sequence_symbol = '?' ;
130
+
131
+ (* Simple terminals that are often extended *)
132
+ defining_symbol = '=' | ':' ;
133
+ definition_separator_symbol = '|' | '/' | '!' ;
134
+ terminator_symbol = ';' | '.' ;
135
+ start_option_symbol = '[' ;
136
+ end_option_symbol = ']' ;
137
+ start_repeat_symbol = '{' | '(:' ;
138
+ end_repeat_symbol = '}' | ':)' ;
@@ -0,0 +1,65 @@
1
+ (
2
+ (rule syntax (star syntax_rule))
3
+ (rule syntax_rule
4
+ (seq meta_identifier defining_symbol definitions_list terminator_symbol))
5
+ (rule definitions_list
6
+ (seq single_definition (star (seq definition_separator_symbol definitions_list))))
7
+ (rule single_definition (seq term (star (seq "," term))))
8
+ (rule term (seq factor (opt (seq "-" exception))))
9
+ (rule exception (seq factor))
10
+ (rule factor (seq (opt (seq integer "*")) primary))
11
+ (rule primary
12
+ (alt optional_sequence repeated_sequence special_sequence grouped_sequence
13
+ meta_identifier terminal_string empty ))
14
+ (rule optional_sequence
15
+ (seq start_option_symbol definitions_list end_option_symbol))
16
+ (rule repeated_sequence
17
+ (seq start_repeat_symbol definitions_list end_repeat_symbol))
18
+ (rule grouped_sequence (seq "(" definitions_list ")"))
19
+ (terminals _terminals (seq))
20
+ (terminal terminal_string
21
+ (alt
22
+ (seq "'" (plus first_terminal_character) "'")
23
+ (seq "\"" (plus second_terminal_character) "\"")) )
24
+ (terminal meta_identifier (seq letter (star meta_identifier_character)))
25
+ (terminal integer (plus decimal_digit))
26
+ (terminal special_sequence (seq "?" (star special_sequence_character) "?"))
27
+ (terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol))
28
+ (terminal comment_symbol (alt comment commentless_symbol other_character))
29
+ (terminal commentless_symbol
30
+ (alt terminal_character meta_identifier integer terminal_string special_sequence))
31
+ (terminal letter (range "a-zA-Z"))
32
+ (terminal decimal_digit (range "0-9"))
33
+ (terminal meta_identifier_character (alt letter decimal_digit "_"))
34
+ (terminal first_terminal_character (diff terminal_character "'"))
35
+ (terminal second_terminal_character (diff terminal_character "\""))
36
+ (terminal special_sequence_character (diff terminal_character "?"))
37
+ (terminal terminal_character
38
+ (alt letter decimal_digit concatenate_symbol defining_symbol
39
+ definition_separator_symbol end_comment_symbol end_group_symbol
40
+ end_option_symbol end_repeat_symbol except_symbol first_quote_symbol
41
+ repetition_symbol second_quote_symbol special_sequence_symbol
42
+ start_comment_symbol start_group_symbol start_option_symbol
43
+ start_repeat_symbol terminator_symbol other_character ))
44
+ (terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\"))
45
+ (terminal gap_separator (range "#x9#xa#xb#xc#xd#x20"))
46
+ (pass _pass (alt (plus gap_separator) comment))
47
+ (terminal empty (seq ""))
48
+ (terminal concatenate_symbol (seq ","))
49
+ (terminal repetition_symbol (seq "*"))
50
+ (terminal except_symbol (seq "-"))
51
+ (terminal first_quote_symbol (seq "'"))
52
+ (terminal second_quote_symbol (seq "\""))
53
+ (terminal start_comment_symbol (seq "(*"))
54
+ (terminal end_comment_symbol (seq "*)"))
55
+ (terminal start_group_symbol (seq "("))
56
+ (terminal end_group_symbol (seq ")"))
57
+ (terminal special_sequence_symbol (seq "?"))
58
+ (terminal defining_symbol (alt "=" ":"))
59
+ (terminal definition_separator_symbol (alt "|" "/" "!"))
60
+ (terminal terminator_symbol (alt ";" "."))
61
+ (terminal start_option_symbol (seq "["))
62
+ (terminal end_option_symbol (seq "]"))
63
+ (terminal start_repeat_symbol (alt "{" "(:"))
64
+ (terminal end_repeat_symbol (alt "}" ":)"))
65
+ (terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string)))
@@ -243,13 +243,13 @@
243
243
 
244
244
  @terminals
245
245
 
246
- [139] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>'
246
+ [139] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20])* '>'
247
247
  [140] PNAME_NS ::= PN_PREFIX? ':'
248
248
  [141] PNAME_LN ::= PNAME_NS PN_LOCAL
249
249
  [142] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
250
250
  [143] VAR1 ::= '?' VARNAME
251
251
  [144] VAR2 ::= '$' VARNAME
252
- [145] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
252
+ [145] LANGTAG ::= '@' ([a-zA-Z])+ ('-' ([a-zA-Z0-9])+)*
253
253
  [146] INTEGER ::= [0-9]+
254
254
  [147] DECIMAL ::= [0-9]* '.' [0-9]+
255
255
  [148] DOUBLE ::= [0-9]+ '.' [0-9]* EXPONENT
@@ -269,7 +269,7 @@
269
269
  [161] NIL ::= '(' WS* ')'
270
270
  [162] WS ::= #x20 | #x9 | #xD | #xA
271
271
  [163] ANON ::= '[' WS* ']'
272
- [164] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6]
272
+ [164] PN_CHARS_BASE ::= [A-Za-z] | [#x00C0-#x00D6]
273
273
  | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D]
274
274
  | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F]
275
275
  | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF]
@@ -283,6 +283,6 @@
283
283
  [169] PN_LOCAL ::= ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
284
284
  [170] PLX ::= PERCENT | PN_LOCAL_ESC
285
285
  [171] PERCENT ::= '%' HEX HEX
286
- [172] HEX ::= [0-9] | [A-F] | [a-f]
286
+ [172] HEX ::= [0-9A-Fa-f]
287
287
  [173] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '='
288
288
  | '/' | '?' | '#' | '@' | '%' )
@@ -282,7 +282,9 @@
282
282
  (rule iri "136" (alt IRIREF PrefixedName))
283
283
  (rule PrefixedName "137" (alt PNAME_LN PNAME_NS))
284
284
  (rule BlankNode "138" (alt BLANK_NODE_LABEL ANON))
285
- (terminal IRIREF "139" (seq "<" (range "^#x00-#x20<>\"{}|^`] | UCHAR)* '>'")))
285
+ (terminals _terminals (seq))
286
+ (terminal IRIREF "139"
287
+ (seq "<" (star (diff (range "^<>\"{}|^`\\") (range "#x00-#x20"))) ">"))
286
288
  (terminal PNAME_NS "140" (seq (opt PN_PREFIX) ":"))
287
289
  (terminal PNAME_LN "141" (seq PNAME_NS PN_LOCAL))
288
290
  (terminal BLANK_NODE_LABEL "142"
@@ -310,17 +312,16 @@
310
312
  (terminal STRING_LITERAL2 "157"
311
313
  (seq "\"" (star (alt (range "^#x22#x5C#xA#xD") ECHAR)) "\""))
312
314
  (terminal STRING_LITERAL_LONG1 "158"
313
- (seq "'''" (seq (opt (alt "'" "''")) (range "^'] | ECHAR ))* \"'''\""))))
315
+ (seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR))) "'''"))
314
316
  (terminal STRING_LITERAL_LONG2 "159"
315
- (seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"] | ECHAR ))* '\"\"\"'"))))
316
- (terminal ECHAR "160" (seq "\\" (range "tbnrf\"'")))
317
+ (seq "\"\"\"" (star (seq (opt (alt "\"" "\"\"")) (alt (range "^\"\\") ECHAR))) "\"\"\""))
318
+ (terminal ECHAR "160" (seq "\\" (range "tbnrf\\\"'")))
317
319
  (terminal NIL "161" (seq "(" (star WS) ")"))
318
320
  (terminal WS "162" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA")))
319
321
  (terminal ANON "163" (seq "[" (star WS) "]"))
320
322
  (terminal PN_CHARS_BASE "164"
321
323
  (alt
322
- (range "A-Z")
323
- (range "a-z")
324
+ (range "A-Za-z")
324
325
  (range "#x00C0-#x00D6")
325
326
  (range "#x00D8-#x00F6")
326
327
  (range "#x00F8-#x02FF")
@@ -355,7 +356,7 @@
355
356
  (seq (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS))))
356
357
  (terminal PLX "170" (alt PERCENT PN_LOCAL_ESC))
357
358
  (terminal PERCENT "171" (seq "%" HEX HEX))
358
- (terminal HEX "172" (alt (range "0-9") (range "A-F") (range "a-f")))
359
+ (terminal HEX "172" (range "0-9A-Fa-f"))
359
360
  (terminal PN_LOCAL_ESC "173"
360
361
  (seq "\\"
361
362
  (alt "_" "~" "." "-" "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=" "/" "?" "#"
@@ -26,11 +26,11 @@
26
26
 
27
27
  @terminals
28
28
 
29
- [18] IRIREF ::= '<' ([^#x00-#x20<>"{}|^`\] | UCHAR)* '>'
29
+ [18] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'
30
30
  [139s] PNAME_NS ::= PN_PREFIX? ":"
31
31
  [140s] PNAME_LN ::= PNAME_NS PN_LOCAL
32
32
  [141s] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
33
- [144s] LANGTAG ::= "@" [a-zA-Z]+ ( "-" [a-zA-Z0-9]+ )*
33
+ [144s] LANGTAG ::= "@" ([a-zA-Z])+ ( "-" ([a-zA-Z0-9])+ )*
34
34
  [19] INTEGER ::= [+-]? [0-9]+
35
35
  [20] DECIMAL ::= [+-]? ( ([0-9])* '.' ([0-9])+ )
36
36
  [21] DOUBLE ::= [+-]? ( [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT )
@@ -65,6 +65,6 @@
65
65
  [168s] PN_LOCAL ::= ( PN_CHARS_U | ':' | [0-9] | PLX ) ( ( PN_CHARS | '.' | ':' | PLX )* ( PN_CHARS | ':' | PLX ) ) ?
66
66
  [169s] PLX ::= PERCENT | PN_LOCAL_ESC
67
67
  [170s] PERCENT ::= '%' HEX HEX
68
- [171s] HEX ::= [0-9] | [A-F] | [a-f]
68
+ [171s] HEX ::= [0-9A-Fa-f]
69
69
  [172s] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '='
70
70
  | '/' | '?' | '#' | '@' | '%' )
@@ -4,6 +4,8 @@
4
4
  (rule directive "3" (alt prefixID base sparqlPrefix sparqlBase))
5
5
  (rule prefixID "4" (seq "@prefix" PNAME_NS IRIREF "."))
6
6
  (rule base "5" (seq "@base" IRIREF "."))
7
+ (rule sparqlPrefix "28s" (seq SPARQL_PREFIX PNAME_NS IRIREF))
8
+ (rule sparqlBase "29s" (seq SPARQL_BASE IRIREF))
7
9
  (rule triples "6"
8
10
  (alt
9
11
  (seq subject predicateObjectList)
@@ -19,10 +21,23 @@
19
21
  (rule blankNodePropertyList "14" (seq "[" predicateObjectList "]"))
20
22
  (rule collection "15" (seq "(" (star object) ")"))
21
23
  (rule NumericLiteral "16" (alt INTEGER DECIMAL DOUBLE))
24
+ (rule RDFLiteral "128s" (seq String (opt (alt LANGTAG (seq "^^" iri)))))
25
+ (rule BooleanLiteral "133s" (alt "true" "false"))
22
26
  (rule String "17"
23
27
  (alt STRING_LITERAL_QUOTE STRING_LITERAL_SINGLE_QUOTE
24
28
  STRING_LITERAL_LONG_SINGLE_QUOTE STRING_LITERAL_LONG_QUOTE ))
25
- (terminal IRIREF "18" (seq "<" (range "^#x00-#x20<>\"{}|^`] | UCHAR)* '>'")))
29
+ (rule iri "135s" (alt IRIREF PrefixedName))
30
+ (rule PrefixedName "136s" (alt PNAME_LN PNAME_NS))
31
+ (rule BlankNode "137s" (alt BLANK_NODE_LABEL ANON))
32
+ (terminals _terminals (seq))
33
+ (terminal IRIREF "18"
34
+ (seq "<" (star (alt (diff (range "^<>\"{}|^`\\") (range "#x00-#x20")) UCHAR)) ">"))
35
+ (terminal PNAME_NS "139s" (seq (opt PN_PREFIX) ":"))
36
+ (terminal PNAME_LN "140s" (seq PNAME_NS PN_LOCAL))
37
+ (terminal BLANK_NODE_LABEL "141s"
38
+ (seq "_:" (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS))))
39
+ (terminal LANGTAG "144s"
40
+ (seq "@" (plus (range "a-zA-Z")) (star (seq "-" (plus (range "a-zA-Z0-9"))))))
26
41
  (terminal INTEGER "19" (seq (opt (range "+-")) (plus (range "0-9"))))
27
42
  (terminal DECIMAL "20"
28
43
  (seq (opt (range "+-")) (seq (star (range "0-9")) "." (plus (range "0-9")))))
@@ -33,34 +48,21 @@
33
48
  (seq (plus (range "0-9")) "." (star (range "0-9")) EXPONENT)
34
49
  (seq "." (plus (range "0-9")) EXPONENT)
35
50
  (seq (plus (range "0-9")) EXPONENT)) ))
51
+ (terminal EXPONENT "154s" (seq (range "eE") (opt (range "+-")) (plus (range "0-9"))))
36
52
  (terminal STRING_LITERAL_QUOTE "22"
37
53
  (seq "\"" (star (alt (range "^#x22#x5C#xA#xD") ECHAR UCHAR)) "\""))
38
54
  (terminal STRING_LITERAL_SINGLE_QUOTE "23"
39
55
  (seq "'" (star (alt (range "^#x27#x5C#xA#xD") ECHAR UCHAR)) "'"))
40
56
  (terminal STRING_LITERAL_LONG_SINGLE_QUOTE "24"
41
- (seq "'''" (seq (opt (alt "'" "''")) (range "^'] | ECHAR | UCHAR ))* \"'''\""))))
57
+ (seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR UCHAR))) "'''"))
42
58
  (terminal STRING_LITERAL_LONG_QUOTE "25"
43
- (seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"] | ECHAR | UCHAR ))* '\"\"\"'"))))
59
+ (seq "\"\"\"" (star (seq (opt (alt "\"" "\"\"")) (alt (range "^\"\\") ECHAR UCHAR))) "\"\"\""))
44
60
  (terminal UCHAR "26"
45
- (alt (seq "u" HEX HEX HEX HEX) (seq "U" HEX HEX HEX HEX HEX HEX HEX HEX)))
46
- (rule sparqlPrefix "28s" (seq SPARQL_PREFIX PNAME_NS IRIREF))
61
+ (alt (seq "\\u" HEX HEX HEX HEX) (seq "\\U" HEX HEX HEX HEX HEX HEX HEX HEX)))
62
+ (terminal ECHAR "159s" (seq "\\" (range "tbnrf\\\"'")))
47
63
  (terminal SPARQL_PREFIX "28t"
48
64
  (seq (range "Pp") (range "Rr") (range "Ee") (range "Ff") (range "Ii") (range "Xx")))
49
65
  (terminal SPARQL_BASE "29t" (seq (range "Bb") (range "Aa") (range "Ss") (range "Ee")))
50
- (rule sparqlBase "29s" (seq SPARQL_BASE IRIREF))
51
- (rule RDFLiteral "128s" (seq String (opt (alt LANGTAG (seq "^^" iri)))))
52
- (rule BooleanLiteral "133s" (alt "true" "false"))
53
- (rule iri "135s" (alt IRIREF PrefixedName))
54
- (rule PrefixedName "136s" (alt PNAME_LN PNAME_NS))
55
- (rule BlankNode "137s" (alt BLANK_NODE_LABEL ANON))
56
- (terminal PNAME_NS "139s" (seq (opt PN_PREFIX) ":"))
57
- (terminal PNAME_LN "140s" (seq PNAME_NS PN_LOCAL))
58
- (terminal BLANK_NODE_LABEL "141s"
59
- (seq "_:" (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS))))
60
- (terminal LANGTAG "144s"
61
- (seq "@" (plus (range "a-zA-Z")) (star (seq "-" (plus (range "a-zA-Z0-9"))))))
62
- (terminal EXPONENT "154s" (seq (range "eE") (opt (range "+-")) (plus (range "0-9"))))
63
- (terminal ECHAR "159s" (seq "\\" (range "tbnrf\"'")))
64
66
  (terminal WS "161s" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA")))
65
67
  (terminal ANON "162s" (seq "[" (star WS) "]"))
66
68
  (terminal PN_CHARS_BASE "163s"
@@ -94,7 +96,7 @@
94
96
  (opt (seq (star (alt PN_CHARS "." ":" PLX)) (alt PN_CHARS ":" PLX)))) )
95
97
  (terminal PLX "169s" (alt PERCENT PN_LOCAL_ESC))
96
98
  (terminal PERCENT "170s" (seq "%" HEX HEX))
97
- (terminal HEX "171s" (alt (range "0-9") (range "A-F") (range "a-f")))
99
+ (terminal HEX "171s" (range "0-9A-Fa-f"))
98
100
  (terminal PN_LOCAL_ESC "172s"
99
101
  (seq "\\"
100
102
  (alt "_" "~" "." "-" "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=" "/" "?" "#"
@@ -1,7 +1,10 @@
1
1
  module EBNF
2
+ autoload :ABNF, "ebnf/abnf"
2
3
  autoload :Base, "ebnf/base"
3
4
  autoload :BNF, "ebnf/bnf"
5
+ autoload :ISOEBNF, "ebnf/isoebnf"
4
6
  autoload :LL1, "ebnf/ll1"
7
+ autoload :Native, "ebnf/native"
5
8
  autoload :Parser, "ebnf/parser"
6
9
  autoload :PEG, "ebnf/peg"
7
10
  autoload :Rule, "ebnf/rule"
@@ -0,0 +1,301 @@
1
+ require_relative 'abnf/core'
2
+ require_relative 'abnf/meta'
3
+ require 'logger'
4
+
5
+ # ABNF parser
6
+ # Parses ABNF into an array of {EBNF::Rule}.
7
+ module EBNF
8
+ class ABNF
9
+ include EBNF::PEG::Parser
10
+
11
+ # Regular expressions for both "Core" and ABNF-specific terminals.
12
+ ALPHA = %r{[\x41-\x5A\x61-\x7A]}
13
+ VCHAR = %r{[\x20-\x7E]}
14
+ WSP = %r{[\x20\x09]}
15
+ CRLF = %r{\x0D?\x0A}
16
+ COMMENT = %r{;(?:#{WSP}|#{VCHAR})*#{CRLF}}
17
+ C_NL = %r{#{COMMENT}|#{CRLF}}
18
+ C_WSP = %r{#{WSP}|(?:#{C_NL}#{WSP})}
19
+
20
+ ##
21
+ # Hash of generated {EBNF::Rule} objects by symbol
22
+ #
23
+ # @return [Hash{Symbol => EBNF::Rule}]
24
+ attr_reader :parsed_rules
25
+
26
+ ##
27
+ # The following ABNF grammar rules are treated as terminals.
28
+
29
+ # `rulename ::= ALPHA (ALPHA | DIGIT | "-")*`
30
+ terminal(:rulename, /#{ALPHA}(?:#{ALPHA}|[0-9-])*/) do |value|
31
+ value.to_sym
32
+ end
33
+
34
+ # `defined_as ::= c_wsp* ("=" | "=/") c_wsp*`
35
+ terminal(:defined_as, /#{C_WSP}*=\/?#{C_WSP}*/) {|value| value.strip}
36
+
37
+ # `quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE`
38
+ terminal(:quoted_string, /"[\x20-\x21\x23-\x7E]*"/) do |value|
39
+ value[1..-2]
40
+ end
41
+
42
+ # `bin_val ::= "b" BIT+ (("." BIT+)+ | ("-" BIT+))?`
43
# The handler turns the matched text into an expression:
# * dotted form  (`b1000001.1000010`) => result of `hex_or_string` on the decoded characters
# * dashed form  (`b0-1111`)          => `[:range, "#xLO-#xHI"]`
# * single value (`b1101`)            => `[:hex, "#xN"]`
terminal(:bin_val, /b[01]+(?:(?:(?:\.[01]+)+)|(?:-[01]+))?/) do |value|
  # Everything after the leading "b" marker.
  digits = value[1..-1]

  if value.include?('.')
    # Interpret segments in binary creating a sequence of hex characters or a string
    hex_or_string(digits.split('.').map {|b| b.to_i(2).chr(Encoding::UTF_8)})
  elsif value.include?('-')
    # Interpret as a range
    [:range, digits.split('-').map {|b| "#x%x" % b.to_i(2)}.join("-")]
  else
    # Interpret as a single HEX character
    [:hex, "#x%x" % digits.to_i(2)]
  end
end
55
+
56
+ # `dec_val ::= "d" DIGIT+ (("." DIGIT+)+ | ("-" DIGIT+))?`
57
# The handler turns the matched text into an expression:
# * dotted form  (`d13.10`)  => result of `hex_or_string` on the decoded characters
# * dashed form  (`d48-57`)  => `[:range, "#xLO-#xHI"]`
# * single value (`d13`)     => `[:hex, "#xN"]`
terminal(:dec_val, /d[0-9]+(?:(?:(?:\.[0-9]+)+)|(?:-[0-9]+))?/) do |value|
  # Everything after the leading "d" marker.
  digits = value[1..-1]

  if value.include?('.')
    # Each dot-separated segment is one decimal code point.
    chars = digits.split('.').map {|d| d.to_i.chr(Encoding::UTF_8)}
    hex_or_string(chars)
  elsif value.include?('-')
    # Both bounds are rendered in `#x` hexadecimal notation.
    bounds = digits.split('-').map {|d| "#x%x" % d.to_i}
    [:range, bounds.join("-")]
  else
    # A lone value becomes a single hex character.
    [:hex, "#x%x" % digits.to_i]
  end
end
69
+
70
+ # `hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))?`
71
# The handler turns the matched text into an expression:
# * dotted form  (`x0D.0A`)  => result of `hex_or_string` on the decoded characters
# * dashed form  (`x30-39`)  => `[:range, "#xLO-#xHI"]` (bounds normalised via `%x`)
# * single value (`x0D`)     => `[:hex, "#x0D"]` (digits kept exactly as written)
terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/i) do |value|
  # Everything after the leading "x" marker.
  digits = value[1..-1]

  if value.include?('.')
    # Interpret segments in hexadecimal creating a sequence of hex characters or a string
    hex_or_string(digits.split('.').map {|h| h.to_i(16).chr(Encoding::UTF_8)})
  elsif value.include?('-')
    # Interpret as a range
    [:range, digits.split('-').map {|h| "#x%x" % h.to_i(16)}.join("-")]
  else
    # Interpret as a single HEX character
    [:hex, "#x#{digits}"]
  end
end
83
+
84
+ # `c_wsp ::= WSP | (c_nl WSP)`
85
+ terminal(:c_wsp, C_WSP)
86
+
87
+ # `c_nl ::= comment | CRLF`
88
+ terminal(:c_nl, C_NL)
89
+
90
+ # `DIGIT ::= [#x30-#x39]`
91
+ terminal(:DIGIT, /\d/)
92
+
93
+ # ## Non-terminal productions
94
+
95
+ # The `start_production` on `:rule` allows the parser to present the value as a single Hash, rather than an array of individual hashes.
96
+ start_production(:rule, as_hash: true)
97
+
98
+ # `rule ::= rulename defined_as elements c_nl`
99
+ production(:rule) do |value|
100
+ # value contains an expression.
101
+ # Invoke callback
102
+ sym = value[:rulename]
103
+ elements = value[:elements]
104
+
105
+ if value[:defined_as] == "=/"
106
+ # append to rule alternate
107
+ rule = parsed_rules.fetch(sym) {raise "No existing rule found for #{sym}"}
108
+ rule.expr = [:alt, rule.expr] unless rule.alt?
109
+ if elements.is_a?(Array) && elements.first == :alt
110
+ # append alternatives to rule
111
+ rule.expr.concat(elements[1..-1])
112
+ else
113
+ # add elements as last alternative
114
+ rule.expr.push(elements)
115
+ end
116
+ else
117
+ # There shouldn't be an existing rule
118
+ raise "Redefining rule #{sym}" if parsed_rules.has_key?(sym)
119
+ parsed_rules[sym] = EBNF::Rule.new(sym.to_sym, nil, elements)
120
+ end
121
+ progress(:rule, level: 2) {parsed_rules[sym].to_sxp}
122
+ sym
123
+ end
124
+
125
+ # `elements ::= alternation c_wsp*`
126
+ production(:elements) do |value|
127
+ value.first[:alternation]
128
+ end
129
+
130
+ # `alternation ::= concatenation (c_wsp* "/" c_wsp* concatenation)*`
131
# Builds `[:alt, first, *others]` when further alternatives were matched;
# otherwise passes the single concatenation through unchanged.
production(:alternation) do |value|
  others = value.last[:_alternation_1]
  first = value.first[:concatenation]

  if others.empty?
    first
  else
    [:alt, first] + others
  end
end
138
+
139
+ # The `_alternation_2` rule comes from the expanded PEG grammar and serves as an opportunity to customize the values presented to the `alternation` rule.
140
# Yields the concatenation matched by one repeated alternative so that
# `alternation` can append it to its `[:alt, ...]` expression.
#
# An earlier `if/else` here computed a flattened or wrapped form of the
# concatenation, but its result was discarded: the block's value was always
# the trailing `value.last[:concatenation]`. That dead branch is removed and
# the returned value is unchanged.
production(:_alternation_2) do |value|
  value.last[:concatenation]
end
148
+
149
+ # `concatenation::= repetition (c_wsp+ repetition)*`
150
# Builds `[:seq, first, *others]` when further repetitions were matched;
# otherwise passes the single repetition through unchanged.
production(:concatenation) do |value|
  others = value.last[:_concatenation_1]
  first = value.first[:repetition]

  if others.empty?
    first
  else
    [:seq, first] + others
  end
end
157
+ start_production(:_concatenation_2, as_hash: true)
158
+ production(:_concatenation_2) do |value|
159
+ value[:repetition]
160
+ end
161
+
162
+ # `repetition ::= repeat? element`
163
+ production(:repetition) do |value|
164
+ rept = value.first[:_repetition_1]
165
+ elt = value.last[:element]
166
+ case rept
167
+ when [0, '*'] then [:star, elt]
168
+ when [1, '*'] then [:plus, elt]
169
+ when nil then elt
170
+ else
171
+ [:rept, rept.first, rept.last, elt]
172
+ end
173
+ end
174
+
175
+ # `repeat ::= DIGIT+ | (DIGIT* "*" DIGIT*)`
176
+ production(:repeat) do |value|
177
+ if value.is_a?(Integer)
178
+ [value, value]
179
+ else
180
+ [value.first, value.last]
181
+ end
182
+ end
183
+ start_production(:_repeat_1, as_hash: true)
184
+ production(:_repeat_1) {|value| value.values}
185
+ production(:_repeat_2) {|value| value.join("").to_i}
186
+ production(:_repeat_3) {|value| value.join("").to_i}
187
+ production(:_repeat_4) {|value| value.length > 0 ? value.join("").to_i : '*'}
188
+
189
+ # `element ::= rulename | group | option | char_val | num_val | prose_val`
190
+ production(:element) do |value|
191
+ value
192
+ end
193
+
194
+ # `group ::= "(" c_wsp* alternation c_wsp* ")"`
195
+ start_production(:group, as_hash: true)
196
+ production(:group) do |value|
197
+ value[:alternation]
198
+ end
199
+
200
+ # `option ::= "[" c_wsp* alternation c_wsp* "]"`
201
+ start_production(:option, as_hash: true)
202
+ production(:option) do |value|
203
+ [:opt, value[:alternation]]
204
+ end
205
+
206
+ # `case_insensitive_string ::= "%i"? quoted_string`
207
# Wraps the quoted string in `[:istr, ...]` only when it contains alphabetic
# characters; a string without letters has no case to ignore, so it is
# returned as-is.
production(:case_insensitive_string) do |value|
  str = value.last[:quoted_string]
  str.match?(/[[:alpha:]]/) ? [:istr, str] : str
end
216
+
217
+ # `case_sensitive_string ::= "%s" quoted_string`
218
+ production(:case_sensitive_string) do |value|
219
+ value.last[:quoted_string]
220
+ end
221
+
222
+ # `num_val ::= "%" (bin_val | dec_val | hex_val)`
223
+ production(:num_val) do |value|
224
+ value.last[:_num_val_1]
225
+ end
226
+
227
+ # ## Parser invocation.
228
+ # Parses `input` immediately on construction, collecting the resulting rules in `parsed_rules`.
229
+ #
230
+ # @param [#read, #to_s] input
231
+ # @param [Hash{Symbol => Object}] options
232
+ # @option options [Integer] :level
233
+ # Trace level. 0(debug), 1(info), 2(warn), 3(error).
234
+ # @return [EBNF::ABNF]
235
+ def initialize(input, **options)
236
+ # If the `level` option is set, instantiate a logger for collecting trace information.
237
+ if options.has_key?(:level)
238
+ options[:logger] = Logger.new(STDERR)
239
+ options[:logger].level = options[:level]
240
+ options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
241
+ end
242
+
243
+ # Read input, if necessary, which will be used in a Scanner.
244
+ @input = input.respond_to?(:read) ? input.read : input.to_s
245
+
246
+ @parsed_rules = {}
247
+
248
+ # Parses into `@parsed_rules`
249
+ parse(@input,
250
+ :rulelist, # Starting rule
251
+ ABNFMeta::RULES, # PEG rules
252
+ whitespace: '', # No implicit whitespace
253
+ **options)
254
+ rescue EBNF::PEG::Parser::Error => e
255
+ raise SyntaxError, e.message
256
+ end
257
+
258
+ ##
259
+ # The AST includes the parsed rules along with built-in rules for ABNF used within the parsed grammar.
260
+ #
261
+ # @return [Array<EBNF::Rule>]
262
+ def ast
263
+ # Add built-in rules for any standard ABNF core rules that are referenced but not defined by the parsed grammar.
264
+ parsed_rules.values.map(&:symbols).flatten.uniq.each do |sym|
265
+ rule = ABNFCore::RULES.detect {|r| r.sym == sym}
266
+ parsed_rules[sym] ||= rule if rule
267
+ end
268
+
269
+ parsed_rules.values
270
+ end
271
+
272
+ private
273
+ # Generate a combination of seq and string to represent a sequence of characters
274
+ #
275
+ # @param [Array<String>] characters
276
+ # @return [String,Array]
277
def hex_or_string(characters)
  seq = [:seq]
  # `+""` yields an unfrozen buffer, so the `<<` below keeps working even if
  # the file is later given a `frozen_string_literal: true` magic comment.
  pending = +""

  characters.each do |char|
    if VCHAR.match?(char)
      # Characters matching VCHAR accumulate into a plain string run.
      pending << char
      next
    end

    # Any other character ends the current run and is emitted as `[:hex, "#x…"]`.
    unless pending.empty?
      seq << pending
      pending = +""
    end
    seq << [:hex, "#x%x" % char.codepoints.first]
  end
  seq << pending unless pending.empty?

  # A sequence holding exactly one string collapses to that string;
  # anything else is returned as the `[:seq, ...]` expression.
  seq.length == 2 && seq.last.is_a?(String) ? seq.last : seq
end
300
+ end
301
+ end