ebnf 2.0.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +81 -36
- data/VERSION +1 -1
- data/bin/ebnf +34 -18
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/ebnf.ebnf +19 -25
- data/etc/ebnf.html +251 -206
- data/etc/ebnf.ll1.rb +27 -103
- data/etc/ebnf.ll1.sxp +105 -102
- data/etc/ebnf.peg.rb +54 -62
- data/etc/ebnf.peg.sxp +53 -62
- data/etc/ebnf.sxp +22 -19
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.sxp +8 -7
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.sxp +22 -20
- data/lib/ebnf.rb +3 -0
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +87 -44
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +4 -7
- data/lib/ebnf/ll1/parser.rb +12 -4
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +1 -1
- data/lib/ebnf/peg/parser.rb +24 -5
- data/lib/ebnf/peg/rule.rb +77 -58
- data/lib/ebnf/rule.rb +352 -121
- data/lib/ebnf/terminals.rb +13 -10
- data/lib/ebnf/writer.rb +550 -78
- metadata +48 -6
@@ -0,0 +1,138 @@
|
|
1
|
+
(* W3C EBNF for ISO/IEC 14977 : 1996 EBNF *)
|
2
|
+
(* Scoured from https://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf *)
|
3
|
+
|
4
|
+
syntax = syntax_rule, {syntax_rule} ;
|
5
|
+
|
6
|
+
syntax_rule = meta_identifier, defining_symbol, definitions_list, terminator_symbol
|
7
|
+
(* A <syntax rule> defines the sequences of
|
8
|
+
symbols represented by a <meta identifier> *);
|
9
|
+
|
10
|
+
definitions_list = single_definition, {definition_separator_symbol, definitions_list}
|
11
|
+
(* | separates alternative <single definitions> *);
|
12
|
+
|
13
|
+
single_definition = term, {',', term}
|
14
|
+
(* , separates successive <terms> *);
|
15
|
+
|
16
|
+
term = factor, ['-', exception]
|
17
|
+
(* A <term> represents any sequence of symbols that is defined by the <factor> but
|
18
|
+
not defined by the <exception> *);
|
19
|
+
|
20
|
+
exception = factor
|
21
|
+
(* A <factor> may be used as an <exception>
|
22
|
+
if it could be replaced by a <factor>
|
23
|
+
containing no <meta identifiers> *);
|
24
|
+
|
25
|
+
factor = [integer, '*'], primary
|
26
|
+
(* The <integer> specifies the number of repetitions of the <primary> *);
|
27
|
+
|
28
|
+
primary = optional_sequence
|
29
|
+
| repeated_sequence
|
30
|
+
| special_sequence
|
31
|
+
| grouped_sequence
|
32
|
+
| meta_identifier
|
33
|
+
| terminal_string
|
34
|
+
| empty
|
35
|
+
;
|
36
|
+
|
37
|
+
optional_sequence = start_option_symbol, definitions_list, end_option_symbol
|
38
|
+
(* The brackets [ and ] enclose symbols which are optional *);
|
39
|
+
|
40
|
+
repeated_sequence = start_repeat_symbol, definitions_list, end_repeat_symbol
|
41
|
+
(* The brackets { and } enclose symbols
|
42
|
+
which may be repeated any number of times *);
|
43
|
+
|
44
|
+
grouped_sequence = '(', definitions_list, ')'
|
45
|
+
(* The brackets ( and ) allow any <definitions list> to be a <primary> *);
|
46
|
+
|
47
|
+
terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'")
|
48
|
+
| ('"', second_terminal_character, {second_terminal_character}, '"')
|
49
|
+
(* A <terminal string> represents the
|
50
|
+
<characters> between the quote symbols '_' or "_" *);
|
51
|
+
|
52
|
+
meta_identifier = letter, {meta_identifier_character}
|
53
|
+
(* A <meta identifier> is the name of a syntactic element of the language being defined *);
|
54
|
+
|
55
|
+
integer = decimal_digit, {decimal_digit} ;
|
56
|
+
|
57
|
+
special_sequence = '?', {special_sequence_character}, '?'
|
58
|
+
(* The meaning of a <special sequence> is not defined in the standard metalanguage. *);
|
59
|
+
|
60
|
+
comment = '(*', {comment_symbol}, '*)'
|
61
|
+
(* A comment is allowed anywhere outside a
|
62
|
+
<terminal string>, <meta identifier>,
|
63
|
+
<integer> or <special sequence> *);
|
64
|
+
|
65
|
+
comment_symbol = comment | commentless_symbol | other_character ;
|
66
|
+
|
67
|
+
commentless_symbol = terminal_character | meta_identifier | integer
|
68
|
+
| terminal_string | special_sequence
|
69
|
+
;
|
70
|
+
|
71
|
+
letter = "A" | "B" | "C" | "D" | "E" | "F" | "G"
|
72
|
+
| "H" | "I" | "J" | "K" | "L" | "M" | "N"
|
73
|
+
| "O" | "P" | "Q" | "R" | "S" | "T" | "U"
|
74
|
+
| "V" | "W" | "X" | "Y" | "Z" | "a" | "b"
|
75
|
+
| "c" | "d" | "e" | "f" | "g" | "h" | "i"
|
76
|
+
| "j" | "k" | "l" | "m" | "n" | "o" | "p"
|
77
|
+
| "q" | "r" | "s" | "t" | "u" | "v" | "w"
|
78
|
+
| "x" | "y" | "z"
|
79
|
+
;
|
80
|
+
|
81
|
+
decimal_digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
|
82
|
+
|
83
|
+
(* Extended to allow '_' *)
|
84
|
+
meta_identifier_character = letter | decimal_digit | '_' ;
|
85
|
+
|
86
|
+
first_terminal_character = terminal_character - "'" ;
|
87
|
+
|
88
|
+
second_terminal_character = terminal_character - '"' ;
|
89
|
+
|
90
|
+
special_sequence_character = terminal_character - '?' ;
|
91
|
+
|
92
|
+
terminal_character = letter
|
93
|
+
| decimal_digit
|
94
|
+
| concatenate_symbol
|
95
|
+
| defining_symbol
|
96
|
+
| definition_separator_symbol
|
97
|
+
| end_comment_symbol
|
98
|
+
| end_group_symbol
|
99
|
+
| end_option_symbol
|
100
|
+
| end_repeat_symbol
|
101
|
+
| except_symbol
|
102
|
+
| first_quote_symbol
|
103
|
+
| repetition_symbol
|
104
|
+
| second_quote_symbol
|
105
|
+
| special_sequence_symbol
|
106
|
+
| start_comment_symbol
|
107
|
+
| start_group_symbol
|
108
|
+
| start_option_symbol
|
109
|
+
| start_repeat_symbol
|
110
|
+
| terminator_symbol
|
111
|
+
| other_character
|
112
|
+
;
|
113
|
+
|
114
|
+
other_character = ' ' | ':' | '+' | '_' | '%' | '@' | '&'
|
115
|
+
| '#' | '$' | '<' | '>' | '\' | '^' | '`'
|
116
|
+
| '~' ;
|
117
|
+
|
118
|
+
empty = ;
|
119
|
+
|
120
|
+
concatenate_symbol = ',' ;
|
121
|
+
repetition_symbol = '*' ;
|
122
|
+
except_symbol = '-' ;
|
123
|
+
first_quote_symbol = "'" ;
|
124
|
+
second_quote_symbol = '"' ;
|
125
|
+
start_comment_symbol = '(*' ;
|
126
|
+
end_comment_symbol = '*)' ;
|
127
|
+
start_group_symbol = '(' ;
|
128
|
+
end_group_symbol = ')' ;
|
129
|
+
special_sequence_symbol = '?' ;
|
130
|
+
|
131
|
+
(* Simple terminals that are often extended *)
|
132
|
+
defining_symbol = '=' | ':' ;
|
133
|
+
definition_separator_symbol = '|' | '/' | '!' ;
|
134
|
+
terminator_symbol = ';' | '.' ;
|
135
|
+
start_option_symbol = '[' ;
|
136
|
+
end_option_symbol = ']' ;
|
137
|
+
start_repeat_symbol = '{' | '(:' ;
|
138
|
+
end_repeat_symbol = '}' | ':)' ;
|
data/etc/iso-ebnf.sxp
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
(
|
2
|
+
(rule syntax (star syntax_rule))
|
3
|
+
(rule syntax_rule
|
4
|
+
(seq meta_identifier defining_symbol definitions_list terminator_symbol))
|
5
|
+
(rule definitions_list
|
6
|
+
(seq single_definition (star (seq definition_separator_symbol definitions_list))))
|
7
|
+
(rule single_definition (seq term (star (seq "," term))))
|
8
|
+
(rule term (seq factor (opt (seq "-" exception))))
|
9
|
+
(rule exception (seq factor))
|
10
|
+
(rule factor (seq (opt (seq integer "*")) primary))
|
11
|
+
(rule primary
|
12
|
+
(alt optional_sequence repeated_sequence special_sequence grouped_sequence
|
13
|
+
meta_identifier terminal_string empty ))
|
14
|
+
(rule optional_sequence
|
15
|
+
(seq start_option_symbol definitions_list end_option_symbol))
|
16
|
+
(rule repeated_sequence
|
17
|
+
(seq start_repeat_symbol definitions_list end_repeat_symbol))
|
18
|
+
(rule grouped_sequence (seq "(" definitions_list ")"))
|
19
|
+
(terminals _terminals (seq))
|
20
|
+
(terminal terminal_string
|
21
|
+
(alt
|
22
|
+
(seq "'" (plus first_terminal_character) "'")
|
23
|
+
(seq "\"" (plus second_terminal_character) "\"")) )
|
24
|
+
(terminal meta_identifier (seq letter (star meta_identifier_character)))
|
25
|
+
(terminal integer (plus decimal_digit))
|
26
|
+
(terminal special_sequence (seq "?" (star special_sequence_character) "?"))
|
27
|
+
(terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol))
|
28
|
+
(terminal comment_symbol (alt comment commentless_symbol other_character))
|
29
|
+
(terminal commentless_symbol
|
30
|
+
(alt terminal_character meta_identifier integer terminal_string special_sequence))
|
31
|
+
(terminal letter (range "a-zA-Z"))
|
32
|
+
(terminal decimal_digit (range "0-9"))
|
33
|
+
(terminal meta_identifier_character (alt letter decimal_digit "_"))
|
34
|
+
(terminal first_terminal_character (diff terminal_character "'"))
|
35
|
+
(terminal second_terminal_character (diff terminal_character "\""))
|
36
|
+
(terminal special_sequence_character (diff terminal_character "?"))
|
37
|
+
(terminal terminal_character
|
38
|
+
(alt letter decimal_digit concatenate_symbol defining_symbol
|
39
|
+
definition_separator_symbol end_comment_symbol end_group_symbol
|
40
|
+
end_option_symbol end_repeat_symbol except_symbol first_quote_symbol
|
41
|
+
repetition_symbol second_quote_symbol special_sequence_symbol
|
42
|
+
start_comment_symbol start_group_symbol start_option_symbol
|
43
|
+
start_repeat_symbol terminator_symbol other_character ))
|
44
|
+
(terminal other_character (alt (range ":+_%@&$<>^` ̃#x20#x23") "\\"))
|
45
|
+
(terminal gap_separator (range "#x9#xa#xb#xc#xd#x20"))
|
46
|
+
(pass _pass (alt (plus gap_separator) comment))
|
47
|
+
(terminal empty (seq ""))
|
48
|
+
(terminal concatenate_symbol (seq ","))
|
49
|
+
(terminal repetition_symbol (seq "*"))
|
50
|
+
(terminal except_symbol (seq "-"))
|
51
|
+
(terminal first_quote_symbol (seq "'"))
|
52
|
+
(terminal second_quote_symbol (seq "\""))
|
53
|
+
(terminal start_comment_symbol (seq "(*"))
|
54
|
+
(terminal end_comment_symbol (seq "*)"))
|
55
|
+
(terminal start_group_symbol (seq "("))
|
56
|
+
(terminal end_group_symbol (seq ")"))
|
57
|
+
(terminal special_sequence_symbol (seq "?"))
|
58
|
+
(terminal defining_symbol (alt "=" ":"))
|
59
|
+
(terminal definition_separator_symbol (alt "|" "/" "!"))
|
60
|
+
(terminal terminator_symbol (alt ";" "."))
|
61
|
+
(terminal start_option_symbol (seq "["))
|
62
|
+
(terminal end_option_symbol (seq "]"))
|
63
|
+
(terminal start_repeat_symbol (alt "{" "(:"))
|
64
|
+
(terminal end_repeat_symbol (alt "}" ":)"))
|
65
|
+
(terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string)))
|
data/etc/sparql.ebnf
CHANGED
@@ -243,13 +243,13 @@
|
|
243
243
|
|
244
244
|
@terminals
|
245
245
|
|
246
|
-
[139] IRIREF ::= '<' ([
|
246
|
+
[139] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20])* '>'
|
247
247
|
[140] PNAME_NS ::= PN_PREFIX? ':'
|
248
248
|
[141] PNAME_LN ::= PNAME_NS PN_LOCAL
|
249
249
|
[142] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
|
250
250
|
[143] VAR1 ::= '?' VARNAME
|
251
251
|
[144] VAR2 ::= '$' VARNAME
|
252
|
-
[145] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
|
252
|
+
[145] LANGTAG ::= '@' ([a-zA-Z])+ ('-' ([a-zA-Z0-9])+)*
|
253
253
|
[146] INTEGER ::= [0-9]+
|
254
254
|
[147] DECIMAL ::= [0-9]* '.' [0-9]+
|
255
255
|
[148] DOUBLE ::= [0-9]+ '.' [0-9]* EXPONENT
|
@@ -269,7 +269,7 @@
|
|
269
269
|
[161] NIL ::= '(' WS* ')'
|
270
270
|
[162] WS ::= #x20 | #x9 | #xD | #xA
|
271
271
|
[163] ANON ::= '[' WS* ']'
|
272
|
-
[164] PN_CHARS_BASE ::= [A-
|
272
|
+
[164] PN_CHARS_BASE ::= [A-Za-z] | [#x00C0-#x00D6]
|
273
273
|
| [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D]
|
274
274
|
| [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F]
|
275
275
|
| [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF]
|
@@ -283,6 +283,6 @@
|
|
283
283
|
[169] PN_LOCAL ::= ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
|
284
284
|
[170] PLX ::= PERCENT | PN_LOCAL_ESC
|
285
285
|
[171] PERCENT ::= '%' HEX HEX
|
286
|
-
[172] HEX ::= [0-
|
286
|
+
[172] HEX ::= [0-9A-Fa-f]
|
287
287
|
[173] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '='
|
288
288
|
| '/' | '?' | '#' | '@' | '%' )
|
data/etc/sparql.sxp
CHANGED
@@ -282,7 +282,9 @@
|
|
282
282
|
(rule iri "136" (alt IRIREF PrefixedName))
|
283
283
|
(rule PrefixedName "137" (alt PNAME_LN PNAME_NS))
|
284
284
|
(rule BlankNode "138" (alt BLANK_NODE_LABEL ANON))
|
285
|
-
(
|
285
|
+
(terminals _terminals (seq))
|
286
|
+
(terminal IRIREF "139"
|
287
|
+
(seq "<" (star (diff (range "^<>\"{}|^`\\") (range "#x00-#x20"))) ">"))
|
286
288
|
(terminal PNAME_NS "140" (seq (opt PN_PREFIX) ":"))
|
287
289
|
(terminal PNAME_LN "141" (seq PNAME_NS PN_LOCAL))
|
288
290
|
(terminal BLANK_NODE_LABEL "142"
|
@@ -310,17 +312,16 @@
|
|
310
312
|
(terminal STRING_LITERAL2 "157"
|
311
313
|
(seq "\"" (star (alt (range "^#x22#x5C#xA#xD") ECHAR)) "\""))
|
312
314
|
(terminal STRING_LITERAL_LONG1 "158"
|
313
|
-
(seq "'''" (seq (opt (alt "'" "''")) (range "^'
|
315
|
+
(seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR))) "'''"))
|
314
316
|
(terminal STRING_LITERAL_LONG2 "159"
|
315
|
-
(seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"
|
316
|
-
(terminal ECHAR "160" (seq "\\" (range "tbnrf
|
317
|
+
(seq "\"\"\"" (star (seq (opt (alt "\"" "\"\"")) (alt (range "^\"\\") ECHAR))) "\"\"\""))
|
318
|
+
(terminal ECHAR "160" (seq "\\" (range "tbnrf\\\"'")))
|
317
319
|
(terminal NIL "161" (seq "(" (star WS) ")"))
|
318
320
|
(terminal WS "162" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA")))
|
319
321
|
(terminal ANON "163" (seq "[" (star WS) "]"))
|
320
322
|
(terminal PN_CHARS_BASE "164"
|
321
323
|
(alt
|
322
|
-
(range "A-
|
323
|
-
(range "a-z")
|
324
|
+
(range "A-Za-z")
|
324
325
|
(range "#x00C0-#x00D6")
|
325
326
|
(range "#x00D8-#x00F6")
|
326
327
|
(range "#x00F8-#x02FF")
|
@@ -355,7 +356,7 @@
|
|
355
356
|
(seq (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS))))
|
356
357
|
(terminal PLX "170" (alt PERCENT PN_LOCAL_ESC))
|
357
358
|
(terminal PERCENT "171" (seq "%" HEX HEX))
|
358
|
-
(terminal HEX "172" (
|
359
|
+
(terminal HEX "172" (range "0-9A-Fa-f"))
|
359
360
|
(terminal PN_LOCAL_ESC "173"
|
360
361
|
(seq "\\"
|
361
362
|
(alt "_" "~" "." "-" "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=" "/" "?" "#"
|
data/etc/turtle.ebnf
CHANGED
@@ -26,11 +26,11 @@
|
|
26
26
|
|
27
27
|
@terminals
|
28
28
|
|
29
|
-
[18] IRIREF ::=
|
29
|
+
[18] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'
|
30
30
|
[139s] PNAME_NS ::= PN_PREFIX? ":"
|
31
31
|
[140s] PNAME_LN ::= PNAME_NS PN_LOCAL
|
32
32
|
[141s] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
|
33
|
-
[144s] LANGTAG ::= "@" [a-zA-Z]+ ( "-" [a-zA-Z0-9]+ )*
|
33
|
+
[144s] LANGTAG ::= "@" ([a-zA-Z])+ ( "-" ([a-zA-Z0-9])+ )*
|
34
34
|
[19] INTEGER ::= [+-]? [0-9]+
|
35
35
|
[20] DECIMAL ::= [+-]? ( ([0-9])* '.' ([0-9])+ )
|
36
36
|
[21] DOUBLE ::= [+-]? ( [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT )
|
@@ -65,6 +65,6 @@
|
|
65
65
|
[168s] PN_LOCAL ::= ( PN_CHARS_U | ':' | [0-9] | PLX ) ( ( PN_CHARS | '.' | ':' | PLX )* ( PN_CHARS | ':' | PLX ) ) ?
|
66
66
|
[169s] PLX ::= PERCENT | PN_LOCAL_ESC
|
67
67
|
[170s] PERCENT ::= '%' HEX HEX
|
68
|
-
[171s] HEX ::= [0-
|
68
|
+
[171s] HEX ::= [0-9A-Fa-f]
|
69
69
|
[172s] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '='
|
70
70
|
| '/' | '?' | '#' | '@' | '%' )
|
data/etc/turtle.sxp
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
(rule directive "3" (alt prefixID base sparqlPrefix sparqlBase))
|
5
5
|
(rule prefixID "4" (seq "@prefix" PNAME_NS IRIREF "."))
|
6
6
|
(rule base "5" (seq "@base" IRIREF "."))
|
7
|
+
(rule sparqlPrefix "28s" (seq SPARQL_PREFIX PNAME_NS IRIREF))
|
8
|
+
(rule sparqlBase "29s" (seq SPARQL_BASE IRIREF))
|
7
9
|
(rule triples "6"
|
8
10
|
(alt
|
9
11
|
(seq subject predicateObjectList)
|
@@ -19,10 +21,23 @@
|
|
19
21
|
(rule blankNodePropertyList "14" (seq "[" predicateObjectList "]"))
|
20
22
|
(rule collection "15" (seq "(" (star object) ")"))
|
21
23
|
(rule NumericLiteral "16" (alt INTEGER DECIMAL DOUBLE))
|
24
|
+
(rule RDFLiteral "128s" (seq String (opt (alt LANGTAG (seq "^^" iri)))))
|
25
|
+
(rule BooleanLiteral "133s" (alt "true" "false"))
|
22
26
|
(rule String "17"
|
23
27
|
(alt STRING_LITERAL_QUOTE STRING_LITERAL_SINGLE_QUOTE
|
24
28
|
STRING_LITERAL_LONG_SINGLE_QUOTE STRING_LITERAL_LONG_QUOTE ))
|
25
|
-
(
|
29
|
+
(rule iri "135s" (alt IRIREF PrefixedName))
|
30
|
+
(rule PrefixedName "136s" (alt PNAME_LN PNAME_NS))
|
31
|
+
(rule BlankNode "137s" (alt BLANK_NODE_LABEL ANON))
|
32
|
+
(terminals _terminals (seq))
|
33
|
+
(terminal IRIREF "18"
|
34
|
+
(seq "<" (star (alt (diff (range "^<>\"{}|^`\\") (range "#x00-#x20")) UCHAR)) ">"))
|
35
|
+
(terminal PNAME_NS "139s" (seq (opt PN_PREFIX) ":"))
|
36
|
+
(terminal PNAME_LN "140s" (seq PNAME_NS PN_LOCAL))
|
37
|
+
(terminal BLANK_NODE_LABEL "141s"
|
38
|
+
(seq "_:" (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS))))
|
39
|
+
(terminal LANGTAG "144s"
|
40
|
+
(seq "@" (plus (range "a-zA-Z")) (star (seq "-" (plus (range "a-zA-Z0-9"))))))
|
26
41
|
(terminal INTEGER "19" (seq (opt (range "+-")) (plus (range "0-9"))))
|
27
42
|
(terminal DECIMAL "20"
|
28
43
|
(seq (opt (range "+-")) (seq (star (range "0-9")) "." (plus (range "0-9")))))
|
@@ -33,34 +48,21 @@
|
|
33
48
|
(seq (plus (range "0-9")) "." (star (range "0-9")) EXPONENT)
|
34
49
|
(seq "." (plus (range "0-9")) EXPONENT)
|
35
50
|
(seq (plus (range "0-9")) EXPONENT)) ))
|
51
|
+
(terminal EXPONENT "154s" (seq (range "eE") (opt (range "+-")) (plus (range "0-9"))))
|
36
52
|
(terminal STRING_LITERAL_QUOTE "22"
|
37
53
|
(seq "\"" (star (alt (range "^#x22#x5C#xA#xD") ECHAR UCHAR)) "\""))
|
38
54
|
(terminal STRING_LITERAL_SINGLE_QUOTE "23"
|
39
55
|
(seq "'" (star (alt (range "^#x27#x5C#xA#xD") ECHAR UCHAR)) "'"))
|
40
56
|
(terminal STRING_LITERAL_LONG_SINGLE_QUOTE "24"
|
41
|
-
(seq "'''" (seq (opt (alt "'" "''")) (range "^'
|
57
|
+
(seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR UCHAR))) "'''"))
|
42
58
|
(terminal STRING_LITERAL_LONG_QUOTE "25"
|
43
|
-
(seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"
|
59
|
+
(seq "\"\"\"" (star (seq (opt (alt "\"" "\"\"")) (alt (range "^\"\\") ECHAR UCHAR))) "\"\"\""))
|
44
60
|
(terminal UCHAR "26"
|
45
|
-
(alt (seq "u" HEX HEX HEX HEX) (seq "U" HEX HEX HEX HEX HEX HEX HEX HEX)))
|
46
|
-
(
|
61
|
+
(alt (seq "\\u" HEX HEX HEX HEX) (seq "\\U" HEX HEX HEX HEX HEX HEX HEX HEX)))
|
62
|
+
(terminal ECHAR "159s" (seq "\\" (range "tbnrf\\\"'")))
|
47
63
|
(terminal SPARQL_PREFIX "28t"
|
48
64
|
(seq (range "Pp") (range "Rr") (range "Ee") (range "Ff") (range "Ii") (range "Xx")))
|
49
65
|
(terminal SPARQL_BASE "29t" (seq (range "Bb") (range "Aa") (range "Ss") (range "Ee")))
|
50
|
-
(rule sparqlBase "29s" (seq SPARQL_BASE IRIREF))
|
51
|
-
(rule RDFLiteral "128s" (seq String (opt (alt LANGTAG (seq "^^" iri)))))
|
52
|
-
(rule BooleanLiteral "133s" (alt "true" "false"))
|
53
|
-
(rule iri "135s" (alt IRIREF PrefixedName))
|
54
|
-
(rule PrefixedName "136s" (alt PNAME_LN PNAME_NS))
|
55
|
-
(rule BlankNode "137s" (alt BLANK_NODE_LABEL ANON))
|
56
|
-
(terminal PNAME_NS "139s" (seq (opt PN_PREFIX) ":"))
|
57
|
-
(terminal PNAME_LN "140s" (seq PNAME_NS PN_LOCAL))
|
58
|
-
(terminal BLANK_NODE_LABEL "141s"
|
59
|
-
(seq "_:" (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS))))
|
60
|
-
(terminal LANGTAG "144s"
|
61
|
-
(seq "@" (plus (range "a-zA-Z")) (star (seq "-" (plus (range "a-zA-Z0-9"))))))
|
62
|
-
(terminal EXPONENT "154s" (seq (range "eE") (opt (range "+-")) (plus (range "0-9"))))
|
63
|
-
(terminal ECHAR "159s" (seq "\\" (range "tbnrf\"'")))
|
64
66
|
(terminal WS "161s" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA")))
|
65
67
|
(terminal ANON "162s" (seq "[" (star WS) "]"))
|
66
68
|
(terminal PN_CHARS_BASE "163s"
|
@@ -94,7 +96,7 @@
|
|
94
96
|
(opt (seq (star (alt PN_CHARS "." ":" PLX)) (alt PN_CHARS ":" PLX)))) )
|
95
97
|
(terminal PLX "169s" (alt PERCENT PN_LOCAL_ESC))
|
96
98
|
(terminal PERCENT "170s" (seq "%" HEX HEX))
|
97
|
-
(terminal HEX "171s" (
|
99
|
+
(terminal HEX "171s" (range "0-9A-Fa-f"))
|
98
100
|
(terminal PN_LOCAL_ESC "172s"
|
99
101
|
(seq "\\"
|
100
102
|
(alt "_" "~" "." "-" "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=" "/" "?" "#"
|
data/lib/ebnf.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
module EBNF
|
2
|
+
autoload :ABNF, "ebnf/abnf"
|
2
3
|
autoload :Base, "ebnf/base"
|
3
4
|
autoload :BNF, "ebnf/bnf"
|
5
|
+
autoload :ISOEBNF, "ebnf/isoebnf"
|
4
6
|
autoload :LL1, "ebnf/ll1"
|
7
|
+
autoload :Native, "ebnf/native"
|
5
8
|
autoload :Parser, "ebnf/parser"
|
6
9
|
autoload :PEG, "ebnf/peg"
|
7
10
|
autoload :Rule, "ebnf/rule"
|
data/lib/ebnf/abnf.rb
ADDED
@@ -0,0 +1,301 @@
|
|
1
|
+
require_relative 'abnf/core'
|
2
|
+
require_relative 'abnf/meta'
|
3
|
+
require 'logger'
|
4
|
+
|
5
|
+
# ABNF parser
|
6
|
+
# Parses ABNF into an array of {EBNF::Rule}.
|
7
|
+
module EBNF
|
8
|
+
class ABNF
|
9
|
+
include EBNF::PEG::Parser
|
10
|
+
|
11
|
+
# Regular expressions for both "Core" and ABNF-specific terminals.
|
12
|
+
ALPHA = %r{[\x41-\x5A\x61-\x7A]}
|
13
|
+
VCHAR = %r{[\x20-\x7E]}
|
14
|
+
WSP = %r{[\x20\x09]}
|
15
|
+
CRLF = %r{\x0D?\x0A}
|
16
|
+
COMMENT = %r{;(?:#{WSP}|#{VCHAR})*#{CRLF}}
|
17
|
+
C_NL = %r{#{COMMENT}|#{CRLF}}
|
18
|
+
C_WSP = %r{#{WSP}|(?:#{C_NL}#{WSP})}
|
19
|
+
|
20
|
+
##
|
21
|
+
# Hash of generated {EBNF::Rule} objects by symbol
|
22
|
+
#
|
23
|
+
# @return [Hash{Symbol => EBNF::Rule}]
|
24
|
+
attr_reader :parsed_rules
|
25
|
+
|
26
|
+
##
|
27
|
+
# The following ABNF grammar rules are treated as terminals.
|
28
|
+
|
29
|
+
# `rulename ::= ALPHA (ALPHA | DIGIT | "-")*`
|
30
|
+
terminal(:rulename, /#{ALPHA}(?:#{ALPHA}|[0-9-])*/) do |value|
|
31
|
+
value.to_sym
|
32
|
+
end
|
33
|
+
|
34
|
+
# `defined_as ::= c_wsp* ("=" | "=/") c_wsp*`
|
35
|
+
terminal(:defined_as, /#{C_WSP}*=\/?#{C_WSP}*/) {|value| value.strip}
|
36
|
+
|
37
|
+
# `quoted_string ::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE`
|
38
|
+
terminal(:quoted_string, /"[\x20-\x21\x23-\x7E]*"/) do |value|
|
39
|
+
value[1..-2]
|
40
|
+
end
|
41
|
+
|
42
|
+
# `bin_val ::= "b" BIT+ (("." BIT+)+ | ("-" BIT+))?`
|
43
|
+
terminal(:bin_val, /b[01]+(?:(?:(?:\.[01]+)+)|(?:-[01]+))?/) do |value|
|
44
|
+
if value.include?('.')
|
45
|
+
# Interpret segments in binary creating a sequence of hex characters or a string
|
46
|
+
hex_or_string(value[1..-1].split('.').map {|b| b.to_i(base=2).chr(Encoding::UTF_8)})
|
47
|
+
elsif value.include?('-')
|
48
|
+
# Interpret as a range
|
49
|
+
[:range, value[1..-1].split('-').map {|b| "#x%x" % b.to_i(base=2)}.join("-")]
|
50
|
+
else
|
51
|
+
# Interpret as a single HEX character
|
52
|
+
[:hex, "#x%x" % value[1..-1].to_i(base=2)]
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
# `dec_val ::= "d" DIGIT+ (("." DIGIT+)+ | ("-" DIGIT+))?`
|
57
|
+
terminal(:dec_val, /d[0-9]+(?:(?:(?:\.[0-9]+)+)|(?:-[0-9]+))?/) do |value|
|
58
|
+
if value.include?('.')
|
59
|
+
# Interpret segments in decimal creating a sequence of hex characters or a string
|
60
|
+
hex_or_string(value[1..-1].split('.').map {|b| b.to_i.chr(Encoding::UTF_8)})
|
61
|
+
elsif value.include?('-')
|
62
|
+
# Interpret as a range
|
63
|
+
[:range, value[1..-1].split('-').map {|d| "#x%x" % d.to_i}.join("-")]
|
64
|
+
else
|
65
|
+
# Interpret as a single HEX character
|
66
|
+
[:hex, "#x%x" % value[1..-1].to_i]
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
# `hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))?`
|
71
|
+
terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/i) do |value|
|
72
|
+
if value.include?('.')
|
73
|
+
# Interpret segments in hexadecimal creating a sequence of hex characters or a string
|
74
|
+
hex_or_string(value[1..-1].split('.').map {|b| b.to_i(base=16).chr(Encoding::UTF_8)})
|
75
|
+
elsif value.include?('-')
|
76
|
+
# Interpret as a range
|
77
|
+
[:range, value[1..-1].split('-').map {|h| "#x%x" % h.to_i(base=16)}.join("-")]
|
78
|
+
else
|
79
|
+
# Interpret as a single HEX character
|
80
|
+
[:hex, "#x#{value[1..-1]}"]
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
# `c_wsp ::= WSP | (c_nl WSP)`
|
85
|
+
terminal(:c_wsp, C_WSP)
|
86
|
+
|
87
|
+
# `c_nl ::= comment | CRLF`
|
88
|
+
terminal(:c_nl, C_NL)
|
89
|
+
|
90
|
+
# `DIGIT ::= [#x30-#x39]`
|
91
|
+
terminal(:DIGIT, /\d/)
|
92
|
+
|
93
|
+
# ## Non-terminal productions
|
94
|
+
|
95
|
+
# The `start_production` on `:rule` allows the parser to present the value as a single Hash, rather than an array of individual hashes.
|
96
|
+
start_production(:rule, as_hash: true)
|
97
|
+
|
98
|
+
# `rule ::= rulename defined_as elements c_nl`
|
99
|
+
production(:rule) do |value|
|
100
|
+
# value is a Hash with the `:rulename`, `:defined_as` and `:elements` of the rule.
|
101
|
+
# Record the rule in `parsed_rules`: extend an existing rule for `=/`, otherwise define a new one.
|
102
|
+
sym = value[:rulename]
|
103
|
+
elements = value[:elements]
|
104
|
+
|
105
|
+
if value[:defined_as] == "=/"
|
106
|
+
# append to rule alternate
|
107
|
+
rule = parsed_rules.fetch(sym) {raise "No existing rule found for #{sym}"}
|
108
|
+
rule.expr = [:alt, rule.expr] unless rule.alt?
|
109
|
+
if elements.is_a?(Array) && elements.first == :alt
|
110
|
+
# append alternatives to rule
|
111
|
+
rule.expr.concat(elements[1..-1])
|
112
|
+
else
|
113
|
+
# add elements as last alternative
|
114
|
+
rule.expr.push(elements)
|
115
|
+
end
|
116
|
+
else
|
117
|
+
# There shouldn't be an existing rule
|
118
|
+
raise "Redefining rule #{sym}" if parsed_rules.has_key?(sym)
|
119
|
+
parsed_rules[sym] = EBNF::Rule.new(sym.to_sym, nil, elements)
|
120
|
+
end
|
121
|
+
progress(:rule, level: 2) {parsed_rules[sym].to_sxp}
|
122
|
+
sym
|
123
|
+
end
|
124
|
+
|
125
|
+
# `elements ::= alternation c_wsp*`
|
126
|
+
production(:elements) do |value|
|
127
|
+
value.first[:alternation]
|
128
|
+
end
|
129
|
+
|
130
|
+
# `alternation ::= concatenation (c_wsp* "/" c_wsp* concatenation)*`
|
131
|
+
production(:alternation) do |value|
|
132
|
+
unless value.last[:_alternation_1].empty?
|
133
|
+
[:alt, value.first[:concatenation]] + value.last[:_alternation_1]
|
134
|
+
else
|
135
|
+
value.first[:concatenation]
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
# The `_alternation_2` rule comes from the expanded PEG grammar and serves as an opportunity to customize the values presented to the `alternation` rule.
|
140
|
+
production(:_alternation_2) do |value|
|
141
|
+
if Array(value.last[:concatenation]).first == :alt
|
142
|
+
value.last[:concatenation][1..-1]
|
143
|
+
else
|
144
|
+
[value.last[:concatenation]]
|
145
|
+
end
|
146
|
+
value.last[:concatenation]
|
147
|
+
end
|
148
|
+
|
149
|
+
# `concatenation ::= repetition (c_wsp+ repetition)*`
|
150
|
+
production(:concatenation) do |value|
|
151
|
+
unless value.last[:_concatenation_1].empty?
|
152
|
+
[:seq, value.first[:repetition]] + value.last[:_concatenation_1]
|
153
|
+
else
|
154
|
+
value.first[:repetition]
|
155
|
+
end
|
156
|
+
end
|
157
|
+
start_production(:_concatenation_2, as_hash: true)
|
158
|
+
production(:_concatenation_2) do |value|
|
159
|
+
value[:repetition]
|
160
|
+
end
|
161
|
+
|
162
|
+
# `repetition ::= repeat? element`
|
163
|
+
production(:repetition) do |value|
|
164
|
+
rept = value.first[:_repetition_1]
|
165
|
+
elt = value.last[:element]
|
166
|
+
case rept
|
167
|
+
when [0, '*'] then [:star, elt]
|
168
|
+
when [1, '*'] then [:plus, elt]
|
169
|
+
when nil then elt
|
170
|
+
else
|
171
|
+
[:rept, rept.first, rept.last, elt]
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
# `repeat ::= DIGIT+ | (DIGIT* "*" DIGIT*)`
|
176
|
+
production(:repeat) do |value|
|
177
|
+
if value.is_a?(Integer)
|
178
|
+
[value, value]
|
179
|
+
else
|
180
|
+
[value.first, value.last]
|
181
|
+
end
|
182
|
+
end
|
183
|
+
start_production(:_repeat_1, as_hash: true)
|
184
|
+
production(:_repeat_1) {|value| value.values}
|
185
|
+
production(:_repeat_2) {|value| value.join("").to_i}
|
186
|
+
production(:_repeat_3) {|value| value.join("").to_i}
|
187
|
+
production(:_repeat_4) {|value| value.length > 0 ? value.join("").to_i : '*'}
|
188
|
+
|
189
|
+
# `element ::= rulename | group | option | char_val | num_val | prose_val`
|
190
|
+
production(:element) do |value|
|
191
|
+
value
|
192
|
+
end
|
193
|
+
|
194
|
+
# `group ::= "(" c_wsp* alternation c_wsp* ")"`
|
195
|
+
start_production(:group, as_hash: true)
|
196
|
+
production(:group) do |value|
|
197
|
+
value[:alternation]
|
198
|
+
end
|
199
|
+
|
200
|
+
# `option ::= "[" c_wsp* alternation c_wsp* "]"`
|
201
|
+
start_production(:option, as_hash: true)
|
202
|
+
production(:option) do |value|
|
203
|
+
[:opt, value[:alternation]]
|
204
|
+
end
|
205
|
+
|
206
|
+
# `case_insensitive_string ::= "%i"? quoted_string`
|
207
|
+
production(:case_insensitive_string) do |value|
|
208
|
+
str = value.last[:quoted_string]
|
209
|
+
if str.match?(/[[:alpha:]]/)
|
210
|
+
# Only need to use case-insensitive if there are alphabetic characters in the string.
|
211
|
+
[:istr, value.last[:quoted_string]]
|
212
|
+
else
|
213
|
+
value.last[:quoted_string]
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
# `case_sensitive_string ::= "%s" quoted_string`
|
218
|
+
production(:case_sensitive_string) do |value|
|
219
|
+
value.last[:quoted_string]
|
220
|
+
end
|
221
|
+
|
222
|
+
# `num_val ::= "%" (bin_val | dec_val | hex_val)`
|
223
|
+
production(:num_val) do |value|
|
224
|
+
value.last[:_num_val_1]
|
225
|
+
end
|
226
|
+
|
227
|
+
# ## Parser invocation.
|
228
|
+
# Parses the input into `parsed_rules` on construction; a PEG parser error is re-raised as a SyntaxError.
|
229
|
+
#
|
230
|
+
# @param [#read, #to_s] input
|
231
|
+
# @param [Hash{Symbol => Object}] options
|
232
|
+
# @option options [Integer] :level
|
233
|
+
# Trace level. 0(debug), 1(info), 2(warn), 3(error).
|
234
|
+
# @return [EBNF::ABNF]
|
235
|
+
def initialize(input, **options)
|
236
|
+
# If the `level` option is set, instantiate a logger for collecting trace information.
|
237
|
+
if options.has_key?(:level)
|
238
|
+
options[:logger] = Logger.new(STDERR)
|
239
|
+
options[:logger].level = options[:level]
|
240
|
+
options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
|
241
|
+
end
|
242
|
+
|
243
|
+
# Read input, if necessary, which will be used in a Scanner.
|
244
|
+
@input = input.respond_to?(:read) ? input.read : input.to_s
|
245
|
+
|
246
|
+
@parsed_rules = {}
|
247
|
+
|
248
|
+
# Parses into `@parsed_rules`
|
249
|
+
parse(@input,
|
250
|
+
:rulelist, # Starting rule
|
251
|
+
ABNFMeta::RULES, # PEG rules
|
252
|
+
whitespace: '', # No implicit whitespace
|
253
|
+
**options)
|
254
|
+
rescue EBNF::PEG::Parser::Error => e
|
255
|
+
raise SyntaxError, e.message
|
256
|
+
end
|
257
|
+
|
258
|
+
##
|
259
|
+
# The AST includes the parsed rules along with built-in rules for ABNF used within the parsed grammar.
|
260
|
+
#
|
261
|
+
# @return [Array<EBNF::Rule>]
|
262
|
+
def ast
|
263
|
+
# Add built-in ABNF core rules for symbols referenced by the parsed rules but not defined among them.
|
264
|
+
parsed_rules.values.map(&:symbols).flatten.uniq.each do |sym|
|
265
|
+
rule = ABNFCore::RULES.detect {|r| r.sym == sym}
|
266
|
+
parsed_rules[sym] ||= rule if rule
|
267
|
+
end
|
268
|
+
|
269
|
+
parsed_rules.values
|
270
|
+
end
|
271
|
+
|
272
|
+
private
|
273
|
+
# Generate a combination of seq and string to represent a sequence of characters
|
274
|
+
#
|
275
|
+
# @param [Array<String>] characters
|
276
|
+
# @return [String,Array]
|
277
|
+
def hex_or_string(characters)
|
278
|
+
seq = [:seq]
|
279
|
+
str_result = ""
|
280
|
+
characters.each do |c|
|
281
|
+
if VCHAR.match?(c)
|
282
|
+
str_result << c
|
283
|
+
else
|
284
|
+
if str_result.length > 0
|
285
|
+
seq << str_result
|
286
|
+
str_result = ""
|
287
|
+
end
|
288
|
+
seq << [:hex, "#x%x" % c.codepoints.first]
|
289
|
+
end
|
290
|
+
end
|
291
|
+
seq << str_result if str_result.length > 0
|
292
|
+
|
293
|
+
# Either return the sequence, or a string
|
294
|
+
if seq.length == 2 && seq.last.is_a?(String)
|
295
|
+
seq.last
|
296
|
+
else
|
297
|
+
seq
|
298
|
+
end
|
299
|
+
end
|
300
|
+
end
|
301
|
+
end
|