ebnf 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +81 -36
- data/VERSION +1 -1
- data/bin/ebnf +34 -18
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/ebnf.ebnf +19 -25
- data/etc/ebnf.html +251 -206
- data/etc/ebnf.ll1.rb +27 -103
- data/etc/ebnf.ll1.sxp +105 -102
- data/etc/ebnf.peg.rb +54 -62
- data/etc/ebnf.peg.sxp +53 -62
- data/etc/ebnf.sxp +22 -19
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.sxp +8 -7
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.sxp +22 -20
- data/lib/ebnf.rb +3 -0
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +87 -44
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +4 -7
- data/lib/ebnf/ll1/parser.rb +12 -4
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +1 -1
- data/lib/ebnf/peg/parser.rb +24 -5
- data/lib/ebnf/peg/rule.rb +77 -58
- data/lib/ebnf/rule.rb +352 -121
- data/lib/ebnf/terminals.rb +13 -10
- data/lib/ebnf/writer.rb +550 -78
- metadata +48 -6
@@ -0,0 +1,138 @@
|
|
1
|
+
(* W3C EBNF for ISO/IEC 14977 : 1996 EBNF *)
|
2
|
+
(* Scoured from https://www.cl.cam.ac.uk/~mgk25/iso-14977.pdf *)
|
3
|
+
|
4
|
+
syntax = syntax_rule, {syntax_rule} ;
|
5
|
+
|
6
|
+
syntax_rule = meta_identifier, defining_symbol, definitions_list, terminator_symbol
|
7
|
+
(* A <syntax rule> defines the sequences of
|
8
|
+
symbols represented by a <meta identifier> *);
|
9
|
+
|
10
|
+
definitions_list = single_definition, {definition_separator_symbol, definitions_list}
|
11
|
+
(* | separates alternative <single definitions> *);
|
12
|
+
|
13
|
+
single_definition = term, {',', term}
|
14
|
+
(* , separates successive <terms> *);
|
15
|
+
|
16
|
+
term = factor, ['-', exception]
|
17
|
+
(* A <term> represents any sequence of symbols that is defined by the <factor> but
|
18
|
+
not defined by the <exception> *);
|
19
|
+
|
20
|
+
exception = factor
|
21
|
+
(* A <factor> may be used as an <exception>
|
22
|
+
if it could be replaced by a <factor>
|
23
|
+
containing no <meta identifiers> *);
|
24
|
+
|
25
|
+
factor = [integer, '*'], primary
|
26
|
+
(* The <integer> specifies the number of repetitions of the <primary> *);
|
27
|
+
|
28
|
+
primary = optional_sequence
|
29
|
+
| repeated_sequence
|
30
|
+
| special_sequence
|
31
|
+
| grouped_sequence
|
32
|
+
| meta_identifier
|
33
|
+
| terminal_string
|
34
|
+
| empty
|
35
|
+
;
|
36
|
+
|
37
|
+
optional_sequence = start_option_symbol, definitions_list, end_option_symbol
|
38
|
+
(* The brackets [ and ] enclose symbols which are optional *);
|
39
|
+
|
40
|
+
repeated_sequence = start_repeat_symbol, definitions_list, end_repeat_symbol
|
41
|
+
(* The brackets { and } enclose symbols
|
42
|
+
which may be repeated any number of times *);
|
43
|
+
|
44
|
+
grouped_sequence = '(', definitions_list, ')'
|
45
|
+
(* The brackets ( and ) allow any <definitions list> to be a <primary> *);
|
46
|
+
|
47
|
+
terminal_string = ("'", first_terminal_character, {first_terminal_character}, "'")
|
48
|
+
| ('"', second_terminal_character, {second_terminal_character}, '"')
|
49
|
+
(* A <terminal string> represents the
|
50
|
+
<characters> between the quote symbols '_' or "_" *);
|
51
|
+
|
52
|
+
meta_identifier = letter, {meta_identifier_character}
|
53
|
+
(* A <meta identifier> is the name of a syntactic element of the language being defined *);
|
54
|
+
|
55
|
+
integer = decimal_digit, {decimal_digit} ;
|
56
|
+
|
57
|
+
special_sequence = '?', {special_sequence_character}, '?'
|
58
|
+
(* The meaning of a <special sequence> is not defined in the standard metalanguage. *);
|
59
|
+
|
60
|
+
comment = '(*', {comment_symbol}, '*)'
|
61
|
+
(* A comment is allowed anywhere outside a
|
62
|
+
<terminal string>, <meta identifier>,
|
63
|
+
<integer> or <special sequence> *);
|
64
|
+
|
65
|
+
comment_symbol = comment | commentless_symbol | other_character ;
|
66
|
+
|
67
|
+
commentless_symbol = terminal_character | meta_identifier | integer
|
68
|
+
| terminal_string | special_sequence
|
69
|
+
;
|
70
|
+
|
71
|
+
letter = "A" | "B" | "C" | "D" | "E" | "F" | "G"
|
72
|
+
| "H" | "I" | "J" | "K" | "L" | "M" | "N"
|
73
|
+
| "O" | "P" | "Q" | "R" | "S" | "T" | "U"
|
74
|
+
| "V" | "W" | "X" | "Y" | "Z" | "a" | "b"
|
75
|
+
| "c" | "d" | "e" | "f" | "g" | "h" | "i"
|
76
|
+
| "j" | "k" | "l" | "m" | "n" | "o" | "p"
|
77
|
+
| "q" | "r" | "s" | "t" | "u" | "v" | "w"
|
78
|
+
| "x" | "y" | "z"
|
79
|
+
;
|
80
|
+
|
81
|
+
decimal_digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
|
82
|
+
|
83
|
+
(* Extended to allow '_' *)
|
84
|
+
meta_identifier_character = letter | decimal_digit | '_' ;
|
85
|
+
|
86
|
+
first_terminal_character = terminal_character - "'" ;
|
87
|
+
|
88
|
+
second_terminal_character = terminal_character - '"' ;
|
89
|
+
|
90
|
+
special_sequence_character = terminal_character - '?' ;
|
91
|
+
|
92
|
+
terminal_character = letter
|
93
|
+
| decimal_digit
|
94
|
+
| concatenate_symbol
|
95
|
+
| defining_symbol
|
96
|
+
| definition_separator_symbol
|
97
|
+
| end_comment_symbol
|
98
|
+
| end_group_symbol
|
99
|
+
| end_option_symbol
|
100
|
+
| end_repeat_symbol
|
101
|
+
| except_symbol
|
102
|
+
| first_quote_symbol
|
103
|
+
| repetition_symbol
|
104
|
+
| second_quote_symbol
|
105
|
+
| special_sequence_symbol
|
106
|
+
| start_comment_symbol
|
107
|
+
| start_group_symbol
|
108
|
+
| start_option_symbol
|
109
|
+
| start_repeat_symbol
|
110
|
+
| terminator_symbol
|
111
|
+
| other_character
|
112
|
+
;
|
113
|
+
|
114
|
+
other_character = ' ' | ':' | '+' | '_' | '%' | '@' | '&'
|
115
|
+
| '#' | '$' | '<' | '>' | '\' | '^' | '`'
|
116
|
+
| '~' ;
|
117
|
+
|
118
|
+
empty = ;
|
119
|
+
|
120
|
+
concatenate_symbol = ',' ;
|
121
|
+
repetition_symbol = '*' ;
|
122
|
+
except_symbol = '-' ;
|
123
|
+
first_quote_symbol = "'" ;
|
124
|
+
second_quote_symbol = '"' ;
|
125
|
+
start_comment_symbol = '(*' ;
|
126
|
+
end_comment_symbol = '*)' ;
|
127
|
+
start_group_symbol = '(' ;
|
128
|
+
end_group_symbol = ')' ;
|
129
|
+
special_sequence_symbol = '?' ;
|
130
|
+
|
131
|
+
(* Simple terminals that are often extended *)
|
132
|
+
defining_symbol = '=' | ':' ;
|
133
|
+
definition_separator_symbol = '|' | '/' | '!' ;
|
134
|
+
terminator_symbol = ';' | '.' ;
|
135
|
+
start_option_symbol = '[' ;
|
136
|
+
end_option_symbol = ']' ;
|
137
|
+
start_repeat_symbol = '{' | '(:' ;
|
138
|
+
end_repeat_symbol = '}' | ':)' ;
|
data/etc/iso-ebnf.sxp
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
(
|
2
|
+
(rule syntax (star syntax_rule))
|
3
|
+
(rule syntax_rule
|
4
|
+
(seq meta_identifier defining_symbol definitions_list terminator_symbol))
|
5
|
+
(rule definitions_list
|
6
|
+
(seq single_definition (star (seq definition_separator_symbol definitions_list))))
|
7
|
+
(rule single_definition (seq term (star (seq "," term))))
|
8
|
+
(rule term (seq factor (opt (seq "-" exception))))
|
9
|
+
(rule exception (seq factor))
|
10
|
+
(rule factor (seq (opt (seq integer "*")) primary))
|
11
|
+
(rule primary
|
12
|
+
(alt optional_sequence repeated_sequence special_sequence grouped_sequence
|
13
|
+
meta_identifier terminal_string empty ))
|
14
|
+
(rule optional_sequence
|
15
|
+
(seq start_option_symbol definitions_list end_option_symbol))
|
16
|
+
(rule repeated_sequence
|
17
|
+
(seq start_repeat_symbol definitions_list end_repeat_symbol))
|
18
|
+
(rule grouped_sequence (seq "(" definitions_list ")"))
|
19
|
+
(terminals _terminals (seq))
|
20
|
+
(terminal terminal_string
|
21
|
+
(alt
|
22
|
+
(seq "'" (plus first_terminal_character) "'")
|
23
|
+
(seq "\"" (plus second_terminal_character) "\"")) )
|
24
|
+
(terminal meta_identifier (seq letter (star meta_identifier_character)))
|
25
|
+
(terminal integer (plus decimal_digit))
|
26
|
+
(terminal special_sequence (seq "?" (star special_sequence_character) "?"))
|
27
|
+
(terminal comment (seq start_comment_symbol (star comment_symbol) end_comment_symbol))
|
28
|
+
(terminal comment_symbol (alt comment commentless_symbol other_character))
|
29
|
+
(terminal commentless_symbol
|
30
|
+
(alt terminal_character meta_identifier integer terminal_string special_sequence))
|
31
|
+
(terminal letter (range "a-zA-Z"))
|
32
|
+
(terminal decimal_digit (range "0-9"))
|
33
|
+
(terminal meta_identifier_character (alt letter decimal_digit "_"))
|
34
|
+
(terminal first_terminal_character (diff terminal_character "'"))
|
35
|
+
(terminal second_terminal_character (diff terminal_character "\""))
|
36
|
+
(terminal special_sequence_character (diff terminal_character "?"))
|
37
|
+
(terminal terminal_character
|
38
|
+
(alt letter decimal_digit concatenate_symbol defining_symbol
|
39
|
+
definition_separator_symbol end_comment_symbol end_group_symbol
|
40
|
+
end_option_symbol end_repeat_symbol except_symbol first_quote_symbol
|
41
|
+
repetition_symbol second_quote_symbol special_sequence_symbol
|
42
|
+
start_comment_symbol start_group_symbol start_option_symbol
|
43
|
+
start_repeat_symbol terminator_symbol other_character ))
|
44
|
+
(terminal other_character (alt (range ":+_%@&$<>^`~#x20#x23") "\\"))
|
45
|
+
(terminal gap_separator (range "#x9#xa#xb#xc#xd#x20"))
|
46
|
+
(pass _pass (alt (plus gap_separator) comment))
|
47
|
+
(terminal empty (seq ""))
|
48
|
+
(terminal concatenate_symbol (seq ","))
|
49
|
+
(terminal repetition_symbol (seq "*"))
|
50
|
+
(terminal except_symbol (seq "-"))
|
51
|
+
(terminal first_quote_symbol (seq "'"))
|
52
|
+
(terminal second_quote_symbol (seq "\""))
|
53
|
+
(terminal start_comment_symbol (seq "(*"))
|
54
|
+
(terminal end_comment_symbol (seq "*)"))
|
55
|
+
(terminal start_group_symbol (seq "("))
|
56
|
+
(terminal end_group_symbol (seq ")"))
|
57
|
+
(terminal special_sequence_symbol (seq "?"))
|
58
|
+
(terminal defining_symbol (alt "=" ":"))
|
59
|
+
(terminal definition_separator_symbol (alt "|" "/" "!"))
|
60
|
+
(terminal terminator_symbol (alt ";" "."))
|
61
|
+
(terminal start_option_symbol (seq "["))
|
62
|
+
(terminal end_option_symbol (seq "]"))
|
63
|
+
(terminal start_repeat_symbol (alt "{" "(:"))
|
64
|
+
(terminal end_repeat_symbol (alt "}" ":)"))
|
65
|
+
(terminal gap_free_symbol (alt (diff terminal_character (range "'\"")) terminal_string)))
|
data/etc/sparql.ebnf
CHANGED
@@ -243,13 +243,13 @@
|
|
243
243
|
|
244
244
|
@terminals
|
245
245
|
|
246
|
-
[139] IRIREF ::= '<' ([
|
246
|
+
[139] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20])* '>'
|
247
247
|
[140] PNAME_NS ::= PN_PREFIX? ':'
|
248
248
|
[141] PNAME_LN ::= PNAME_NS PN_LOCAL
|
249
249
|
[142] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
|
250
250
|
[143] VAR1 ::= '?' VARNAME
|
251
251
|
[144] VAR2 ::= '$' VARNAME
|
252
|
-
[145] LANGTAG ::= '@' [a-zA-Z]+ ('-' [a-zA-Z0-9]+)*
|
252
|
+
[145] LANGTAG ::= '@' ([a-zA-Z])+ ('-' ([a-zA-Z0-9])+)*
|
253
253
|
[146] INTEGER ::= [0-9]+
|
254
254
|
[147] DECIMAL ::= [0-9]* '.' [0-9]+
|
255
255
|
[148] DOUBLE ::= [0-9]+ '.' [0-9]* EXPONENT
|
@@ -269,7 +269,7 @@
|
|
269
269
|
[161] NIL ::= '(' WS* ')'
|
270
270
|
[162] WS ::= #x20 | #x9 | #xD | #xA
|
271
271
|
[163] ANON ::= '[' WS* ']'
|
272
|
-
[164] PN_CHARS_BASE ::= [A-
|
272
|
+
[164] PN_CHARS_BASE ::= [A-Za-z] | [#x00C0-#x00D6]
|
273
273
|
| [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D]
|
274
274
|
| [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F]
|
275
275
|
| [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF]
|
@@ -283,6 +283,6 @@
|
|
283
283
|
[169] PN_LOCAL ::= ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
|
284
284
|
[170] PLX ::= PERCENT | PN_LOCAL_ESC
|
285
285
|
[171] PERCENT ::= '%' HEX HEX
|
286
|
-
[172] HEX ::= [0-
|
286
|
+
[172] HEX ::= [0-9A-Fa-f]
|
287
287
|
[173] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '='
|
288
288
|
| '/' | '?' | '#' | '@' | '%' )
|
data/etc/sparql.sxp
CHANGED
@@ -282,7 +282,9 @@
|
|
282
282
|
(rule iri "136" (alt IRIREF PrefixedName))
|
283
283
|
(rule PrefixedName "137" (alt PNAME_LN PNAME_NS))
|
284
284
|
(rule BlankNode "138" (alt BLANK_NODE_LABEL ANON))
|
285
|
-
(
|
285
|
+
(terminals _terminals (seq))
|
286
|
+
(terminal IRIREF "139"
|
287
|
+
(seq "<" (star (diff (range "^<>\"{}|^`\\") (range "#x00-#x20"))) ">"))
|
286
288
|
(terminal PNAME_NS "140" (seq (opt PN_PREFIX) ":"))
|
287
289
|
(terminal PNAME_LN "141" (seq PNAME_NS PN_LOCAL))
|
288
290
|
(terminal BLANK_NODE_LABEL "142"
|
@@ -310,17 +312,16 @@
|
|
310
312
|
(terminal STRING_LITERAL2 "157"
|
311
313
|
(seq "\"" (star (alt (range "^#x22#x5C#xA#xD") ECHAR)) "\""))
|
312
314
|
(terminal STRING_LITERAL_LONG1 "158"
|
313
|
-
(seq "'''" (seq (opt (alt "'" "''")) (range "^'
|
315
|
+
(seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR))) "'''"))
|
314
316
|
(terminal STRING_LITERAL_LONG2 "159"
|
315
|
-
(seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"
|
316
|
-
(terminal ECHAR "160" (seq "\\" (range "tbnrf
|
317
|
+
(seq "\"\"\"" (star (seq (opt (alt "\"" "\"\"")) (alt (range "^\"\\") ECHAR))) "\"\"\""))
|
318
|
+
(terminal ECHAR "160" (seq "\\" (range "tbnrf\\\"'")))
|
317
319
|
(terminal NIL "161" (seq "(" (star WS) ")"))
|
318
320
|
(terminal WS "162" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA")))
|
319
321
|
(terminal ANON "163" (seq "[" (star WS) "]"))
|
320
322
|
(terminal PN_CHARS_BASE "164"
|
321
323
|
(alt
|
322
|
-
(range "A-
|
323
|
-
(range "a-z")
|
324
|
+
(range "A-Za-z")
|
324
325
|
(range "#x00C0-#x00D6")
|
325
326
|
(range "#x00D8-#x00F6")
|
326
327
|
(range "#x00F8-#x02FF")
|
@@ -355,7 +356,7 @@
|
|
355
356
|
(seq (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS))))
|
356
357
|
(terminal PLX "170" (alt PERCENT PN_LOCAL_ESC))
|
357
358
|
(terminal PERCENT "171" (seq "%" HEX HEX))
|
358
|
-
(terminal HEX "172" (
|
359
|
+
(terminal HEX "172" (range "0-9A-Fa-f"))
|
359
360
|
(terminal PN_LOCAL_ESC "173"
|
360
361
|
(seq "\\"
|
361
362
|
(alt "_" "~" "." "-" "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=" "/" "?" "#"
|
data/etc/turtle.ebnf
CHANGED
@@ -26,11 +26,11 @@
|
|
26
26
|
|
27
27
|
@terminals
|
28
28
|
|
29
|
-
[18] IRIREF ::=
|
29
|
+
[18] IRIREF ::= '<' ([^<>"{}|^`\]-[#x00-#x20] | UCHAR)* '>'
|
30
30
|
[139s] PNAME_NS ::= PN_PREFIX? ":"
|
31
31
|
[140s] PNAME_LN ::= PNAME_NS PN_LOCAL
|
32
32
|
[141s] BLANK_NODE_LABEL ::= '_:' ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
|
33
|
-
[144s] LANGTAG ::= "@" [a-zA-Z]+ ( "-" [a-zA-Z0-9]+ )*
|
33
|
+
[144s] LANGTAG ::= "@" ([a-zA-Z])+ ( "-" ([a-zA-Z0-9])+ )*
|
34
34
|
[19] INTEGER ::= [+-]? [0-9]+
|
35
35
|
[20] DECIMAL ::= [+-]? ( ([0-9])* '.' ([0-9])+ )
|
36
36
|
[21] DOUBLE ::= [+-]? ( [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT )
|
@@ -65,6 +65,6 @@
|
|
65
65
|
[168s] PN_LOCAL ::= ( PN_CHARS_U | ':' | [0-9] | PLX ) ( ( PN_CHARS | '.' | ':' | PLX )* ( PN_CHARS | ':' | PLX ) ) ?
|
66
66
|
[169s] PLX ::= PERCENT | PN_LOCAL_ESC
|
67
67
|
[170s] PERCENT ::= '%' HEX HEX
|
68
|
-
[171s] HEX ::= [0-
|
68
|
+
[171s] HEX ::= [0-9A-Fa-f]
|
69
69
|
[172s] PN_LOCAL_ESC ::= '\' ( '_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '='
|
70
70
|
| '/' | '?' | '#' | '@' | '%' )
|
data/etc/turtle.sxp
CHANGED
@@ -4,6 +4,8 @@
|
|
4
4
|
(rule directive "3" (alt prefixID base sparqlPrefix sparqlBase))
|
5
5
|
(rule prefixID "4" (seq "@prefix" PNAME_NS IRIREF "."))
|
6
6
|
(rule base "5" (seq "@base" IRIREF "."))
|
7
|
+
(rule sparqlPrefix "28s" (seq SPARQL_PREFIX PNAME_NS IRIREF))
|
8
|
+
(rule sparqlBase "29s" (seq SPARQL_BASE IRIREF))
|
7
9
|
(rule triples "6"
|
8
10
|
(alt
|
9
11
|
(seq subject predicateObjectList)
|
@@ -19,10 +21,23 @@
|
|
19
21
|
(rule blankNodePropertyList "14" (seq "[" predicateObjectList "]"))
|
20
22
|
(rule collection "15" (seq "(" (star object) ")"))
|
21
23
|
(rule NumericLiteral "16" (alt INTEGER DECIMAL DOUBLE))
|
24
|
+
(rule RDFLiteral "128s" (seq String (opt (alt LANGTAG (seq "^^" iri)))))
|
25
|
+
(rule BooleanLiteral "133s" (alt "true" "false"))
|
22
26
|
(rule String "17"
|
23
27
|
(alt STRING_LITERAL_QUOTE STRING_LITERAL_SINGLE_QUOTE
|
24
28
|
STRING_LITERAL_LONG_SINGLE_QUOTE STRING_LITERAL_LONG_QUOTE ))
|
25
|
-
(
|
29
|
+
(rule iri "135s" (alt IRIREF PrefixedName))
|
30
|
+
(rule PrefixedName "136s" (alt PNAME_LN PNAME_NS))
|
31
|
+
(rule BlankNode "137s" (alt BLANK_NODE_LABEL ANON))
|
32
|
+
(terminals _terminals (seq))
|
33
|
+
(terminal IRIREF "18"
|
34
|
+
(seq "<" (star (alt (diff (range "^<>\"{}|^`\\") (range "#x00-#x20")) UCHAR)) ">"))
|
35
|
+
(terminal PNAME_NS "139s" (seq (opt PN_PREFIX) ":"))
|
36
|
+
(terminal PNAME_LN "140s" (seq PNAME_NS PN_LOCAL))
|
37
|
+
(terminal BLANK_NODE_LABEL "141s"
|
38
|
+
(seq "_:" (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS))))
|
39
|
+
(terminal LANGTAG "144s"
|
40
|
+
(seq "@" (plus (range "a-zA-Z")) (star (seq "-" (plus (range "a-zA-Z0-9"))))))
|
26
41
|
(terminal INTEGER "19" (seq (opt (range "+-")) (plus (range "0-9"))))
|
27
42
|
(terminal DECIMAL "20"
|
28
43
|
(seq (opt (range "+-")) (seq (star (range "0-9")) "." (plus (range "0-9")))))
|
@@ -33,34 +48,21 @@
|
|
33
48
|
(seq (plus (range "0-9")) "." (star (range "0-9")) EXPONENT)
|
34
49
|
(seq "." (plus (range "0-9")) EXPONENT)
|
35
50
|
(seq (plus (range "0-9")) EXPONENT)) ))
|
51
|
+
(terminal EXPONENT "154s" (seq (range "eE") (opt (range "+-")) (plus (range "0-9"))))
|
36
52
|
(terminal STRING_LITERAL_QUOTE "22"
|
37
53
|
(seq "\"" (star (alt (range "^#x22#x5C#xA#xD") ECHAR UCHAR)) "\""))
|
38
54
|
(terminal STRING_LITERAL_SINGLE_QUOTE "23"
|
39
55
|
(seq "'" (star (alt (range "^#x27#x5C#xA#xD") ECHAR UCHAR)) "'"))
|
40
56
|
(terminal STRING_LITERAL_LONG_SINGLE_QUOTE "24"
|
41
|
-
(seq "'''" (seq (opt (alt "'" "''")) (range "^'
|
57
|
+
(seq "'''" (star (seq (opt (alt "'" "''")) (alt (range "^'\\") ECHAR UCHAR))) "'''"))
|
42
58
|
(terminal STRING_LITERAL_LONG_QUOTE "25"
|
43
|
-
(seq "\"\"\"" (seq (opt (alt "\"" "\"\"")) (range "^\"
|
59
|
+
(seq "\"\"\"" (star (seq (opt (alt "\"" "\"\"")) (alt (range "^\"\\") ECHAR UCHAR))) "\"\"\""))
|
44
60
|
(terminal UCHAR "26"
|
45
|
-
(alt (seq "u" HEX HEX HEX HEX) (seq "U" HEX HEX HEX HEX HEX HEX HEX HEX)))
|
46
|
-
(
|
61
|
+
(alt (seq "\\u" HEX HEX HEX HEX) (seq "\\U" HEX HEX HEX HEX HEX HEX HEX HEX)))
|
62
|
+
(terminal ECHAR "159s" (seq "\\" (range "tbnrf\\\"'")))
|
47
63
|
(terminal SPARQL_PREFIX "28t"
|
48
64
|
(seq (range "Pp") (range "Rr") (range "Ee") (range "Ff") (range "Ii") (range "Xx")))
|
49
65
|
(terminal SPARQL_BASE "29t" (seq (range "Bb") (range "Aa") (range "Ss") (range "Ee")))
|
50
|
-
(rule sparqlBase "29s" (seq SPARQL_BASE IRIREF))
|
51
|
-
(rule RDFLiteral "128s" (seq String (opt (alt LANGTAG (seq "^^" iri)))))
|
52
|
-
(rule BooleanLiteral "133s" (alt "true" "false"))
|
53
|
-
(rule iri "135s" (alt IRIREF PrefixedName))
|
54
|
-
(rule PrefixedName "136s" (alt PNAME_LN PNAME_NS))
|
55
|
-
(rule BlankNode "137s" (alt BLANK_NODE_LABEL ANON))
|
56
|
-
(terminal PNAME_NS "139s" (seq (opt PN_PREFIX) ":"))
|
57
|
-
(terminal PNAME_LN "140s" (seq PNAME_NS PN_LOCAL))
|
58
|
-
(terminal BLANK_NODE_LABEL "141s"
|
59
|
-
(seq "_:" (alt PN_CHARS_U (range "0-9")) (opt (seq (star (alt PN_CHARS ".")) PN_CHARS))))
|
60
|
-
(terminal LANGTAG "144s"
|
61
|
-
(seq "@" (plus (range "a-zA-Z")) (star (seq "-" (plus (range "a-zA-Z0-9"))))))
|
62
|
-
(terminal EXPONENT "154s" (seq (range "eE") (opt (range "+-")) (plus (range "0-9"))))
|
63
|
-
(terminal ECHAR "159s" (seq "\\" (range "tbnrf\"'")))
|
64
66
|
(terminal WS "161s" (alt (hex "#x20") (hex "#x9") (hex "#xD") (hex "#xA")))
|
65
67
|
(terminal ANON "162s" (seq "[" (star WS) "]"))
|
66
68
|
(terminal PN_CHARS_BASE "163s"
|
@@ -94,7 +96,7 @@
|
|
94
96
|
(opt (seq (star (alt PN_CHARS "." ":" PLX)) (alt PN_CHARS ":" PLX)))) )
|
95
97
|
(terminal PLX "169s" (alt PERCENT PN_LOCAL_ESC))
|
96
98
|
(terminal PERCENT "170s" (seq "%" HEX HEX))
|
97
|
-
(terminal HEX "171s" (
|
99
|
+
(terminal HEX "171s" (range "0-9A-Fa-f"))
|
98
100
|
(terminal PN_LOCAL_ESC "172s"
|
99
101
|
(seq "\\"
|
100
102
|
(alt "_" "~" "." "-" "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=" "/" "?" "#"
|
data/lib/ebnf.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
module EBNF
|
2
|
+
autoload :ABNF, "ebnf/abnf"
|
2
3
|
autoload :Base, "ebnf/base"
|
3
4
|
autoload :BNF, "ebnf/bnf"
|
5
|
+
autoload :ISOEBNF, "ebnf/isoebnf"
|
4
6
|
autoload :LL1, "ebnf/ll1"
|
7
|
+
autoload :Native, "ebnf/native"
|
5
8
|
autoload :Parser, "ebnf/parser"
|
6
9
|
autoload :PEG, "ebnf/peg"
|
7
10
|
autoload :Rule, "ebnf/rule"
|
data/lib/ebnf/abnf.rb
ADDED
@@ -0,0 +1,301 @@
|
|
1
|
+
require_relative 'abnf/core'
|
2
|
+
require_relative 'abnf/meta'
|
3
|
+
require 'logger'
|
4
|
+
|
5
|
+
# ABNF parser
# Parses ABNF into an array of {EBNF::Rule}.
module EBNF
  class ABNF
    include EBNF::PEG::Parser

    # Regular expressions for both "Core" and ABNF-specific terminals.
    # The later patterns are built by interpolating the earlier ones.
    ALPHA = %r{[\x41-\x5A\x61-\x7A]}
    VCHAR = %r{[\x20-\x7E]}
    WSP = %r{[\x20\x09]}
    CRLF = %r{\x0D?\x0A}
    COMMENT = %r{;(?:#{WSP}|#{VCHAR})*#{CRLF}}
    C_NL = %r{#{COMMENT}|#{CRLF}}
    C_WSP = %r{#{WSP}|(?:#{C_NL}#{WSP})}

    ##
    # Hash of generated {EBNF::Rule} objects by symbol
    #
    # @return [Hash{Symbol => EBNF::Rule}]
    attr_reader :parsed_rules

    ##
    # The following ABNF grammar rules are treated as terminals.

    # `rulename ::= ALPHA (ALPHA | DIGIT | "-")*`
    #
    # The matched name is presented as a Symbol.
    terminal(:rulename, /#{ALPHA}(?:#{ALPHA}|[0-9-])*/) do |value|
      value.to_sym
    end

    # `defined_as ::= c_wsp* ("=" | "=/") c_wsp*`
    #
    # Surrounding whitespace is stripped, leaving the matched operator text.
    terminal(:defined_as, /#{C_WSP}*=\/?#{C_WSP}*/) {|value| value.strip}

    # `quoted_string::= DQUOTE [#x20-#x21#x23-#x7E]* DQUOTE`
    #
    # The enclosing double quotes are removed from the value.
    terminal(:quoted_string, /"[\x20-\x21\x23-\x7E]*"/) do |value|
      value[1..-2]
    end

    # `bin_val ::= "b" BIT+ (("." BIT+)+ | ("-" BIT+))?`
    terminal(:bin_val, /b[01]+(?:(?:(?:\.[01]+)+)|(?:-[01]+))?/) do |value|
      # value[1..-1] drops the leading "b" radix marker.
      if value.include?('.')
        # Interpret segments in binary creating a sequence of hex characters or a string
        hex_or_string(value[1..-1].split('.').map {|b| b.to_i(2).chr(Encoding::UTF_8)})
      elsif value.include?('-')
        # Interpret as a range
        [:range, value[1..-1].split('-').map {|b| "#x%x" % b.to_i(2)}.join("-")]
      else
        # Interpret as a single HEX character
        [:hex, "#x%x" % value[1..-1].to_i(2)]
      end
    end

    # `dec_val ::= "d" DIGIT+ (("." DIGIT+)+ | ("-" DIGIT+))?`
    terminal(:dec_val, /d[0-9]+(?:(?:(?:\.[0-9]+)+)|(?:-[0-9]+))?/) do |value|
      # value[1..-1] drops the leading "d" radix marker.
      if value.include?('.')
        # Interpret segments in decimal creating a sequence of hex characters or a string
        hex_or_string(value[1..-1].split('.').map {|b| b.to_i.chr(Encoding::UTF_8)})
      elsif value.include?('-')
        # Interpret as a range
        [:range, value[1..-1].split('-').map {|d| "#x%x" % d.to_i}.join("-")]
      else
        # Interpret as a single HEX character
        [:hex, "#x%x" % value[1..-1].to_i]
      end
    end

    # `hex_val ::= "x" HEXDIG+ (("." HEXDIG+)+ | ("-" HEXDIG+))?`
    terminal(:hex_val, /x[0-9A-F]+(?:(?:(?:\.[0-9A-F]+)+)|(?:-[0-9A-F]+))?/i) do |value|
      # value[1..-1] drops the leading "x" radix marker.
      if value.include?('.')
        # Interpret segments in hexadecimal creating a sequence of hex characters or a string
        hex_or_string(value[1..-1].split('.').map {|b| b.to_i(16).chr(Encoding::UTF_8)})
      elsif value.include?('-')
        # Interpret as a range
        [:range, value[1..-1].split('-').map {|h| "#x%x" % h.to_i(16)}.join("-")]
      else
        # Interpret as a single HEX character; the digits are kept as written.
        [:hex, "#x#{value[1..-1]}"]
      end
    end

    # `c_wsp ::= WSP | (c_nl WSP)`
    terminal(:c_wsp, C_WSP)

    # `c_nl ::= comment | CRLF`
    terminal(:c_nl, C_NL)

    # `DIGIT ::= [#x30-#x39]`
    terminal(:DIGIT, /\d/)

    # ## Non-terminal productions

    # The `start_production` on `:rule` allows the parser to present the value as a single Hash, rather than an array of individual hashes.
    start_production(:rule, as_hash: true)

    # `rule ::= rulename defined_as elements c_nl`
    #
    # Records the rule in `parsed_rules`. A rule defined with `=/` extends an
    # existing rule with further alternatives; a rule defined with `=` must not
    # already exist. The production's value is the rule's symbol.
    production(:rule) do |value|
      # value contains an expression.
      # Invoke callback
      sym = value[:rulename]
      elements = value[:elements]

      if value[:defined_as] == "=/"
        # append to rule alternate
        rule = parsed_rules.fetch(sym) {raise "No existing rule found for #{sym}"}
        # Wrap a non-alternation expression so alternatives can be appended to it.
        rule.expr = [:alt, rule.expr] unless rule.alt?
        if elements.is_a?(Array) && elements.first == :alt
          # append alternatives to rule
          rule.expr.concat(elements[1..-1])
        else
          # add elements as last alternative
          rule.expr.push(elements)
        end
      else
        # There shouldn't be an existing rule
        raise "Redefining rule #{sym}" if parsed_rules.has_key?(sym)
        parsed_rules[sym] = EBNF::Rule.new(sym.to_sym, nil, elements)
      end
      progress(:rule, level: 2) {parsed_rules[sym].to_sxp}
      sym
    end

    # `elements ::= alternation c_wsp*`
    production(:elements) do |value|
      value.first[:alternation]
    end

    # `alternation ::= concatenation (c_wsp* "/" c_wsp* concatenation)*`
    #
    # Yields `[:alt, ...]` when further alternatives were matched, otherwise
    # the value of the single concatenation.
    production(:alternation) do |value|
      unless value.last[:_alternation_1].empty?
        [:alt, value.first[:concatenation]] + value.last[:_alternation_1]
      else
        value.first[:concatenation]
      end
    end

    # The `_alternation_2` rule comes from the expanded PEG grammar and serves as an opportunity to customize the values presented to the `alternation` rule.
    # It presents the value of the trailing concatenation unchanged.
    production(:_alternation_2) do |value|
      value.last[:concatenation]
    end

    # `concatenation::= repetition (c_wsp+ repetition)*`
    #
    # Yields `[:seq, ...]` when further repetitions were matched, otherwise
    # the value of the single repetition.
    production(:concatenation) do |value|
      unless value.last[:_concatenation_1].empty?
        [:seq, value.first[:repetition]] + value.last[:_concatenation_1]
      else
        value.first[:repetition]
      end
    end
    start_production(:_concatenation_2, as_hash: true)
    production(:_concatenation_2) do |value|
      value[:repetition]
    end

    # `repetition ::= repeat? element`
    #
    # Maps the repeat bounds onto `:star`, `:plus`, or a general
    # `[:rept, min, max, element]`; with no repeat the element is used as is.
    production(:repetition) do |value|
      rept = value.first[:_repetition_1]
      elt = value.last[:element]
      case rept
      when [0, '*'] then [:star, elt]
      when [1, '*'] then [:plus, elt]
      when nil then elt
      else
        [:rept, rept.first, rept.last, elt]
      end
    end

    # `repeat ::= DIGIT+ | (DIGIT* "*" DIGIT*)`
    #
    # Presents the repeat as a two-element `[min, max]` pair.
    production(:repeat) do |value|
      if value.is_a?(Integer)
        [value, value]
      else
        [value.first, value.last]
      end
    end
    start_production(:_repeat_1, as_hash: true)
    production(:_repeat_1) {|value| value.values}
    production(:_repeat_2) {|value| value.join("").to_i}
    production(:_repeat_3) {|value| value.join("").to_i}
    # With no digits after "*", the upper bound is the marker '*'.
    production(:_repeat_4) {|value| value.length > 0 ? value.join("").to_i : '*'}

    # `element ::= rulename | group | option | char_val | num_val | prose_val`
    production(:element) do |value|
      value
    end

    # `group ::= "(" c_wsp* alternation c_wsp* ")"`
    start_production(:group, as_hash: true)
    production(:group) do |value|
      value[:alternation]
    end

    # `option ::= "[" c_wsp* alternation c_wsp* "]"`
    start_production(:option, as_hash: true)
    production(:option) do |value|
      [:opt, value[:alternation]]
    end

    # `case_insensitive_string ::= "%i"? quoted_string`
    production(:case_insensitive_string) do |value|
      str = value.last[:quoted_string]
      # Only need to use case-insensitive if there are alphabetic characters in the string.
      str.match?(/[[:alpha:]]/) ? [:istr, str] : str
    end

    # `case_sensitive_string ::= "%s" quoted_string`
    production(:case_sensitive_string) do |value|
      value.last[:quoted_string]
    end

    # `num_val ::= "%" (bin_val | dec_val | hex_val)`
    production(:num_val) do |value|
      value.last[:_num_val_1]
    end

    # ## Parser invocation.
    # Reads the input and parses it, collecting the resulting rules in `parsed_rules`.
    #
    # @param  [#read, #to_s]          input
    # @param  [Hash{Symbol => Object}] options
    # @option options [Integer] :level
    #   Trace level. 0(debug), 1(info), 2(warn), 3(error).
    # @return [EBNF::ABNF]
    # @raise [SyntaxError] if the PEG parser reports an error
    def initialize(input, **options)
      # If the `level` option is set, instantiate a logger for collecting trace information.
      if options.key?(:level)
        options[:logger] = Logger.new(STDERR)
        options[:logger].level = options[:level]
        options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
      end

      # Read input, if necessary, which will be used in a Scanner.
      @input = input.respond_to?(:read) ? input.read : input.to_s

      @parsed_rules = {}

      # Parses into `@parsed_rules`
      parse(@input,
            :rulelist,        # Starting rule
            ABNFMeta::RULES,  # PEG rules
            whitespace: '',   # No implicit whitespace
            **options)
    rescue EBNF::PEG::Parser::Error => e
      raise SyntaxError, e.message
    end

    ##
    # The AST includes the parsed rules along with built-in rules for ABNF used within the parsed grammar.
    #
    # @return [Array<EBNF::Rule>]
    def ast
      # Add built-in core ABNF rules for symbols that the parsed rules use
      # but that are not already present in `parsed_rules`.
      parsed_rules.values.map(&:symbols).flatten.uniq.each do |sym|
        rule = ABNFCore::RULES.detect {|r| r.sym == sym}
        parsed_rules[sym] ||= rule if rule
      end

      parsed_rules.values
    end

  private
    # Generate a combination of seq and string to represent a sequence of characters
    #
    # Consecutive characters matching VCHAR are merged into a single string;
    # any other character becomes a `[:hex, "#x.."]` entry.
    #
    # @param [Array<String>] characters
    # @return [String,Array]
    def hex_or_string(characters)
      seq = [:seq]
      str_result = ""
      characters.each do |c|
        if VCHAR.match?(c)
          str_result << c
        else
          # Flush the pending run of printable characters before the hex entry.
          if str_result.length > 0
            seq << str_result
            str_result = ""
          end
          seq << [:hex, "#x%x" % c.codepoints.first]
        end
      end
      seq << str_result if str_result.length > 0

      # Either return the sequence, or a string
      if seq.length == 2 && seq.last.is_a?(String)
        seq.last
      else
        seq
      end
    end
  end
end
|