ebnf 1.1.2 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +218 -196
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +40 -21
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -15
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +171 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +7 -2
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +128 -87
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +140 -8
- data/lib/ebnf/ll1/lexer.rb +37 -32
- data/lib/ebnf/ll1/parser.rb +113 -73
- data/lib/ebnf/ll1/scanner.rb +83 -51
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +241 -0
- data/lib/ebnf/rule.rb +453 -163
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +561 -88
- metadata +114 -28
- data/etc/sparql.rb +0 -45773
data/lib/ebnf/isoebnf.rb
ADDED
@@ -0,0 +1,229 @@
|
|
1
|
+
require_relative 'isoebnf/meta'
|
2
|
+
require 'logger'
|
3
|
+
|
4
|
+
# ISO EBNF parser
|
5
|
+
# Parses ISO EBNF into an array of {EBNF::Rule}.
|
6
|
+
module EBNF
|
7
|
+
class ISOEBNF
|
8
|
+
include EBNF::PEG::Parser
|
9
|
+
|
10
|
+
# The base for terminal-character, which omits "'", '"', and '?'.
|
11
|
+
# Could be more optimized, and one might quibble
|
12
|
+
# with the overly-strictly defined character set,
|
13
|
+
# but it is correct.
|
14
|
+
TERMINAL_CHARACTER_BASE = %r{
|
15
|
+
[a-zA-Z0-9] | # letter | decimal digit
|
16
|
+
, | # concatenate symbol
|
17
|
+
= | # defining symbol
|
18
|
+
[\|\/!] | # definition separator symbol
|
19
|
+
\*\) | # end comment symbol
|
20
|
+
\) | # end group symbol
|
21
|
+
\] | # end option symbol
|
22
|
+
\} | # end repeat symbol
|
23
|
+
\- | # except symbol
|
24
|
+
#\' | # first quote symbol
|
25
|
+
\* | # repetition symbol
|
26
|
+
#\" | # second quote symbol
|
27
|
+
#\? | # special sequence symbol
|
28
|
+
\(\* | # start comment symbol
|
29
|
+
\( | # start group symbol
|
30
|
+
\[ | # start option symbol
|
31
|
+
\{ | # start repeat symbol
|
32
|
+
[;\.] | # terminator symbol
|
33
|
+
[:+_%@&$<>^\x20\x23\\`~] # other character
|
34
|
+
}x
|
35
|
+
|
36
|
+
TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}
|
37
|
+
FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}
|
38
|
+
SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}
|
39
|
+
SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
|
40
|
+
|
41
|
+
# Abstract syntax tree from parse
|
42
|
+
#
|
43
|
+
# @return [Array<EBNF::Rule>]
|
44
|
+
attr_reader :ast
|
45
|
+
|
46
|
+
# `[14] integer ::= decimal_digit+`
|
47
|
+
terminal(:integer, /\d+/) do |value, prod|
|
48
|
+
value.to_i
|
49
|
+
end
|
50
|
+
|
51
|
+
# `[15] meta_identifier ::= letter meta_identifier_character*`
|
52
|
+
terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
|
53
|
+
value.to_sym
|
54
|
+
end
|
55
|
+
|
56
|
+
# `[17] terminal_string ::= ("'" first_terminal_character+ "'")`
|
57
|
+
# ` | ('"' second_terminal_character+ '"')`
|
58
|
+
terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
|
59
|
+
value[1..-2]
|
60
|
+
end
|
61
|
+
|
62
|
+
# `[20] special_sequence ::= '?' special_sequence_character* '?'`
|
63
|
+
terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}+\?/)
|
64
|
+
|
65
|
+
# `[22] terminal_character ::= [a-zA-Z0-9]`
|
66
|
+
# ` | [,=;*}#x2d?([{;]`
|
67
|
+
# ` | '*)'`
|
68
|
+
# ` | '(*'`
|
69
|
+
# ` | ']'`
|
70
|
+
# ` | other_character`
|
71
|
+
terminal(:terminal_character, TERMINAL_CHARACTER)
|
72
|
+
|
73
|
+
# `[25] empty ::= ''`
|
74
|
+
terminal(:empty, //)
|
75
|
+
|
76
|
+
# `[26] definition_separator_symbol ::= '|' | '/' | '!'`
|
77
|
+
terminal(:definition_separator_symbol, /[\|\/!]/)
|
78
|
+
|
79
|
+
# `[27] terminator_symbol ::= ';' | '.'`
|
80
|
+
terminal(:terminator_symbol, /[;\.]/)
|
81
|
+
|
82
|
+
# `[28] start_option_symbol ::= '['`
|
83
|
+
terminal(:start_option_symbol, /\[|(?:\(\/)/)
|
84
|
+
|
85
|
+
# `[29] end_option_symbol ::= ']'`
|
86
|
+
terminal(:end_option_symbol, /\]/)
|
87
|
+
|
88
|
+
# `[30] start_repeat_symbol ::= '{' | '(:'`
|
89
|
+
terminal(:start_repeat_symbol, /{|\(:/)
|
90
|
+
|
91
|
+
# `[31] end_repeat_symbol ::= '}' | ':)'`
|
92
|
+
terminal(:end_repeat_symbol, /}|:\)/)
|
93
|
+
|
94
|
+
# ## Non-terminal productions
|
95
|
+
|
96
|
+
# `[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol`
|
97
|
+
production(:syntax_rule, clear_packrat: true) do |value, data, callback|
|
98
|
+
# value contains an expression.
|
99
|
+
# Invoke callback
|
100
|
+
sym = value[0][:meta_identifier]
|
101
|
+
definitions_list = value[2][:definitions_list]
|
102
|
+
callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list))
|
103
|
+
nil
|
104
|
+
end
|
105
|
+
|
106
|
+
# Setting `as_hash: true` in the start production makes the value take the form of a hash, rather than an array of hashes.
|
107
|
+
#
|
108
|
+
# `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*`
|
109
|
+
start_production(:definitions_list, as_hash: true)
|
110
|
+
production(:definitions_list) do |value|
|
111
|
+
if value[:_definitions_list_1].length > 0
|
112
|
+
[:alt, value[:single_definition]] + value[:_definitions_list_1]
|
113
|
+
else
|
114
|
+
value[:single_definition]
|
115
|
+
end
|
116
|
+
end
|
117
|
+
production(:_definitions_list_1) do |value|
|
118
|
+
Array(value.first)
|
119
|
+
end
|
120
|
+
start_production(:_definitions_list_2, as_hash: true)
|
121
|
+
production(:_definitions_list_2) do |value|
|
122
|
+
if Array(value[:definitions_list]).first == :alt
|
123
|
+
value[:definitions_list][1..-1]
|
124
|
+
else
|
125
|
+
[value[:definitions_list]]
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
# `[4] single_definition ::= term (',' term)*`
|
130
|
+
start_production(:single_definition, as_hash: true)
|
131
|
+
production(:single_definition) do |value|
|
132
|
+
if value[:_single_definition_1].length > 0
|
133
|
+
[:seq, value[:term]] + value[:_single_definition_1]
|
134
|
+
else
|
135
|
+
value[:term]
|
136
|
+
end
|
137
|
+
end
|
138
|
+
production(:_single_definition_1) do |value|
|
139
|
+
value.map {|a1| a1.last[:term]}.compact # Get rid of ','
|
140
|
+
end
|
141
|
+
|
142
|
+
# `[5] term ::= factor ('-' exception)?`
|
143
|
+
start_production(:term, as_hash: true)
|
144
|
+
production(:term) do |value|
|
145
|
+
if value[:_term_1]
|
146
|
+
[:diff, value[:factor], value[:_term_1]]
|
147
|
+
else
|
148
|
+
value[:factor]
|
149
|
+
end
|
150
|
+
end
|
151
|
+
production(:_term_1) do |value|
|
152
|
+
value.last[:exception] if value
|
153
|
+
end
|
154
|
+
|
155
|
+
# `[6] exception ::= factor`
|
156
|
+
start_production(:exception, as_hash: true)
|
157
|
+
production(:exception) do |value|
|
158
|
+
value[:factor]
|
159
|
+
end
|
160
|
+
|
161
|
+
# `[7] factor ::= (integer '*')? primary`
|
162
|
+
start_production(:factor, as_hash: true)
|
163
|
+
production(:factor) do |value|
|
164
|
+
if value[:_factor_1]
|
165
|
+
[:rept, value[:_factor_1], value[:_factor_1], value[:primary]]
|
166
|
+
else
|
167
|
+
value[:primary]
|
168
|
+
end
|
169
|
+
end
|
170
|
+
production(:_factor_2) do |value|
|
171
|
+
value.first[:integer]
|
172
|
+
end
|
173
|
+
|
174
|
+
# `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol`
|
175
|
+
production(:optional_sequence) do |value|
|
176
|
+
[:opt, value[1][:definitions_list]]
|
177
|
+
end
|
178
|
+
|
179
|
+
# `[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol`
|
180
|
+
production(:repeated_sequence) do |value|
|
181
|
+
[:star, value[1][:definitions_list]]
|
182
|
+
end
|
183
|
+
|
184
|
+
# `[11] grouped_sequence ::= '(' definitions_list ')'`
|
185
|
+
production(:grouped_sequence) do |value|
|
186
|
+
[:seq, value[1][:definitions_list]]
|
187
|
+
end
|
188
|
+
|
189
|
+
# ## Parser invocation.
|
190
|
+
# On start, yield ourselves if a block is given, otherwise, return this parser instance
|
191
|
+
#
|
192
|
+
# @param [#read, #to_s] input
|
193
|
+
# @param [Hash{Symbol => Object}] options
|
194
|
+
# @option options [Integer] :level
|
195
|
+
# Trace level. 0(debug), 1(info), 2(warn), 3(error).
|
196
|
+
# @return [EBNF::ISOEBNF]
|
197
|
+
def initialize(input, **options, &block)
|
198
|
+
# If the `level` option is set, instantiate a logger for collecting trace information.
|
199
|
+
if options.has_key?(:level)
|
200
|
+
options[:logger] = Logger.new(STDERR)
|
201
|
+
options[:logger].level = options[:level]
|
202
|
+
options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
|
203
|
+
end
|
204
|
+
|
205
|
+
# Read input, if necessary, which will be used in a Scanner.
|
206
|
+
@input = input.respond_to?(:read) ? input.read : input.to_s
|
207
|
+
|
208
|
+
parsing_terminals = false
|
209
|
+
@ast = []
|
210
|
+
parse(@input,
|
211
|
+
:syntax,
|
212
|
+
ISOEBNFMeta::RULES,
|
213
|
+
whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+},
|
214
|
+
**options
|
215
|
+
) do |context, *data|
|
216
|
+
rule = case context
|
217
|
+
when :rule
|
218
|
+
# A rule which has already been turned into a `Rule` object.
|
219
|
+
rule = data.first
|
220
|
+
rule.kind = :terminal if parsing_terminals
|
221
|
+
rule
|
222
|
+
end
|
223
|
+
@ast << rule if rule
|
224
|
+
end
|
225
|
+
rescue EBNF::PEG::Parser::Error => e
|
226
|
+
raise SyntaxError, e.message
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# This file is automatically generated by ebnf version 2.0.0
|
2
|
+
# Derived from etc/iso-ebnf.ebnf
|
3
|
+
module ISOEBNFMeta
|
4
|
+
RULES = [
|
5
|
+
EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule),
|
6
|
+
EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule),
|
7
|
+
EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule),
|
8
|
+
EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule),
|
9
|
+
EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule),
|
10
|
+
EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule),
|
11
|
+
EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule),
|
12
|
+
EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule),
|
13
|
+
EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule),
|
14
|
+
EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule),
|
15
|
+
EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule),
|
16
|
+
EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule),
|
17
|
+
EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule),
|
18
|
+
EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule),
|
19
|
+
EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule),
|
20
|
+
EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule),
|
21
|
+
EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule),
|
22
|
+
EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule),
|
23
|
+
EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule),
|
24
|
+
EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
|
25
|
+
EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule),
|
26
|
+
EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule),
|
27
|
+
EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule),
|
28
|
+
EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule),
|
29
|
+
EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule),
|
30
|
+
EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule),
|
31
|
+
EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule),
|
32
|
+
EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule),
|
33
|
+
EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
34
|
+
EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule),
|
35
|
+
EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule),
|
36
|
+
EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule),
|
37
|
+
EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
|
38
|
+
EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule),
|
39
|
+
EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
|
40
|
+
EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
|
41
|
+
EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule),
|
42
|
+
EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
43
|
+
EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
44
|
+
EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
45
|
+
EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
|
46
|
+
EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule),
|
47
|
+
EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule),
|
48
|
+
EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
|
49
|
+
EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule),
|
50
|
+
EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule),
|
51
|
+
EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule),
|
52
|
+
EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule),
|
53
|
+
EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
54
|
+
EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
|
55
|
+
EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
56
|
+
EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
57
|
+
EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
58
|
+
EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
59
|
+
EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule),
|
60
|
+
EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule),
|
61
|
+
EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
62
|
+
EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule),
|
63
|
+
EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule),
|
64
|
+
EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule),
|
65
|
+
EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule),
|
66
|
+
EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
|
67
|
+
EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule),
|
68
|
+
EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
69
|
+
EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule),
|
70
|
+
EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule),
|
71
|
+
EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule),
|
72
|
+
EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
73
|
+
]
|
74
|
+
end
|
75
|
+
|
data/lib/ebnf/ll1.rb
CHANGED
@@ -1,4 +1,86 @@
|
|
1
1
|
module EBNF
|
2
|
+
##
|
3
|
+
# This module extends {EBNF::Base} to create metadata including _branch_, [First/Follow][], and other tables which are used by {EBNF::LL1::Parser} to recognize examples of the associated grammar.
|
4
|
+
#
|
5
|
+
### Branch Table
|
6
|
+
#
|
7
|
+
# The Branch table is a hash mapping production rules to a hash relating terminals appearing in input to the sequence of productions to follow when the corresponding input terminal is found. This allows either the `seq` primitive, where all terminals map to the same sequence of productions, or the `alt` primitive, where each terminal may map to a different production.
|
8
|
+
#
|
9
|
+
# BRANCH = {
|
10
|
+
# :alt => {
|
11
|
+
# "(" => [:seq, :_alt_1],
|
12
|
+
# :HEX => [:seq, :_alt_1],
|
13
|
+
# :O_RANGE => [:seq, :_alt_1],
|
14
|
+
# :RANGE => [:seq, :_alt_1],
|
15
|
+
# :STRING1 => [:seq, :_alt_1],
|
16
|
+
# :STRING2 => [:seq, :_alt_1],
|
17
|
+
# :SYMBOL => [:seq, :_alt_1],
|
18
|
+
# },
|
19
|
+
# ...
|
20
|
+
# :declaration => {
|
21
|
+
# "@pass" => [:pass],
|
22
|
+
# "@terminals" => ["@terminals"],
|
23
|
+
# },
|
24
|
+
# ...
|
25
|
+
# }
|
26
|
+
#
|
27
|
+
# In this case the `alt` rule, which is `seq ('|' seq)*`, can happen when any of the specified tokens appears on the input stream. They all cause the same token to be passed to the `seq` rule, followed by `_alt_1`, which handles the `('|' seq)*` portion of the rule after the first sequence is matched.
|
28
|
+
#
|
29
|
+
# The `declaration` rule is `'@terminals' | pass`, using the `alt` primitive to determine the production to run based on the terminal appearing on the input stream. Eventually, a terminal production is found and the token is consumed.
|
30
|
+
#
|
31
|
+
### First/Follow Table
|
32
|
+
#
|
33
|
+
# The [First/Follow][] table is a hash mapping production rules to the terminals that may start or follow the rule. For example:
|
34
|
+
#
|
35
|
+
# FIRST = {
|
36
|
+
# :alt => [
|
37
|
+
# :HEX,
|
38
|
+
# :SYMBOL,
|
39
|
+
# :RANGE,
|
40
|
+
# :O_RANGE,
|
41
|
+
# :STRING1,
|
42
|
+
# :STRING2,
|
43
|
+
# "("],
|
44
|
+
# ...
|
45
|
+
# }
|
46
|
+
#
|
47
|
+
### Terminals Table
|
48
|
+
#
|
49
|
+
# This table is a simple list of the terminal productions found in the grammar. For example:
|
50
|
+
#
|
51
|
+
# TERMINALS = ["(", ")", "-",
|
52
|
+
# "@pass", "@terminals",
|
53
|
+
# :HEX, :LHS, :O_RANGE,:POSTFIX,
|
54
|
+
# :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
|
55
|
+
# ].freeze
|
56
|
+
#
|
57
|
+
### Cleanup Table
|
58
|
+
#
|
59
|
+
# This table identifies productions which used EBNF rules, which are transformed to BNF for actual parsing. This allows the parser, in some cases, to reproduce *star*, *plus*, and *opt* rule matches. For example:
|
60
|
+
#
|
61
|
+
# CLEANUP = {
|
62
|
+
# :_alt_1 => :star,
|
63
|
+
# :_alt_3 => :merge,
|
64
|
+
# :_diff_1 => :opt,
|
65
|
+
# :ebnf => :star,
|
66
|
+
# :_ebnf_2 => :merge,
|
67
|
+
# :_postfix_1 => :opt,
|
68
|
+
# :seq => :plus,
|
69
|
+
# :_seq_1 => :star,
|
70
|
+
# :_seq_2 => :merge,
|
71
|
+
# }.freeze
|
72
|
+
#
|
73
|
+
# In this case the `ebnf` rule was `(declaration | rule)*`. As BNF does not support a star operator, this is decomposed into a set of rules using `alt` and `seq` primitives:
|
74
|
+
#
|
75
|
+
# ebnf ::= _empty | _ebnf_2
|
76
|
+
# _ebnf_1 ::= declaration | rule
|
77
|
+
# _ebnf_2 ::= _ebnf_1 ebnf
|
78
|
+
# _ebnf_3 ::= ebnf
|
79
|
+
#
|
80
|
+
# The `_empty` production matches an empty string, so allows for no value. `_ebnf_2` matches `declaration | rule` (using the `alt` primitive) followed by `ebnf`, creating a sequence of zero or more `declaration` or `rule` members.
|
81
|
+
#
|
82
|
+
# [First/Follow]: https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
|
83
|
+
|
2
84
|
module LL1
|
3
85
|
autoload :Lexer, "ebnf/ll1/lexer"
|
4
86
|
autoload :Parser, "ebnf/ll1/parser"
|
@@ -51,8 +133,40 @@ module EBNF
|
|
51
133
|
##
|
52
134
|
# Create first/follow for each rule using techniques defined for LL(1) parsers.
|
53
135
|
#
|
136
|
+
# This takes rules which have been transformed into BNF and adds first/follow and other information to the rules to allow the generation of metadata tables used for driving a parser.
|
137
|
+
#
|
138
|
+
# Given an initial rule in EBNF:
|
139
|
+
#
|
140
|
+
# (rule ebnf "1" (star (alt declaration rule)))
|
141
|
+
#
|
142
|
+
# The BNF transformation becomes:
|
143
|
+
#
|
144
|
+
# (rule ebnf "1" (alt _empty _ebnf_2))
|
145
|
+
# (rule _ebnf_1 "1.1" (alt declaration rule))
|
146
|
+
# (rule _ebnf_2 "1.2" (seq _ebnf_1 ebnf))
|
147
|
+
# (rule _ebnf_3 "1.3" (seq ebnf))
|
148
|
+
#
|
149
|
+
# After running this method, the rules are annotated with first/follow and cleanup rules:
|
150
|
+
#
|
151
|
+
# (rule ebnf "1"
|
152
|
+
# (start #t)
|
153
|
+
# (first "@pass" "@terminals" LHS _eps)
|
154
|
+
# (follow _eof)
|
155
|
+
# (cleanup star)
|
156
|
+
# (alt _empty _ebnf_2))
|
157
|
+
# (rule _ebnf_1 "1.1"
|
158
|
+
# (first "@pass" "@terminals" LHS)
|
159
|
+
# (follow "@pass" "@terminals" LHS _eof)
|
160
|
+
# (alt declaration rule))
|
161
|
+
# (rule _ebnf_2 "1.2"
|
162
|
+
# (first "@pass" "@terminals" LHS)
|
163
|
+
# (follow _eof)
|
164
|
+
# (cleanup merge)
|
165
|
+
# (seq _ebnf_1 ebnf))
|
166
|
+
# (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf))
|
167
|
+
#
|
54
168
|
# @return [EBNF] self
|
55
|
-
# @see
|
169
|
+
# @see https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
|
56
170
|
# @param [Array<Symbol>] starts
|
57
171
|
# Set of symbols which are start rules
|
58
172
|
def first_follow(*starts)
|
@@ -96,8 +210,9 @@ module EBNF
|
|
96
210
|
firsts, follows = 0, 0
|
97
211
|
# add Fi(wi) to Fi(Ai) for every rule Ai → wi
|
98
212
|
#
|
99
|
-
# For sequences, this is the first rule in the sequence.
|
100
|
-
# For alts, this is every rule in the sequence
|
213
|
+
# * For sequences, this is the first rule in the sequence.
|
214
|
+
# * For alts, this is every rule in the sequence
|
215
|
+
# * Other rules don't matter, as they don't appear in strict BNF
|
101
216
|
each(:rule) do |ai|
|
102
217
|
# Fi(a w' ) = { a } for every terminal a
|
103
218
|
ai.terminals(ast).each do |t|
|
@@ -168,6 +283,10 @@ module EBNF
|
|
168
283
|
progress("first_follow") {"(#{ittr}) firsts #{firsts}, follows #{follows}"}
|
169
284
|
ittr += 1
|
170
285
|
end while (firsts + follows) > 0
|
286
|
+
|
287
|
+
debug("Fi.2-post: non-terminals without first") do
|
288
|
+
ast.reject(&:terminal?).reject(&:first).map(&:sym)
|
289
|
+
end if ast.reject(&:terminal?).any? {|r| r.first.nil?}
|
171
290
|
end
|
172
291
|
end
|
173
292
|
|
@@ -216,7 +335,7 @@ module EBNF
|
|
216
335
|
@branch = {}
|
217
336
|
@already = []
|
218
337
|
@agenda = []
|
219
|
-
@starts.each do |start|
|
338
|
+
Array(@starts).each do |start|
|
220
339
|
do_production(start)
|
221
340
|
while !@agenda.empty?
|
222
341
|
x = @agenda.shift
|
@@ -265,13 +384,26 @@ module EBNF
|
|
265
384
|
end
|
266
385
|
end
|
267
386
|
io.puts "#{ind0}}.freeze\n"
|
268
|
-
|
387
|
+
elsif table
|
269
388
|
io.puts "#{ind0}#{name} = [\n#{ind1}" +
|
270
389
|
table.sort_by{|t| t.to_s.sub(/^_/, '')}.map(&:inspect).join(",\n#{ind1}") +
|
271
390
|
"\n#{ind0}].freeze\n"
|
272
391
|
end
|
273
392
|
end
|
274
393
|
|
394
|
+
##
|
395
|
+
# Output Ruby parser files for LL(1) parsing
|
396
|
+
#
|
397
|
+
# @param [IO, StringIO] output
|
398
|
+
def to_ruby_ll1(output, **options)
|
399
|
+
self.outputTable(output, 'BRANCH', self.branch, 1)
|
400
|
+
self.outputTable(output, 'TERMINALS', self.terminals, 1)
|
401
|
+
self.outputTable(output, 'FIRST', self.first, 1)
|
402
|
+
self.outputTable(output, 'FOLLOW', self.follow, 1)
|
403
|
+
self.outputTable(output, 'CLEANUP', self.cleanup, 1)
|
404
|
+
self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
|
405
|
+
end
|
406
|
+
|
275
407
|
private
|
276
408
|
def do_production(lhs)
|
277
409
|
rule = find_rule(lhs)
|
@@ -287,16 +419,16 @@ module EBNF
|
|
287
419
|
|
288
420
|
if rule.expr.first == :matches
|
289
421
|
debug("prod") {"Rule is regexp: #{rule}"}
|
290
|
-
|
291
|
-
error("No record of what token #{lhs} can start with") unless rule.first
|
292
422
|
return
|
293
423
|
end
|
294
424
|
|
425
|
+
error("No record of what token #{lhs.inspect} can start with") unless rule.first
|
426
|
+
|
295
427
|
if rule.alt?
|
296
428
|
# A First/Follow conflict appears when _eps is in the first
|
297
429
|
# of one rule and there is a token in the first and
|
298
430
|
# follow of the same rule
|
299
|
-
if rule.first.include?(:_eps) && !(overlap = ((rule.first & (rule.follow || [])) - [:eps])).empty?
|
431
|
+
if Array(rule.first).include?(:_eps) && !(overlap = ((Array(rule.first) & (rule.follow || [])) - [:eps])).empty?
|
300
432
|
error("First/Follow Conflict: #{overlap.first.inspect} is both first and follow of #{rule.sym}")
|
301
433
|
end
|
302
434
|
|