ebnf 1.1.2 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +218 -196
- data/UNLICENSE +1 -1
- data/VERSION +1 -1
- data/bin/ebnf +40 -21
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/doap.ttl +23 -15
- data/etc/ebnf.ebnf +21 -33
- data/etc/ebnf.html +171 -160
- data/etc/{ebnf.rb → ebnf.ll1.rb} +30 -107
- data/etc/ebnf.ll1.sxp +182 -183
- data/etc/ebnf.peg.rb +90 -0
- data/etc/ebnf.peg.sxp +84 -0
- data/etc/ebnf.sxp +40 -41
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.html +1603 -1751
- data/etc/sparql.ll1.sxp +7372 -7372
- data/etc/sparql.peg.rb +532 -0
- data/etc/sparql.peg.sxp +597 -0
- data/etc/sparql.sxp +363 -362
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.html +465 -517
- data/etc/{turtle.rb → turtle.ll1.rb} +3 -4
- data/etc/turtle.ll1.sxp +425 -425
- data/etc/turtle.peg.rb +182 -0
- data/etc/turtle.peg.sxp +199 -0
- data/etc/turtle.sxp +103 -101
- data/lib/ebnf.rb +7 -2
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +128 -87
- data/lib/ebnf/bnf.rb +1 -26
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +140 -8
- data/lib/ebnf/ll1/lexer.rb +37 -32
- data/lib/ebnf/ll1/parser.rb +113 -73
- data/lib/ebnf/ll1/scanner.rb +83 -51
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +39 -0
- data/lib/ebnf/peg/parser.rb +561 -0
- data/lib/ebnf/peg/rule.rb +241 -0
- data/lib/ebnf/rule.rb +453 -163
- data/lib/ebnf/terminals.rb +21 -0
- data/lib/ebnf/writer.rb +561 -88
- metadata +114 -28
- data/etc/sparql.rb +0 -45773
data/lib/ebnf/isoebnf.rb
ADDED
@@ -0,0 +1,229 @@
|
|
1
|
+
require_relative 'isoebnf/meta'
|
2
|
+
require 'logger'
|
3
|
+
|
4
|
+
# ISO EBNF parser
|
5
|
+
# Parses ISO EBNF into an array of {EBNF::Rule}.
|
6
|
+
module EBNF
|
7
|
+
class ISOEBNF
|
8
|
+
include EBNF::PEG::Parser
|
9
|
+
|
10
|
+
# The base for terminal-character, which omits "'", '"', and '?'.
|
11
|
+
# Could be more optimized, and one might quibble
|
12
|
+
# with the overly-strictly defined character set,
|
13
|
+
# but it is correct.
|
14
|
+
TERMINAL_CHARACTER_BASE = %r{
|
15
|
+
[a-zA-Z0-9] | # letter | decimal digit
|
16
|
+
, | # concatenate symbol
|
17
|
+
= | # defining symbol
|
18
|
+
[\|\/!] | # definition separator symbol
|
19
|
+
\*\) | # end comment symbol
|
20
|
+
\) | # end group symbol
|
21
|
+
\] | # end option symbol
|
22
|
+
\} | # end repeat symbol
|
23
|
+
\- | # except symbol
|
24
|
+
#\' | # first quote symbol
|
25
|
+
\* | # repetition symbol
|
26
|
+
#\" | # second quote symbol
|
27
|
+
#\? | # special sequence symbol
|
28
|
+
\(\* | # start comment symbol
|
29
|
+
\( | # start group symbol
|
30
|
+
\[ | # start option symbol
|
31
|
+
\{ | # start repeat symbol
|
32
|
+
[;\.] | # terminator symbol
|
33
|
+
[:+_%@&$<>^\x20\x23\\`~] # other character
|
34
|
+
}x
|
35
|
+
|
36
|
+
TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}
|
37
|
+
FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}
|
38
|
+
SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}
|
39
|
+
SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
|
40
|
+
|
41
|
+
# Abstract syntax tree from parse
|
42
|
+
#
|
43
|
+
# @return [Array<EBNF::Rule>]
|
44
|
+
attr_reader :ast
|
45
|
+
|
46
|
+
# `[14] integer ::= decimal_digit+`
|
47
|
+
terminal(:integer, /\d+/) do |value, prod|
|
48
|
+
value.to_i
|
49
|
+
end
|
50
|
+
|
51
|
+
# `[15] meta_identifier ::= letter meta_identifier_character*`
|
52
|
+
terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
|
53
|
+
value.to_sym
|
54
|
+
end
|
55
|
+
|
56
|
+
# `[17] terminal_string ::= ("'" first_terminal_character+ "'")`
|
57
|
+
# ` | ('"' second_terminal_character+ '"')`
|
58
|
+
terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
|
59
|
+
value[1..-2]
|
60
|
+
end
|
61
|
+
|
62
|
+
# `[20] special_sequence ::= '?' special_sequence_character* '?'`
|
63
|
+
terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}+\?/)
|
64
|
+
|
65
|
+
# `[22] terminal_character ::= [a-zA-Z0-9]`
|
66
|
+
# ` | [,=;*}#x2d?([{;]`
|
67
|
+
# ` | '*)'`
|
68
|
+
# ` | '(*'`
|
69
|
+
# ` | ']'`
|
70
|
+
# ` | other_character`
|
71
|
+
terminal(:terminal_character, TERMINAL_CHARACTER)
|
72
|
+
|
73
|
+
# `[25] empty ::= ''`
|
74
|
+
terminal(:empty, //)
|
75
|
+
|
76
|
+
# `[26] definition_separator_symbol ::= '|' | '/' | '!'`
|
77
|
+
terminal(:definition_separator_symbol, /[\|\/!]/)
|
78
|
+
|
79
|
+
# `[27] terminator_symbol ::= ';' | '.'`
|
80
|
+
terminal(:terminator_symbol, /[;\.]/)
|
81
|
+
|
82
|
+
# `[28] start_option_symbol ::= '[' | '(/'`
|
83
|
+
terminal(:start_option_symbol, /\[|(?:\(\/)/)
|
84
|
+
|
85
|
+
# `[29] end_option_symbol ::= ']'`
|
86
|
+
terminal(:end_option_symbol, /\]/)
|
87
|
+
|
88
|
+
# `[30] start_repeat_symbol ::= '{' | '(:'`
|
89
|
+
terminal(:start_repeat_symbol, /{|\(:/)
|
90
|
+
|
91
|
+
# `[31] end_repeat_symbol ::= '}' | ':)'`
|
92
|
+
terminal(:end_repeat_symbol, /}|:\)/)
|
93
|
+
|
94
|
+
# ## Non-terminal productions
|
95
|
+
|
96
|
+
# `[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol`
|
97
|
+
production(:syntax_rule, clear_packrat: true) do |value, data, callback|
|
98
|
+
# value contains an expression.
|
99
|
+
# Invoke callback
|
100
|
+
sym = value[0][:meta_identifier]
|
101
|
+
definitions_list = value[2][:definitions_list]
|
102
|
+
callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list))
|
103
|
+
nil
|
104
|
+
end
|
105
|
+
|
106
|
+
# Setting `as_hash: true` in the start production makes the value of the form of a hash, rather than an array of hashes.
|
107
|
+
#
|
108
|
+
# `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*`
|
109
|
+
start_production(:definitions_list, as_hash: true)
|
110
|
+
production(:definitions_list) do |value|
|
111
|
+
if value[:_definitions_list_1].length > 0
|
112
|
+
[:alt, value[:single_definition]] + value[:_definitions_list_1]
|
113
|
+
else
|
114
|
+
value[:single_definition]
|
115
|
+
end
|
116
|
+
end
|
117
|
+
production(:_definitions_list_1) do |value|
|
118
|
+
Array(value.first)
|
119
|
+
end
|
120
|
+
start_production(:_definitions_list_2, as_hash: true)
|
121
|
+
production(:_definitions_list_2) do |value|
|
122
|
+
if Array(value[:definitions_list]).first == :alt
|
123
|
+
value[:definitions_list][1..-1]
|
124
|
+
else
|
125
|
+
[value[:definitions_list]]
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
# `[4] single_definition ::= term (',' term)*`
|
130
|
+
start_production(:single_definition, as_hash: true)
|
131
|
+
production(:single_definition) do |value|
|
132
|
+
if value[:_single_definition_1].length > 0
|
133
|
+
[:seq, value[:term]] + value[:_single_definition_1]
|
134
|
+
else
|
135
|
+
value[:term]
|
136
|
+
end
|
137
|
+
end
|
138
|
+
production(:_single_definition_1) do |value|
|
139
|
+
value.map {|a1| a1.last[:term]}.compact # Get rid of ','
|
140
|
+
end
|
141
|
+
|
142
|
+
# `[5] term ::= factor ('-' exception)?`
|
143
|
+
start_production(:term, as_hash: true)
|
144
|
+
production(:term) do |value|
|
145
|
+
if value[:_term_1]
|
146
|
+
[:diff, value[:factor], value[:_term_1]]
|
147
|
+
else
|
148
|
+
value[:factor]
|
149
|
+
end
|
150
|
+
end
|
151
|
+
production(:_term_1) do |value|
|
152
|
+
value.last[:exception] if value
|
153
|
+
end
|
154
|
+
|
155
|
+
# `[6] exception ::= factor`
|
156
|
+
start_production(:exception, as_hash: true)
|
157
|
+
production(:exception) do |value|
|
158
|
+
value[:factor]
|
159
|
+
end
|
160
|
+
|
161
|
+
# `[7] factor ::= (integer '*')? primary`
|
162
|
+
start_production(:factor, as_hash: true)
|
163
|
+
production(:factor) do |value|
|
164
|
+
if value[:_factor_1]
|
165
|
+
[:rept, value[:_factor_1], value[:_factor_1], value[:primary]]
|
166
|
+
else
|
167
|
+
value[:primary]
|
168
|
+
end
|
169
|
+
end
|
170
|
+
production(:_factor_2) do |value|
|
171
|
+
value.first[:integer]
|
172
|
+
end
|
173
|
+
|
174
|
+
# `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol`
|
175
|
+
production(:optional_sequence) do |value|
|
176
|
+
[:opt, value[1][:definitions_list]]
|
177
|
+
end
|
178
|
+
|
179
|
+
# `[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol`
|
180
|
+
production(:repeated_sequence) do |value|
|
181
|
+
[:star, value[1][:definitions_list]]
|
182
|
+
end
|
183
|
+
|
184
|
+
# `[11] grouped_sequence ::= '(' definitions_list ')'`
|
185
|
+
production(:grouped_sequence) do |value|
|
186
|
+
[:seq, value[1][:definitions_list]]
|
187
|
+
end
|
188
|
+
|
189
|
+
# ## Parser invocation.
|
190
|
+
# On start, yield ourselves if a block is given, otherwise, return this parser instance
|
191
|
+
#
|
192
|
+
# @param [#read, #to_s] input
|
193
|
+
# @param [Hash{Symbol => Object}] options
|
194
|
+
# @option options [Integer] :level
|
195
|
+
# Trace level. 0(debug), 1(info), 2(warn), 3(error).
|
196
|
+
# @return [EBNF::ISOEBNF]
|
197
|
+
def initialize(input, **options, &block)
|
198
|
+
# If the `level` option is set, instantiate a logger for collecting trace information.
|
199
|
+
if options.has_key?(:level)
|
200
|
+
options[:logger] = Logger.new(STDERR)
|
201
|
+
options[:logger].level = options[:level]
|
202
|
+
options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
|
203
|
+
end
|
204
|
+
|
205
|
+
# Read input, if necessary, which will be used in a Scanner.
|
206
|
+
@input = input.respond_to?(:read) ? input.read : input.to_s
|
207
|
+
|
208
|
+
parsing_terminals = false
|
209
|
+
@ast = []
|
210
|
+
parse(@input,
|
211
|
+
:syntax,
|
212
|
+
ISOEBNFMeta::RULES,
|
213
|
+
whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+},
|
214
|
+
**options
|
215
|
+
) do |context, *data|
|
216
|
+
rule = case context
|
217
|
+
when :rule
|
218
|
+
# A rule which has already been turned into a `Rule` object.
|
219
|
+
rule = data.first
|
220
|
+
rule.kind = :terminal if parsing_terminals
|
221
|
+
rule
|
222
|
+
end
|
223
|
+
@ast << rule if rule
|
224
|
+
end
|
225
|
+
rescue EBNF::PEG::Parser::Error => e
|
226
|
+
raise SyntaxError, e.message
|
227
|
+
end
|
228
|
+
end
|
229
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# This file is automatically generated by ebnf version 2.0.0
|
2
|
+
# Derived from etc/iso-ebnf.ebnf
|
3
|
+
module ISOEBNFMeta
|
4
|
+
RULES = [
|
5
|
+
EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule),
|
6
|
+
EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule),
|
7
|
+
EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule),
|
8
|
+
EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule),
|
9
|
+
EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule),
|
10
|
+
EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule),
|
11
|
+
EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule),
|
12
|
+
EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule),
|
13
|
+
EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule),
|
14
|
+
EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule),
|
15
|
+
EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule),
|
16
|
+
EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule),
|
17
|
+
EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule),
|
18
|
+
EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule),
|
19
|
+
EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule),
|
20
|
+
EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule),
|
21
|
+
EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule),
|
22
|
+
EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule),
|
23
|
+
EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule),
|
24
|
+
EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
|
25
|
+
EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule),
|
26
|
+
EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule),
|
27
|
+
EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule),
|
28
|
+
EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule),
|
29
|
+
EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule),
|
30
|
+
EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule),
|
31
|
+
EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule),
|
32
|
+
EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule),
|
33
|
+
EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
34
|
+
EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule),
|
35
|
+
EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule),
|
36
|
+
EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule),
|
37
|
+
EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
|
38
|
+
EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule),
|
39
|
+
EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
|
40
|
+
EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
|
41
|
+
EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule),
|
42
|
+
EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
43
|
+
EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
44
|
+
EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
45
|
+
EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
|
46
|
+
EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule),
|
47
|
+
EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule),
|
48
|
+
EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
|
49
|
+
EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule),
|
50
|
+
EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule),
|
51
|
+
EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule),
|
52
|
+
EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule),
|
53
|
+
EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
54
|
+
EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
|
55
|
+
EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
56
|
+
EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
57
|
+
EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
58
|
+
EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
59
|
+
EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule),
|
60
|
+
EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule),
|
61
|
+
EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
62
|
+
EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule),
|
63
|
+
EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule),
|
64
|
+
EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule),
|
65
|
+
EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule),
|
66
|
+
EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
|
67
|
+
EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule),
|
68
|
+
EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
69
|
+
EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule),
|
70
|
+
EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule),
|
71
|
+
EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule),
|
72
|
+
EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
73
|
+
]
|
74
|
+
end
|
75
|
+
|
data/lib/ebnf/ll1.rb
CHANGED
@@ -1,4 +1,86 @@
|
|
1
1
|
module EBNF
|
2
|
+
##
|
3
|
+
# This module extends {EBNF::Base} to create metadata including _branch_, [First/Follow][], and other tables which are used by {EBNF::LL1::Parser} to recognize examples of the associated grammar.
|
4
|
+
#
|
5
|
+
### Branch Table
|
6
|
+
#
|
7
|
+
# The Branch table is a hash mapping production rules to a hash relating terminals appearing in input to sequence of productions to follow when the corresponding input terminal is found. This allows either the `seq` primitive, where all terminals map to the same sequence of productions, or the `alt` primitive, where each terminal may map to a different production.
|
8
|
+
#
|
9
|
+
# BRANCH = {
|
10
|
+
# :alt => {
|
11
|
+
# "(" => [:seq, :_alt_1],
|
12
|
+
# :HEX => [:seq, :_alt_1],
|
13
|
+
# :O_RANGE => [:seq, :_alt_1],
|
14
|
+
# :RANGE => [:seq, :_alt_1],
|
15
|
+
# :STRING1 => [:seq, :_alt_1],
|
16
|
+
# :STRING2 => [:seq, :_alt_1],
|
17
|
+
# :SYMBOL => [:seq, :_alt_1],
|
18
|
+
# },
|
19
|
+
# ...
|
20
|
+
# :declaration => {
|
21
|
+
# "@pass" => [:pass],
|
22
|
+
# "@terminals" => ["@terminals"],
|
23
|
+
# },
|
24
|
+
# ...
|
25
|
+
# }
|
26
|
+
#
|
27
|
+
# In this case the `alt` rule, which is `seq ('|' seq)*`, can be matched when any of the specified tokens appears on the input stream. They all cause the same token to be passed to the `seq` rule, followed by `_alt_1`, which handles the `('|' seq)*` portion of the rule after the first sequence is matched.
|
28
|
+
#
|
29
|
+
# The `declaration` rule is `'@terminals' | pass`, using the `alt` primitive to determine the production to run based on the terminal appearing on the input stream. Eventually, a terminal production is found and the token is consumed.
|
30
|
+
#
|
31
|
+
### First/Follow Table
|
32
|
+
#
|
33
|
+
# The [First/Follow][] table is a hash mapping production rules to the terminals that may precede or follow the rule. For example:
|
34
|
+
#
|
35
|
+
# FIRST = {
|
36
|
+
# :alt => [
|
37
|
+
# :HEX,
|
38
|
+
# :SYMBOL,
|
39
|
+
# :RANGE,
|
40
|
+
# :O_RANGE,
|
41
|
+
# :STRING1,
|
42
|
+
# :STRING2,
|
43
|
+
# "("],
|
44
|
+
# ...
|
45
|
+
# }
|
46
|
+
#
|
47
|
+
### Terminals Table
|
48
|
+
#
|
49
|
+
# This table is a simple list of the terminal productions found in the grammar. For example:
|
50
|
+
#
|
51
|
+
# TERMINALS = ["(", ")", "-",
|
52
|
+
# "@pass", "@terminals",
|
53
|
+
# :HEX, :LHS, :O_RANGE,:POSTFIX,
|
54
|
+
# :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
|
55
|
+
# ].freeze
|
56
|
+
#
|
57
|
+
### Cleanup Table
|
58
|
+
#
|
59
|
+
# This table identifies productions which used EBNF rules, which are transformed to BNF for actual parsing. This allows the parser, in some cases, to reproduce *star*, *plus*, and *opt* rule matches. For example:
|
60
|
+
#
|
61
|
+
# CLEANUP = {
|
62
|
+
# :_alt_1 => :star,
|
63
|
+
# :_alt_3 => :merge,
|
64
|
+
# :_diff_1 => :opt,
|
65
|
+
# :ebnf => :star,
|
66
|
+
# :_ebnf_2 => :merge,
|
67
|
+
# :_postfix_1 => :opt,
|
68
|
+
# :seq => :plus,
|
69
|
+
# :_seq_1 => :star,
|
70
|
+
# :_seq_2 => :merge,
|
71
|
+
# }.freeze
|
72
|
+
#
|
73
|
+
# In this case the `ebnf` rule was `(declaration | rule)*`. As BNF does not support a star operator, this is decomposed into a set of rules using `alt` and `seq` primitives:
|
74
|
+
#
|
75
|
+
# ebnf ::= _empty _ebnf_2
|
76
|
+
# _ebnf_1 ::= declaration | rule
|
77
|
+
# _ebnf_2 ::= _ebnf_1 ebnf
|
78
|
+
# _ebnf_3 ::= ebnf
|
79
|
+
#
|
80
|
+
# The `_empty` production matches an empty string, so allows for no value. `_ebnf_2` matches `declaration | rule` (using the `alt` primitive) followed by `ebnf`, creating a sequence of zero or more `declaration` or `rule` members.
|
81
|
+
#
|
82
|
+
# [First/Follow]: https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
|
83
|
+
|
2
84
|
module LL1
|
3
85
|
autoload :Lexer, "ebnf/ll1/lexer"
|
4
86
|
autoload :Parser, "ebnf/ll1/parser"
|
@@ -51,8 +133,40 @@ module EBNF
|
|
51
133
|
##
|
52
134
|
# Create first/follow for each rule using techniques defined for LL(1) parsers.
|
53
135
|
#
|
136
|
+
# This takes rules which have been transformed into BNF and adds first/follow and other information to the rules to allow the generation of metadata tables used for driving a parser.
|
137
|
+
#
|
138
|
+
# Given an initial rule in EBNF:
|
139
|
+
#
|
140
|
+
# (rule ebnf "1" (star declaration rule))
|
141
|
+
#
|
142
|
+
# The BNF transformation becomes:
|
143
|
+
#
|
144
|
+
# (rule ebnf "1" (alt _empty _ebnf_2))
|
145
|
+
# (rule _ebnf_1 "1.1" (alt declaration rule))
|
146
|
+
# (rule _ebnf_2 "1.2" (seq _ebnf_1 ebnf))
|
147
|
+
# (rule _ebnf_3 "1.3" (seq ebnf))
|
148
|
+
#
|
149
|
+
# After running this method, the rules are annotated with first/follow and cleanup rules:
|
150
|
+
#
|
151
|
+
# (rule ebnf "1"
|
152
|
+
# (start #t)
|
153
|
+
# (first "@pass" "@terminals" LHS _eps)
|
154
|
+
# (follow _eof)
|
155
|
+
# (cleanup star)
|
156
|
+
# (alt _empty _ebnf_2))
|
157
|
+
# (rule _ebnf_1 "1.1"
|
158
|
+
# (first "@pass" "@terminals" LHS)
|
159
|
+
# (follow "@pass" "@terminals" LHS _eof)
|
160
|
+
# (alt declaration rule))
|
161
|
+
# (rule _ebnf_2 "1.2"
|
162
|
+
# (first "@pass" "@terminals" LHS)
|
163
|
+
# (follow _eof)
|
164
|
+
# (cleanup merge)
|
165
|
+
# (seq _ebnf_1 ebnf))
|
166
|
+
# (rule _ebnf_3 "1.3" (first "@pass" "@terminals" LHS _eps) (follow _eof) (seq ebnf))
|
167
|
+
#
|
54
168
|
# @return [EBNF] self
|
55
|
-
# @see
|
169
|
+
# @see https://en.wikipedia.org/wiki/LL_parser#Constructing_an_LL.281.29_parsing_table
|
56
170
|
# @param [Array<Symbol>] starts
|
57
171
|
# Set of symbols which are start rules
|
58
172
|
def first_follow(*starts)
|
@@ -96,8 +210,9 @@ module EBNF
|
|
96
210
|
firsts, follows = 0, 0
|
97
211
|
# add Fi(wi) to Fi(Ai) for every rule Ai → wi
|
98
212
|
#
|
99
|
-
# For sequences, this is the first rule in the sequence.
|
100
|
-
# For alts, this is every rule in the sequence
|
213
|
+
# * For sequences, this is the first rule in the sequence.
|
214
|
+
# * For alts, this is every rule in the sequence
|
215
|
+
# * Other rules don't matter, as they don't appear in strict BNF
|
101
216
|
each(:rule) do |ai|
|
102
217
|
# Fi(a w' ) = { a } for every terminal a
|
103
218
|
ai.terminals(ast).each do |t|
|
@@ -168,6 +283,10 @@ module EBNF
|
|
168
283
|
progress("first_follow") {"(#{ittr}) firsts #{firsts}, follows #{follows}"}
|
169
284
|
ittr += 1
|
170
285
|
end while (firsts + follows) > 0
|
286
|
+
|
287
|
+
debug("Fi.2-post: non-terminals without first") do
|
288
|
+
ast.reject(&:terminal?).reject(&:first).map(&:sym)
|
289
|
+
end if ast.reject(&:terminal?).any? {|r| r.first.nil?}
|
171
290
|
end
|
172
291
|
end
|
173
292
|
|
@@ -216,7 +335,7 @@ module EBNF
|
|
216
335
|
@branch = {}
|
217
336
|
@already = []
|
218
337
|
@agenda = []
|
219
|
-
@starts.each do |start|
|
338
|
+
Array(@starts).each do |start|
|
220
339
|
do_production(start)
|
221
340
|
while !@agenda.empty?
|
222
341
|
x = @agenda.shift
|
@@ -265,13 +384,26 @@ module EBNF
|
|
265
384
|
end
|
266
385
|
end
|
267
386
|
io.puts "#{ind0}}.freeze\n"
|
268
|
-
|
387
|
+
elsif table
|
269
388
|
io.puts "#{ind0}#{name} = [\n#{ind1}" +
|
270
389
|
table.sort_by{|t| t.to_s.sub(/^_/, '')}.map(&:inspect).join(",\n#{ind1}") +
|
271
390
|
"\n#{ind0}].freeze\n"
|
272
391
|
end
|
273
392
|
end
|
274
393
|
|
394
|
+
##
|
395
|
+
# Output Ruby parser files for LL(1) parsing
|
396
|
+
#
|
397
|
+
# @param [IO, StringIO] output
|
398
|
+
def to_ruby_ll1(output, **options)
|
399
|
+
self.outputTable(output, 'BRANCH', self.branch, 1)
|
400
|
+
self.outputTable(output, 'TERMINALS', self.terminals, 1)
|
401
|
+
self.outputTable(output, 'FIRST', self.first, 1)
|
402
|
+
self.outputTable(output, 'FOLLOW', self.follow, 1)
|
403
|
+
self.outputTable(output, 'CLEANUP', self.cleanup, 1)
|
404
|
+
self.outputTable(output, 'PASS', [self.pass], 1) if self.pass
|
405
|
+
end
|
406
|
+
|
275
407
|
private
|
276
408
|
def do_production(lhs)
|
277
409
|
rule = find_rule(lhs)
|
@@ -287,16 +419,16 @@ module EBNF
|
|
287
419
|
|
288
420
|
if rule.expr.first == :matches
|
289
421
|
debug("prod") {"Rule is regexp: #{rule}"}
|
290
|
-
|
291
|
-
error("No record of what token #{lhs} can start with") unless rule.first
|
292
422
|
return
|
293
423
|
end
|
294
424
|
|
425
|
+
error("No record of what token #{lhs.inspect} can start with") unless rule.first
|
426
|
+
|
295
427
|
if rule.alt?
|
296
428
|
# A First/Follow conflict appears when _eps is in the first
|
297
429
|
# of one rule and there is a token in the first and
|
298
430
|
# follow of the same rule
|
299
|
-
if rule.first.include?(:_eps) && !(overlap = ((rule.first & (rule.follow || [])) - [:eps])).empty?
|
431
|
+
if Array(rule.first).include?(:_eps) && !(overlap = ((Array(rule.first) & (rule.follow || [])) - [:eps])).empty?
|
300
432
|
error("First/Follow Conflict: #{overlap.first.inspect} is both first and follow of #{rule.sym}")
|
301
433
|
end
|
302
434
|
|