ebnf 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +81 -36
- data/VERSION +1 -1
- data/bin/ebnf +34 -18
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/ebnf.ebnf +19 -25
- data/etc/ebnf.html +251 -206
- data/etc/ebnf.ll1.rb +27 -103
- data/etc/ebnf.ll1.sxp +105 -102
- data/etc/ebnf.peg.rb +54 -62
- data/etc/ebnf.peg.sxp +53 -62
- data/etc/ebnf.sxp +22 -19
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.sxp +8 -7
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.sxp +22 -20
- data/lib/ebnf.rb +3 -0
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +87 -44
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +4 -7
- data/lib/ebnf/ll1/parser.rb +12 -4
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +1 -1
- data/lib/ebnf/peg/parser.rb +24 -5
- data/lib/ebnf/peg/rule.rb +77 -58
- data/lib/ebnf/rule.rb +352 -121
- data/lib/ebnf/terminals.rb +13 -10
- data/lib/ebnf/writer.rb +550 -78
- metadata +48 -6
data/lib/ebnf/isoebnf.rb
ADDED
@@ -0,0 +1,229 @@
|
|
1
|
+
require_relative 'isoebnf/meta'
|
2
|
+
require 'logger'
|
3
|
+
|
4
|
+
# ISO EBNF parser
|
5
|
+
# Parses ISO EBNF into an array of {EBNF::Rule}.
|
6
|
+
module EBNF
|
7
|
+
class ISOEBNF
|
8
|
+
include EBNF::PEG::Parser
|
9
|
+
|
10
|
+
# The base for terminal-character, which omits "'", '"', and '?'.
|
11
|
+
# Could be more optimized, and one might quibble
|
12
|
+
# with the overly-strictly defined character set,
|
13
|
+
# but it is correct.
|
14
|
+
TERMINAL_CHARACTER_BASE = %r{
|
15
|
+
[a-zA-Z0-9] | # letter | decimal digit
|
16
|
+
, | # concatenate symbol
|
17
|
+
= | # defining symbol
|
18
|
+
[\|\/!] | # definition separator symbol
|
19
|
+
\*\) | # end comment symbol
|
20
|
+
\) | # end group symbol
|
21
|
+
\] | # end option symbol
|
22
|
+
\} | # end repeat symbol
|
23
|
+
\- | # except symbol
|
24
|
+
#\' | # first quote symbol
|
25
|
+
\* | # repetition symbol
|
26
|
+
#\" | # second quote symbol
|
27
|
+
#\? | # special sequence symbol
|
28
|
+
\(\* | # start comment symbol
|
29
|
+
\( | # start group symbol
|
30
|
+
\[ | # start option symbol
|
31
|
+
\{ | # start repeat symbol
|
32
|
+
[;\.] | # terminator symbol
|
33
|
+
[:+_%@&$<>^\x20\x23\\`~] # other character
|
34
|
+
}x
|
35
|
+
|
36
|
+
TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}
|
37
|
+
FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}
|
38
|
+
SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}
|
39
|
+
SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
|
40
|
+
|
41
|
+
# Abstract syntax tree from parse
|
42
|
+
#
|
43
|
+
# @return [Array<EBNF::Rule>]
|
44
|
+
attr_reader :ast
|
45
|
+
|
46
|
+
# `[14] integer ::= decimal_digit+`
|
47
|
+
terminal(:integer, /\d+/) do |value, prod|
|
48
|
+
value.to_i
|
49
|
+
end
|
50
|
+
|
51
|
+
# `[15] meta_identifier ::= letter meta_identifier_character*`
|
52
|
+
terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
|
53
|
+
value.to_sym
|
54
|
+
end
|
55
|
+
|
56
|
+
# `[17] terminal_string ::= ("'" first_terminal_character+ "'")`
|
57
|
+
# ` | ('"' second_terminal_character+ '"')`
|
58
|
+
terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
|
59
|
+
value[1..-2]
|
60
|
+
end
|
61
|
+
|
62
|
+
# `[20] special_sequence ::= '?' special_sequence_character* '?'`
|
63
|
+
# Rule [20] allows zero or more characters between the '?' delimiters,
# so an empty special sequence (`??`) must be accepted as well.
terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}*\?/)
|
64
|
+
|
65
|
+
# `[22] terminal_character ::= [a-zA-Z0-9]`
|
66
|
+
# ` | [,=;*}#x2d?([{;]`
|
67
|
+
# ` | '*)'`
|
68
|
+
# ` | '(*'`
|
69
|
+
# ` | ']'`
|
70
|
+
# ` | other_character`
|
71
|
+
terminal(:terminal_character, TERMINAL_CHARACTER)
|
72
|
+
|
73
|
+
# `[25] empty ::= ''`
|
74
|
+
terminal(:empty, //)
|
75
|
+
|
76
|
+
# `[26] definition_separator_symbol ::= '|' | '/' | '!'`
|
77
|
+
terminal(:definition_separator_symbol, /[\|\/!]/)
|
78
|
+
|
79
|
+
# `[27] terminator_symbol ::= ';' | '.'`
|
80
|
+
terminal(:terminator_symbol, /[;\.]/)
|
81
|
+
|
82
|
+
# `[28] start_option_symbol ::= '['` (the regexp below additionally accepts '(/')
|
83
|
+
terminal(:start_option_symbol, /\[|(?:\(\/)/)
|
84
|
+
|
85
|
+
# `[29] end_option_symbol ::= ']'`
|
86
|
+
terminal(:end_option_symbol, /\]/)
|
87
|
+
|
88
|
+
# `[30] start_repeat_symbol ::= '{' | '(:'`
|
89
|
+
terminal(:start_repeat_symbol, /{|\(:/)
|
90
|
+
|
91
|
+
# `[31] end_repeat_symbol ::= '}' | ':)'`
|
92
|
+
terminal(:end_repeat_symbol, /}|:\)/)
|
93
|
+
|
94
|
+
# ## Non-terminal productions
|
95
|
+
|
96
|
+
# `[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol`
|
97
|
+
production(:syntax_rule, clear_packrat: true) do |value, data, callback|
  # The matched sequence carries the rule name at index 0 and its expression
  # at index 2. The finished rule is delivered through the callback; the
  # production itself yields no value.
  name = value[0][:meta_identifier]
  body = value[2][:definitions_list]
  callback.call(:rule, EBNF::Rule.new(name.to_sym, nil, body))
  nil
end

# Setting `as_hash: true` in the start production makes the value a single
# hash, rather than an array of hashes.
#
# `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*`
start_production(:definitions_list, as_hash: true)
production(:definitions_list) do |value|
  first = value[:single_definition]
  others = value[:_definitions_list_1]
  # A lone definition is returned bare; several become an :alt.
  others.length > 0 ? [:alt, first] + others : first
end
production(:_definitions_list_1) do |value|
  Array(value.first)
end
start_production(:_definitions_list_2, as_hash: true)
production(:_definitions_list_2) do |value|
  nested = value[:definitions_list]
  # Flatten a nested :alt so alternatives end up in one list.
  Array(nested).first == :alt ? nested[1..-1] : [nested]
end

# `[4] single_definition ::= term (',' term)*`
start_production(:single_definition, as_hash: true)
production(:single_definition) do |value|
  head = value[:term]
  tail = value[:_single_definition_1]
  tail.length > 0 ? [:seq, head] + tail : head
end
production(:_single_definition_1) do |value|
  # Keep only the terms, dropping the ',' separators.
  value.map {|pair| pair.last[:term]}.compact
end

# `[5] term ::= factor ('-' exception)?`
start_production(:term, as_hash: true)
production(:term) do |value|
  excluded = value[:_term_1]
  excluded ? [:diff, value[:factor], excluded] : value[:factor]
end
production(:_term_1) do |value|
  value ? value.last[:exception] : nil
end

# `[6] exception ::= factor`
start_production(:exception, as_hash: true)
production(:exception) do |value|
  value[:factor]
end

# `[7] factor ::= (integer '*')? primary`
start_production(:factor, as_hash: true)
production(:factor) do |value|
  count = value[:_factor_1]
  # An explicit repetition count is used as both bounds of the :rept.
  count ? [:rept, count, count, value[:primary]] : value[:primary]
end
production(:_factor_2) do |value|
  value.first[:integer]
end

# `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol`
production(:optional_sequence) do |value|
  [:opt, value[1][:definitions_list]]
end

# `[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol`
production(:repeated_sequence) do |value|
  [:star, value[1][:definitions_list]]
end

# `[11] grouped_sequence ::= '(' definitions_list ')'`
production(:grouped_sequence) do |value|
  [:seq, value[1][:definitions_list]]
end
|
188
|
+
|
189
|
+
# ## Parser invocation.
|
190
|
+
# Parses the input immediately; the resulting rules are available from {#ast}. The block argument is accepted but not used.
|
191
|
+
#
|
192
|
+
# @param [#read, #to_s] input
|
193
|
+
# @param [Hash{Symbol => Object}] options
|
194
|
+
# @option options [Integer] :level
|
195
|
+
# Trace level. 0(debug), 1(info), 2(warn), 3(error).
|
196
|
+
# @return [EBNF::ISOEBNF]
|
197
|
+
# Parses ISO EBNF `input` and collects the resulting rules in `@ast`.
#
# @param [#read, #to_s] input
# @param [Hash{Symbol => Object}] options
#   passed through to the PEG parser
# @option options [Integer] :level
#   Trace level. 0(debug), 1(info), 2(warn), 3(error). When present, a
#   logger writing to STDERR is installed as `options[:logger]`.
# @raise [SyntaxError] when the PEG parser reports an error
def initialize(input, **options, &block)
  # If the `level` option is set, instantiate a logger for collecting trace information.
  if options.has_key?(:level)
    logger = Logger.new(STDERR)
    logger.level = options[:level]
    logger.formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
    options[:logger] = logger
  end

  # Read input, if necessary, which will be used in a Scanner.
  @input = input.respond_to?(:read) ? input.read : input.to_s

  @ast = []
  # Whitespace is either a gap separator or a `(* ... *)` comment. The
  # comment pattern tolerates runs of '*' before the closing ')', so a
  # comment ending in `**)` stops at the right place.
  parse(@input,
        :syntax,
        ISOEBNFMeta::RULES,
        whitespace: %r{([\x09-\x0d\x20]|(?:\(\*[^*]*\*+(?:[^*)][^*]*\*+)*\)))+},
        **options
  ) do |context, *data|
    # Only `:rule` callbacks carry an already-built `Rule` object.
    @ast << data.first if context == :rule && data.first
  end
rescue EBNF::PEG::Parser::Error => e
  raise SyntaxError, e.message
end
|
228
|
+
end
|
229
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# This file is automatically generated by ebnf version 2.0.0
|
2
|
+
# Derived from etc/iso-ebnf.ebnf
|
3
|
+
module ISOEBNFMeta
|
4
|
+
RULES = [
|
5
|
+
EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule),
|
6
|
+
EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule),
|
7
|
+
EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule),
|
8
|
+
EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule),
|
9
|
+
EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule),
|
10
|
+
EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule),
|
11
|
+
EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule),
|
12
|
+
EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule),
|
13
|
+
EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule),
|
14
|
+
EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule),
|
15
|
+
EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule),
|
16
|
+
EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule),
|
17
|
+
EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule),
|
18
|
+
EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule),
|
19
|
+
EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule),
|
20
|
+
EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule),
|
21
|
+
EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule),
|
22
|
+
EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule),
|
23
|
+
EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule),
|
24
|
+
EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
|
25
|
+
EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule),
|
26
|
+
EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule),
|
27
|
+
EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule),
|
28
|
+
EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule),
|
29
|
+
EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule),
|
30
|
+
EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule),
|
31
|
+
EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule),
|
32
|
+
EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule),
|
33
|
+
EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
34
|
+
EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule),
|
35
|
+
EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule),
|
36
|
+
EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule),
|
37
|
+
EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
|
38
|
+
EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule),
|
39
|
+
EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
|
40
|
+
EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
|
41
|
+
EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule),
|
42
|
+
EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
43
|
+
EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
44
|
+
EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
45
|
+
EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
|
46
|
+
EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule),
|
47
|
+
EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule),
|
48
|
+
EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
|
49
|
+
EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule),
|
50
|
+
EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule),
|
51
|
+
EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule),
|
52
|
+
EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule),
|
53
|
+
EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
54
|
+
EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
|
55
|
+
EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
56
|
+
EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
57
|
+
EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
58
|
+
EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
59
|
+
EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule),
|
60
|
+
EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule),
|
61
|
+
EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
62
|
+
EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule),
|
63
|
+
EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule),
|
64
|
+
EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule),
|
65
|
+
EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule),
|
66
|
+
EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
|
67
|
+
EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule),
|
68
|
+
EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
69
|
+
EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule),
|
70
|
+
EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule),
|
71
|
+
EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule),
|
72
|
+
EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
73
|
+
]
|
74
|
+
end
|
75
|
+
|
data/lib/ebnf/ll1.rb
CHANGED
@@ -9,9 +9,7 @@ module EBNF
|
|
9
9
|
# BRANCH = {
|
10
10
|
# :alt => {
|
11
11
|
# "(" => [:seq, :_alt_1],
|
12
|
-
# :ENUM => [:seq, :_alt_1],
|
13
12
|
# :HEX => [:seq, :_alt_1],
|
14
|
-
# :O_ENUM => [:seq, :_alt_1],
|
15
13
|
# :O_RANGE => [:seq, :_alt_1],
|
16
14
|
# :RANGE => [:seq, :_alt_1],
|
17
15
|
# :STRING1 => [:seq, :_alt_1],
|
@@ -38,8 +36,6 @@ module EBNF
|
|
38
36
|
# :alt => [
|
39
37
|
# :HEX,
|
40
38
|
# :SYMBOL,
|
41
|
-
# :ENUM,
|
42
|
-
# :O_ENUM,
|
43
39
|
# :RANGE,
|
44
40
|
# :O_RANGE,
|
45
41
|
# :STRING1,
|
@@ -54,7 +50,7 @@ module EBNF
|
|
54
50
|
#
|
55
51
|
# TERMINALS = ["(", ")", "-",
|
56
52
|
# "@pass", "@terminals",
|
57
|
-
#
|
53
|
+
# :HEX, :LHS, :O_RANGE,:POSTFIX,
|
58
54
|
# :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
|
59
55
|
# ].freeze
|
60
56
|
#
|
@@ -214,8 +210,9 @@ module EBNF
|
|
214
210
|
firsts, follows = 0, 0
|
215
211
|
# add Fi(wi) to Fi(Ai) for every rule Ai → wi
|
216
212
|
#
|
217
|
-
# For sequences, this is the first rule in the sequence.
|
218
|
-
# For alts, this is every rule in the sequence
|
213
|
+
# * For sequences, this is the first rule in the sequence.
|
214
|
+
# * For alts, this is every rule in the sequence
|
215
|
+
# * Other rules don't matter, as they don't appear in strict BNF
|
219
216
|
each(:rule) do |ai|
|
220
217
|
# Fi(a w' ) = { a } for every terminal a
|
221
218
|
ai.terminals(ast).each do |t|
|
data/lib/ebnf/ll1/parser.rb
CHANGED
@@ -576,15 +576,23 @@ module EBNF::LL1
|
|
576
576
|
# @option options [Integer] :depth
|
577
577
|
# Recursion depth for indenting output
|
578
578
|
# @yieldreturn [String] additional string appended to `message`.
|
579
|
-
def debug(*args)
|
579
|
+
# Emits a trace message through whichever logging facility is available.
# Does nothing unless `@options[:logger]` is set. A trailing Hash in `args`
# is treated as options (`:level`, `:depth`, `:token`); the block, when
# given, supplies extra text appended to the message.
def debug(*args, &block)
  logger = @options[:logger]
  return unless logger

  options = args.last.is_a?(Hash) ? args.pop : {}
  lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
  level = options.fetch(:level, 0)
  depth = options[:depth] || self.depth

  # Prefer a `log_debug` method on the receiver when one exists, passing
  # the level as a symbol.
  if self.respond_to?(:log_debug)
    severity = [:debug, :info, :warn, :error, :fatal][level]
    return log_debug(*args, **options.merge(level: severity, lineno: lineno, depth: depth), &block)
  end

  takes_add = logger.respond_to?(:add)
  return unless takes_add || logger.respond_to?(:<<)

  args << yield if block_given?
  line = "[#{lineno}]" + (" " * depth) + args.join(" ")
  takes_add ? logger.add(level, line) : logger << line
end
|
589
597
|
|
590
598
|
private
|
data/lib/ebnf/native.rb
ADDED
@@ -0,0 +1,320 @@
|
|
1
|
+
module EBNF
|
2
|
+
module Native
|
3
|
+
##
|
4
|
+
# Native parser for EBNF; less accurate, but appropriate when changing EBNF grammar, itself.
|
5
|
+
#
|
6
|
+
# Iterate over rule strings.
|
7
|
+
# a line that starts with '\[' or '@' starts a new rule
|
8
|
+
#
|
9
|
+
# @param [StringScanner] scanner
|
10
|
+
# @yield rule_string
|
11
|
+
# @yieldparam [String] rule_string
|
12
|
+
# Splits the scanner's input into rule strings and yields each one.
#
# Whitespace and comments are skipped and are not part of the yielded text.
# A new rule starts at `@terminals`, `@pass`, or a match of
# `EBNF::Terminals::LHS`. `@lineno` is set to the line on which the rule
# being collected started.
#
# @param [StringScanner] scanner
# @yield rule_string
# @yieldparam [String] rule_string
def eachRule(scanner)
  cur_lineno = 1
  r = ''
  until scanner.eos?
    case
    when s = scanner.scan(%r(\s+)m)
      # Eat whitespace
      cur_lineno += s.count("\n")
    when s = scanner.scan(%r{/\*[^*]*\*+(?:[^/*][^*]*\*+)*/}m)
      # Eat comments /* .. */ (including ones closed by a run of '*', e.g. `**/`)
      cur_lineno += s.count("\n")
      debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
    when s = scanner.scan(%r{\(\*[^*]*\*+(?:[^*)][^*]*\*+)*\)}m)
      # Eat comments (* .. *) (including ones closed by `**)`)
      cur_lineno += s.count("\n")
      debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
    when s = scanner.scan(%r((#(?!x)|//).*$))
      # Eat comments // & #
      cur_lineno += s.count("\n")
      debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
    when s = scanner.scan(/\A["']/)
      # Found a quote, scan until end of matching quote
      s += scanner.scan_until(/#{scanner.matched}|$/)
      r += s
    when s = scanner.scan(%r(^@terminals))
      yield(r) unless r.empty?
      @lineno = cur_lineno
      yield(s)
      r = ''
    when s = scanner.scan(/@pass/)
      # Found rule start, if we've already collected a rule, yield it
      yield r unless r.empty?
      @lineno = cur_lineno
      r = s
    when s = scanner.scan(EBNF::Terminals::LHS)
      # Found rule start, if we've already collected a rule, yield it
      yield r unless r.empty?
      @lineno = cur_lineno
      r = s
    else
      # Collect until end of line, or start of comment or quote
      s = scanner.scan_until(%r{(?:[/\(]\*)|#(?!x)|//|["']|$})
      delimiter = scanner.matched
      # Back up over the delimiter so the next pass can handle it, but only
      # when text preceded it. A delimiter sitting at the scan head got here
      # because no case above could consume it (e.g. an unterminated
      # comment); backing up again would loop forever, so it is kept as
      # ordinary rule text instead.
      if delimiter.length > 0 && s.length > delimiter.length
        scanner.pos = scanner.pos - delimiter.length
        s = s[0..-(delimiter.length+1)]
      end
      cur_lineno += s.count("\n")
      r += s
    end
  end
  yield r unless r.empty?
end
|
72
|
+
|
73
|
+
##
|
74
|
+
# Parse a rule into an optional rule number, a symbol and an expression
|
75
|
+
#
|
76
|
+
# @param [String] rule
|
77
|
+
# @return [Rule]
|
78
|
+
# Builds a Rule from one rule string of the form `[num] sym ::= expr`,
# where the `[num]` part may be absent.
def ruleParts(rule)
  lhs, rhs = rule.split('::=', 2).map(&:strip)
  ident, name = lhs.split(']', 2).map(&:strip)
  # Without a ']' there is no rule number: the whole left side is the symbol.
  ident, name = "", ident if name.nil?
  ident = ident[1..-1] # drop the leading '['
  Rule.new(name && name.to_sym, ident, expression(rhs).first, ebnf: self).tap do |parsed|
    debug("ruleParts") { parsed.inspect }
  end
end
|
87
|
+
|
88
|
+
##
|
89
|
+
# Parse a string into an expression tree and a remaining string
|
90
|
+
#
|
91
|
+
# @example
|
92
|
+
# >>> expression("a b c")
|
93
|
+
# ((seq a b c) '')
|
94
|
+
#
|
95
|
+
# >>> expression("a? b+ c*")
|
96
|
+
# ((seq (opt a) (plus b) (star c)) '')
|
97
|
+
#
|
98
|
+
# >>> expression(" | x xlist")
|
99
|
+
# ((alt (seq) (seq x xlist)) '')
|
100
|
+
#
|
101
|
+
# >>> expression("a | (b - c)")
|
102
|
+
# ((alt a (diff b c)) '')
|
103
|
+
#
|
104
|
+
# >>> expression("a b | c d")
|
105
|
+
# ((alt (seq a b) (seq c d)) '')
|
106
|
+
#
|
107
|
+
# >>> expression("a | b | c")
|
108
|
+
# ((alt a b c) '')
|
109
|
+
#
|
110
|
+
# >>> expression("a) b c")
|
111
|
+
# (a ' b c')
|
112
|
+
#
|
113
|
+
# >>> expression("BaseDecl? PrefixDecl*")
|
114
|
+
# ((seq (opt BaseDecl) (star PrefixDecl)) '')
|
115
|
+
#
|
116
|
+
# >>> expression("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
|
117
|
+
# ((alt NCCHAR1 diff
|
118
|
+
# (range '0-9')
|
119
|
+
# (hex '#x00B7')
|
120
|
+
# (range '#x0300-#x036F')
|
121
|
+
# (range, '#x203F-#x2040')) '')
|
122
|
+
#
|
123
|
+
# @param [String] s
|
124
|
+
# @return [Array]
|
125
|
+
# Parses `s` as an alternation and returns `[tree, remaining_string]`.
# If the text following the alternation starts with ')', that token is
# consumed and the remainder after it is returned.
def expression(s)
  debug("expression") {"(#{s.inspect})"}
  tree, rest = depth {alt(s)}
  debug {"=> alt returned #{[tree, rest].inspect}"}
  return [tree, rest] if rest.to_s.empty?

  token, after = depth {terminal(rest)}
  debug {"=> terminal returned #{[token, after].inspect}"}
  closes_group = token.is_a?(Array) && token.first == :")"
  [tree, closes_group ? after : rest]
end
|
136
|
+
|
137
|
+
##
|
138
|
+
# Parse alt
|
139
|
+
# >>> alt("a | b | c")
|
140
|
+
# ((alt a b c) '')
|
141
|
+
# @param [String] s
|
142
|
+
# @return [Array]
|
143
|
+
# Parses alternatives separated by '|' and returns `[tree, remaining_string]`.
#
# With two or more alternatives the tree is `[:alt, ...]`; with exactly one
# it is that alternative itself. A leading empty alternative is recorded as
# an empty sequence.
#
# @param [String] s
# @return [Array]
def alt(s)
  debug("alt") {"(#{s.inspect})"}
  args = []
  while !s.to_s.empty?
    e, s = depth {seq(s)}
    debug {"=> seq returned #{[e, s].inspect}"}
    if e.to_s.empty?
      break unless args.empty?
      e = [:seq, []] # empty sequence
    end
    args << e
    unless s.to_s.empty?
      t, ss = depth {terminal(s)}
      # Only an [:alt] token continues the list; anything else ends it.
      break unless t.is_a?(Array) && t.first == :alt
      s = ss
    end
  end
  # Return the collected alternative rather than the last `seq` result:
  # after a break on an empty trailing alternative, `e` is empty and the
  # alternative already parsed would otherwise be lost.
  args.length > 1 ? [args.unshift(:alt), s] : [args.first, s]
end
|
162
|
+
|
163
|
+
##
|
164
|
+
# parse seq
|
165
|
+
#
|
166
|
+
# >>> seq("a b c")
|
167
|
+
# ((seq a b c) '')
|
168
|
+
#
|
169
|
+
# >>> seq("a b? c")
|
170
|
+
# ((seq a (opt b) c) '')
|
171
|
+
# Parses consecutive `diff` expressions and returns `[tree, remaining_string]`.
# Two or more items give `[:seq, ...]`, one item is returned bare, and no
# items give an empty string as the tree.
def seq(s)
  debug("seq") {"(#{s.inspect})"}
  items = []
  until s.to_s.empty?
    item, rest = depth {diff(s)}
    debug {"=> diff returned #{[item, rest].inspect}"}
    # Stop at the first thing that is not an expression, leaving `s` untouched.
    break if item.to_s.empty?
    items << item
    s = rest
  end
  case items.length
  when 0 then ["", s]
  when 1 then items + [s]
  else [items.unshift(:seq), s]
  end
end
|
192
|
+
|
193
|
+
##
|
194
|
+
# parse diff
|
195
|
+
#
|
196
|
+
# >>> diff("a - b")
|
197
|
+
# ((diff a b) '')
|
198
|
+
# Parses a postfix expression optionally followed by `- primary` and returns
# `[tree, remaining_string]`. Raises SyntaxError when '-' is not followed by
# an operand.
def diff(s)
  debug("diff") {"(#{s.inspect})"}
  left, rest = depth {postfix(s)}
  debug {"=> postfix returned #{[left, rest].inspect}"}
  return [left, rest] if left.to_s.empty? || rest.to_s.empty?

  token, after = depth {terminal(rest)}
  debug {"diff #{[token, after].inspect}"}
  return [left, rest] unless token.is_a?(Array) && token.first == :diff

  right, remainder = primary(after)
  if right.to_s.empty?
    error("diff", "Syntax Error")
    raise SyntaxError, "diff missing second operand"
  end
  [[:diff, left, right], remainder]
end
|
220
|
+
|
221
|
+
##
|
222
|
+
# parse postfix
|
223
|
+
#
|
224
|
+
# >>> postfix("a b c")
|
225
|
+
# (a ' b c')
|
226
|
+
#
|
227
|
+
# >>> postfix("a? b c")
|
228
|
+
# ((opt, a) ' b c')
|
229
|
+
# Parses a primary optionally followed by one of '?', '*' or '+' and returns
# `[tree, remaining_string]`.
def postfix(s)
  debug("postfix") {"(#{s.inspect})"}
  operand, rest = depth {primary(s)}
  debug {"=> primary returned #{[operand, rest].inspect}"}
  return ["", rest] if operand.to_s.empty?
  return [operand, rest] if rest.to_s.empty?

  token, after = depth {terminal(rest)}
  debug {"=> #{[token, after].inspect}"}
  if token.is_a?(Array) && [:opt, :star, :plus].include?(token.first)
    [[token.first, operand], after]
  else
    # Not a postfix operator: leave the token unconsumed.
    [operand, rest]
  end
end
|
243
|
+
|
244
|
+
##
|
245
|
+
# parse primary
|
246
|
+
#
|
247
|
+
# >>> primary("a b c")
|
248
|
+
# (a ' b c')
|
249
|
+
# Parses a single primary: a symbol, a string, a range, a hex value, or a
# parenthesized expression. Returns `[tree, remaining_string]`; any other
# token gives an empty string as the tree.
def primary(s)
  debug("primary") {"(#{s.inspect})"}
  token, rest = depth {terminal(s)}
  debug {"=> terminal returned #{[token, rest].inspect}"}
  return [token, rest] if token.is_a?(Symbol) || token.is_a?(String)

  case token.first
  when :range, :hex
    [token, rest]
  when :"("
    inner, rest = depth {expression(rest)}
    debug {"=> expression returned #{[inner, rest].inspect}"}
    [inner, rest]
  else
    ["", rest]
  end
end
|
265
|
+
|
266
|
+
##
|
267
|
+
# parse one terminal; return the terminal and the remaining string
|
268
|
+
#
|
269
|
+
# A terminal is represented as a tuple whose 1st item gives the type;
|
270
|
+
# some types have additional info in the tuple.
|
271
|
+
#
|
272
|
+
# @example
|
273
|
+
# >>> terminal("'abc' def")
|
274
|
+
# ('abc' ' def')
|
275
|
+
#
|
276
|
+
# >>> terminal("[0-9]")
|
277
|
+
# ((range '0-9') '')
|
278
|
+
# >>> terminal("#x00B7")
|
279
|
+
# ((hex '#x00B7') '')
|
280
|
+
# >>> terminal ("\[#x0300-#x036F\]")
|
281
|
+
# ((range '#x0300-#x036F') '')
|
282
|
+
# >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
|
283
|
+
# ((range "^<>'{}|^`") '-\[#x00-#x20\]')
|
284
|
+
# Reads one token from the start of `s` (after stripping surrounding
# whitespace) and returns `[token, remaining_string]`.
#
# Strings are returned unescaped, symbols as Symbols, and everything else as
# a one- or two-element Array whose first item names the token type
# (`:range`, `:hex`, `:diff`, `:opt`, `:alt`, `:plus`, `:star`, `:"("`, `:")"`).
#
# @param [String] s
# @return [Array]
# @raise [SyntaxError] when the input does not start with a recognized token
def terminal(s)
  s = s.strip
  case m = s[0,1]
  when '"', "'" # STRING1 or STRING2
    l, s = s[1..-1].split(m.rstrip, 2)
    [LL1::Lexer.unescape_string(l), s]
  when '[' # RANGE, O_RANGE
    l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
    [[:range, LL1::Lexer.unescape_string(l)], s]
  when '#' # HEX
    # Anchor at the start so a malformed token is reported instead of being
    # skipped over; /m keeps any text after a newline in the remainder.
    md = s.match(/\A(#x\h+)(.*)\z/m)
    unless md
      error("terminal", "unrecognized terminal: #{s.inspect}")
      raise SyntaxError, "unrecognized terminal: #{s.inspect}"
    end
    [[:hex, md[1]], md[2]]
  when /[\w\.]/ # SYMBOL
    # The first character already matched, so this cannot fail; /m keeps
    # any text after a newline in the remainder.
    md = s.match(/\A([\w\.]+)(.*)\z/m)
    [md[1].to_sym, md[2]]
  when '-'
    [[:diff], s[1..-1]]
  when '?'
    [[:opt], s[1..-1]]
  when '|'
    [[:alt], s[1..-1]]
  when '+'
    [[:plus], s[1..-1]]
  when '*'
    [[:star], s[1..-1]]
  when /[\(\)]/ # '(' or ')'
    [[m.to_sym], s[1..-1]]
  else
    error("terminal", "unrecognized terminal: #{s.inspect}")
    raise SyntaxError, "unrecognized terminal: #{s.inspect}"
  end
end
|
319
|
+
end
|
320
|
+
end
|