ebnf 2.0.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +81 -36
- data/VERSION +1 -1
- data/bin/ebnf +34 -18
- data/etc/abnf-core.ebnf +52 -0
- data/etc/abnf.abnf +121 -0
- data/etc/abnf.ebnf +124 -0
- data/etc/abnf.sxp +45 -0
- data/etc/ebnf.ebnf +19 -25
- data/etc/ebnf.html +251 -206
- data/etc/ebnf.ll1.rb +27 -103
- data/etc/ebnf.ll1.sxp +105 -102
- data/etc/ebnf.peg.rb +54 -62
- data/etc/ebnf.peg.sxp +53 -62
- data/etc/ebnf.sxp +22 -19
- data/etc/iso-ebnf.ebnf +140 -0
- data/etc/iso-ebnf.isoebnf +138 -0
- data/etc/iso-ebnf.sxp +65 -0
- data/etc/sparql.ebnf +4 -4
- data/etc/sparql.sxp +8 -7
- data/etc/turtle.ebnf +3 -3
- data/etc/turtle.sxp +22 -20
- data/lib/ebnf.rb +3 -0
- data/lib/ebnf/abnf.rb +301 -0
- data/lib/ebnf/abnf/core.rb +23 -0
- data/lib/ebnf/abnf/meta.rb +111 -0
- data/lib/ebnf/base.rb +87 -44
- data/lib/ebnf/ebnf/meta.rb +90 -0
- data/lib/ebnf/isoebnf.rb +229 -0
- data/lib/ebnf/isoebnf/meta.rb +75 -0
- data/lib/ebnf/ll1.rb +4 -7
- data/lib/ebnf/ll1/parser.rb +12 -4
- data/lib/ebnf/native.rb +320 -0
- data/lib/ebnf/parser.rb +285 -302
- data/lib/ebnf/peg.rb +1 -1
- data/lib/ebnf/peg/parser.rb +24 -5
- data/lib/ebnf/peg/rule.rb +77 -58
- data/lib/ebnf/rule.rb +352 -121
- data/lib/ebnf/terminals.rb +13 -10
- data/lib/ebnf/writer.rb +550 -78
- metadata +48 -6
data/lib/ebnf/isoebnf.rb
ADDED
@@ -0,0 +1,229 @@
|
|
1
|
+
require_relative 'isoebnf/meta'
|
2
|
+
require 'logger'
|
3
|
+
|
4
|
+
# ISO EBNF parser
|
5
|
+
# Parses ISO EBNF into an array of {EBNF::Rule}.
|
6
|
+
module EBNF
|
7
|
+
class ISOEBNF
|
8
|
+
include EBNF::PEG::Parser
|
9
|
+
|
10
|
+
# The base for terminal-character, which omits "'", '"', and '?'.
|
11
|
+
# Could be more optimized, and one might quibble
|
12
|
+
# with the overly-strictly defined character set,
|
13
|
+
# but it is correct.
|
14
|
+
TERMINAL_CHARACTER_BASE = %r{
|
15
|
+
[a-zA-Z0-9] | # letter | decimal digit
|
16
|
+
, | # concatenate symbol
|
17
|
+
= | # defining symbol
|
18
|
+
[\|\/!] | # definition separator symbol
|
19
|
+
\*\) | # end comment symbol
|
20
|
+
\) | # end group symbol
|
21
|
+
\] | # end option symbol
|
22
|
+
\} | # end repeat symbol
|
23
|
+
\- | # except symbol
|
24
|
+
#\' | # first quote symbol
|
25
|
+
\* | # repetition symbol
|
26
|
+
#\" | # second quote symbol
|
27
|
+
#\? | # special sequence symbol
|
28
|
+
\(\* | # start comment symbol
|
29
|
+
\( | # start group symbol
|
30
|
+
\[ | # start option symbol
|
31
|
+
\{ | # start repeat symbol
|
32
|
+
[;\.] | # terminator symbol
|
33
|
+
[:+_%@&$<>^\x20\x23\\`~] # other character
|
34
|
+
}x
|
35
|
+
|
36
|
+
TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}
|
37
|
+
FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}
|
38
|
+
SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}
|
39
|
+
SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
|
40
|
+
|
41
|
+
# Abstract syntax tree from parse
|
42
|
+
#
|
43
|
+
# @return [Array<EBNF::Rule>]
|
44
|
+
attr_reader :ast
|
45
|
+
|
46
|
+
# `[14] integer ::= decimal_digit+`
|
47
|
+
terminal(:integer, /\d+/) do |value, prod|
|
48
|
+
value.to_i
|
49
|
+
end
|
50
|
+
|
51
|
+
# `[15] meta_identifier ::= letter meta_identifier_character*`
|
52
|
+
terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
|
53
|
+
value.to_sym
|
54
|
+
end
|
55
|
+
|
56
|
+
# `[17] terminal_string ::= ("'" first_terminal_character+ "'")`
|
57
|
+
# ` | ('"' second_terminal_character+ '"')`
|
58
|
+
terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
|
59
|
+
value[1..-2]
|
60
|
+
end
|
61
|
+
|
62
|
+
# `[20] special_sequence ::= '?' special_sequence_character* '?'`
|
63
|
+
terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}+\?/)
|
64
|
+
|
65
|
+
# `[22] terminal_character ::= [a-zA-Z0-9]`
|
66
|
+
# ` | [,=;*}#x2d?([{;]`
|
67
|
+
# ` | '*)'`
|
68
|
+
# ` | '(*'`
|
69
|
+
# ` | ']'`
|
70
|
+
# ` | other_character`
|
71
|
+
terminal(:terminal_character, TERMINAL_CHARACTER)
|
72
|
+
|
73
|
+
# `[25] empty ::= ''`
|
74
|
+
terminal(:empty, //)
|
75
|
+
|
76
|
+
# `[26] definition_separator_symbol ::= '|' | '/' | '!'`
|
77
|
+
terminal(:definition_separator_symbol, /[\|\/!]/)
|
78
|
+
|
79
|
+
# `[27] terminator_symbol ::= ';' | '.'`
|
80
|
+
terminal(:terminator_symbol, /[;\.]/)
|
81
|
+
|
82
|
+
# `[28] start_option_symbol ::= '[' | '(/'`
|
83
|
+
terminal(:start_option_symbol, /\[|(?:\(\/)/)
|
84
|
+
|
85
|
+
# `[29] end_option_symbol ::= ']'`
|
86
|
+
terminal(:end_option_symbol, /\]/)
|
87
|
+
|
88
|
+
# `[30] start_repeat_symbol ::= '{' | '(:'`
|
89
|
+
terminal(:start_repeat_symbol, /{|\(:/)
|
90
|
+
|
91
|
+
# `[31] end_repeat_symbol ::= '}' | ':)'`
|
92
|
+
terminal(:end_repeat_symbol, /}|:\)/)
|
93
|
+
|
94
|
+
# ## Non-terminal productions
|
95
|
+
|
96
|
+
# `[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol`
|
97
|
+
production(:syntax_rule, clear_packrat: true) do |value, data, callback|
|
98
|
+
# value contains an expression.
|
99
|
+
# Invoke callback
|
100
|
+
sym = value[0][:meta_identifier]
|
101
|
+
definitions_list = value[2][:definitions_list]
|
102
|
+
callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list))
|
103
|
+
nil
|
104
|
+
end
|
105
|
+
|
106
|
+
# Setting `as_hash: true` in the start production makes the value of the form of a hash, rather than an array of hashes.
|
107
|
+
#
|
108
|
+
# `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*`
|
109
|
+
start_production(:definitions_list, as_hash: true)
|
110
|
+
production(:definitions_list) do |value|
|
111
|
+
if value[:_definitions_list_1].length > 0
|
112
|
+
[:alt, value[:single_definition]] + value[:_definitions_list_1]
|
113
|
+
else
|
114
|
+
value[:single_definition]
|
115
|
+
end
|
116
|
+
end
|
117
|
+
production(:_definitions_list_1) do |value|
|
118
|
+
Array(value.first)
|
119
|
+
end
|
120
|
+
start_production(:_definitions_list_2, as_hash: true)
|
121
|
+
production(:_definitions_list_2) do |value|
|
122
|
+
if Array(value[:definitions_list]).first == :alt
|
123
|
+
value[:definitions_list][1..-1]
|
124
|
+
else
|
125
|
+
[value[:definitions_list]]
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
# `[4] single_definition ::= term (',' term)*`
|
130
|
+
start_production(:single_definition, as_hash: true)
|
131
|
+
production(:single_definition) do |value|
|
132
|
+
if value[:_single_definition_1].length > 0
|
133
|
+
[:seq, value[:term]] + value[:_single_definition_1]
|
134
|
+
else
|
135
|
+
value[:term]
|
136
|
+
end
|
137
|
+
end
|
138
|
+
production(:_single_definition_1) do |value|
|
139
|
+
value.map {|a1| a1.last[:term]}.compact # Keep only the term of each (',' term) pair; the ',' separator is discarded
|
140
|
+
end
|
141
|
+
|
142
|
+
# `[5] term ::= factor ('-' exception)?`
|
143
|
+
start_production(:term, as_hash: true)
|
144
|
+
production(:term) do |value|
|
145
|
+
if value[:_term_1]
|
146
|
+
[:diff, value[:factor], value[:_term_1]]
|
147
|
+
else
|
148
|
+
value[:factor]
|
149
|
+
end
|
150
|
+
end
|
151
|
+
production(:_term_1) do |value|
|
152
|
+
value.last[:exception] if value
|
153
|
+
end
|
154
|
+
|
155
|
+
# `[6] exception ::= factor`
|
156
|
+
start_production(:exception, as_hash: true)
|
157
|
+
production(:exception) do |value|
|
158
|
+
value[:factor]
|
159
|
+
end
|
160
|
+
|
161
|
+
# `[7] factor ::= (integer '*')? primary`
|
162
|
+
start_production(:factor, as_hash: true)
|
163
|
+
production(:factor) do |value|
|
164
|
+
if value[:_factor_1]
|
165
|
+
[:rept, value[:_factor_1], value[:_factor_1], value[:primary]]
|
166
|
+
else
|
167
|
+
value[:primary]
|
168
|
+
end
|
169
|
+
end
|
170
|
+
production(:_factor_2) do |value|
|
171
|
+
value.first[:integer]
|
172
|
+
end
|
173
|
+
|
174
|
+
# `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol`
|
175
|
+
production(:optional_sequence) do |value|
|
176
|
+
[:opt, value[1][:definitions_list]]
|
177
|
+
end
|
178
|
+
|
179
|
+
# `[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol`
|
180
|
+
production(:repeated_sequence) do |value|
|
181
|
+
[:star, value[1][:definitions_list]]
|
182
|
+
end
|
183
|
+
|
184
|
+
# `[11] grouped_sequence ::= '(' definitions_list ')'`
|
185
|
+
production(:grouped_sequence) do |value|
|
186
|
+
[:seq, value[1][:definitions_list]]
|
187
|
+
end
|
188
|
+
|
189
|
+
# ## Parser invocation.
|
190
|
+
# On start, yield ourselves if a block is given, otherwise, return this parser instance
|
191
|
+
#
|
192
|
+
# @param [#read, #to_s] input
|
193
|
+
# @param [Hash{Symbol => Object}] options
|
194
|
+
# @option options [Integer] :level
|
195
|
+
# Trace level. 0(debug), 1(info), 2(warn), 3(error).
|
196
|
+
# @return [EBNF::ISOEBNF]
|
197
|
+
def initialize(input, **options, &block)
  # If the `level` option is set, instantiate a logger for collecting trace information.
  # The logger writes "<SEVERITY> <message>" lines to STDERR and replaces any
  # `:logger` option passed in alongside `:level`.
  if options.key?(:level)
    logger = Logger.new(STDERR)
    logger.level = options[:level]
    logger.formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
    options[:logger] = logger
  end

  # Read input, if necessary, which will be used in a Scanner.
  @input = input.respond_to?(:read) ? input.read : input.to_s

  @ast = []
  parse(@input,
        :syntax,
        ISOEBNFMeta::RULES,
        # Whitespace is any run of gap separators and `(* ... *)` comments.
        # The comment body is matched lazily so that a comment closes at the
        # first `*)`, including closers written as `**)`.
        whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?m:.*?)\*\)))+},
        **options
  ) do |context, *data|
    # The only callback issued by the productions above is `:rule`, carrying
    # an already constructed `EBNF::Rule`; collect it into the AST.
    rule = data.first if context == :rule
    @ast << rule if rule
  end
rescue EBNF::PEG::Parser::Error => e
  # Surface parser failures as a SyntaxError carrying the same message.
  raise SyntaxError, e.message
end
|
228
|
+
end
|
229
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# This file is automatically generated by ebnf version 2.0.0
|
2
|
+
# Derived from etc/iso-ebnf.ebnf
|
3
|
+
module ISOEBNFMeta
|
4
|
+
RULES = [
|
5
|
+
EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule),
|
6
|
+
EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule),
|
7
|
+
EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule),
|
8
|
+
EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule),
|
9
|
+
EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule),
|
10
|
+
EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule),
|
11
|
+
EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule),
|
12
|
+
EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule),
|
13
|
+
EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule),
|
14
|
+
EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule),
|
15
|
+
EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule),
|
16
|
+
EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule),
|
17
|
+
EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule),
|
18
|
+
EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule),
|
19
|
+
EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule),
|
20
|
+
EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule),
|
21
|
+
EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule),
|
22
|
+
EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule),
|
23
|
+
EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule),
|
24
|
+
EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
|
25
|
+
EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule),
|
26
|
+
EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule),
|
27
|
+
EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule),
|
28
|
+
EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule),
|
29
|
+
EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule),
|
30
|
+
EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule),
|
31
|
+
EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule),
|
32
|
+
EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule),
|
33
|
+
EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
34
|
+
EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule),
|
35
|
+
EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule),
|
36
|
+
EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule),
|
37
|
+
EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
|
38
|
+
EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule),
|
39
|
+
EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
|
40
|
+
EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
|
41
|
+
EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule),
|
42
|
+
EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
43
|
+
EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
44
|
+
EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
45
|
+
EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
|
46
|
+
EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule),
|
47
|
+
EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule),
|
48
|
+
EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
|
49
|
+
EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule),
|
50
|
+
EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule),
|
51
|
+
EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule),
|
52
|
+
EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule),
|
53
|
+
EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
54
|
+
EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
|
55
|
+
EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
|
56
|
+
EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
57
|
+
EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule),
|
58
|
+
EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
59
|
+
EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule),
|
60
|
+
EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule),
|
61
|
+
EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
|
62
|
+
EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule),
|
63
|
+
EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule),
|
64
|
+
EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule),
|
65
|
+
EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule),
|
66
|
+
EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
|
67
|
+
EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule),
|
68
|
+
EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule),
|
69
|
+
EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule),
|
70
|
+
EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule),
|
71
|
+
EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule),
|
72
|
+
EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule),
|
73
|
+
]
|
74
|
+
end
|
75
|
+
|
data/lib/ebnf/ll1.rb
CHANGED
@@ -9,9 +9,7 @@ module EBNF
|
|
9
9
|
# BRANCH = {
|
10
10
|
# :alt => {
|
11
11
|
# "(" => [:seq, :_alt_1],
|
12
|
-
# :ENUM => [:seq, :_alt_1],
|
13
12
|
# :HEX => [:seq, :_alt_1],
|
14
|
-
# :O_ENUM => [:seq, :_alt_1],
|
15
13
|
# :O_RANGE => [:seq, :_alt_1],
|
16
14
|
# :RANGE => [:seq, :_alt_1],
|
17
15
|
# :STRING1 => [:seq, :_alt_1],
|
@@ -38,8 +36,6 @@ module EBNF
|
|
38
36
|
# :alt => [
|
39
37
|
# :HEX,
|
40
38
|
# :SYMBOL,
|
41
|
-
# :ENUM,
|
42
|
-
# :O_ENUM,
|
43
39
|
# :RANGE,
|
44
40
|
# :O_RANGE,
|
45
41
|
# :STRING1,
|
@@ -54,7 +50,7 @@ module EBNF
|
|
54
50
|
#
|
55
51
|
# TERMINALS = ["(", ")", "-",
|
56
52
|
# "@pass", "@terminals",
|
57
|
-
#
|
53
|
+
# :HEX, :LHS, :O_RANGE,:POSTFIX,
|
58
54
|
# :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
|
59
55
|
# ].freeze
|
60
56
|
#
|
@@ -214,8 +210,9 @@ module EBNF
|
|
214
210
|
firsts, follows = 0, 0
|
215
211
|
# add Fi(wi) to Fi(Ai) for every rule Ai → wi
|
216
212
|
#
|
217
|
-
# For sequences, this is the first rule in the sequence.
|
218
|
-
# For alts, this is every rule in the sequence
|
213
|
+
# * For sequences, this is the first rule in the sequence.
|
214
|
+
# * For alts, this is every rule in the sequence
|
215
|
+
# * Other rules don't matter, as they don't appear in strict BNF
|
219
216
|
each(:rule) do |ai|
|
220
217
|
# Fi(a w' ) = { a } for every terminal a
|
221
218
|
ai.terminals(ast).each do |t|
|
data/lib/ebnf/ll1/parser.rb
CHANGED
@@ -576,15 +576,23 @@ module EBNF::LL1
|
|
576
576
|
# @option options [Integer] :depth
|
577
577
|
# Recursion depth for indenting output
|
578
578
|
# @yieldreturn [String] additional string appended to `message`.
|
579
|
-
def debug(*args)
|
579
|
+
def debug(*args, &block)
  logger = @options[:logger]
  return unless logger

  # A trailing Hash carries per-call options (:level, :depth, :token).
  options = args.last.is_a?(Hash) ? args.pop : {}
  lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
  level = options.fetch(:level, 0)
  depth = options[:depth] || self.depth

  # Builds "[lineno]<indent>message", appending the block's value when a block is given.
  compose = lambda do
    args << block.call if block
    "[#{lineno}]" + (" " * depth) + args.join(" ")
  end

  if respond_to?(:log_debug)
    # Delegate to log_debug when the host object provides it, translating the
    # numeric level into its symbolic name.
    severity = [:debug, :info, :warn, :error, :fatal][level]
    log_debug(*args, **options.merge(level: severity, lineno: lineno, depth: depth), &block)
  elsif logger.respond_to?(:add)
    logger.add(level, compose.call)
  elsif logger.respond_to?(:<<)
    logger << compose.call
  end
end
|
589
597
|
|
590
598
|
private
|
data/lib/ebnf/native.rb
ADDED
@@ -0,0 +1,320 @@
|
|
1
|
+
module EBNF
|
2
|
+
module Native
|
3
|
+
##
|
4
|
+
# Native parser for EBNF; less accurate, but appropriate when changing EBNF grammar, itself.
|
5
|
+
#
|
6
|
+
# Iterate over rule strings.
|
7
|
+
# a line that starts with '\[' or '@' starts a new rule
|
8
|
+
#
|
9
|
+
# @param [StringScanner] scanner
|
10
|
+
# @yield rule_string
|
11
|
+
# @yieldparam [String] rule_string
|
12
|
+
def eachRule(scanner)
  cur_lineno = 1
  r = ''
  until scanner.eos?
    case
    when s = scanner.scan(%r(\s+)m)
      # Eat whitespace
      cur_lineno += s.count("\n")
    when s = scanner.scan(%r{/\*.*?\*/}m)
      # Eat comments /* .. */
      # Lazy match: the comment ends at the first "*/", which also covers
      # closers such as "**/".
      cur_lineno += s.count("\n")
      debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
    when s = scanner.scan(%r{\(\*.*?\*\)}m)
      # Eat comments (* .. *)
      cur_lineno += s.count("\n")
      debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
    when s = scanner.scan(%r((#(?!x)|//).*$))
      # Eat comments // & #
      cur_lineno += s.count("\n")
      debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
    when s = scanner.scan(/\A["']/)
      # Found a quote, scan until end of matching quote
      s += scanner.scan_until(/#{scanner.matched}|$/)
      r += s
    when s = scanner.scan(%r(^@terminals))
      # Flush any collected rule, then yield the @terminals marker on its own
      yield(r) unless r.empty?
      @lineno = cur_lineno
      yield(s)
      r = ''
    when s = scanner.scan(/@pass/)
      # Found rule start, if we've already collected a rule, yield it
      yield r unless r.empty?
      @lineno = cur_lineno
      r = s
    when s = scanner.scan(EBNF::Terminals::LHS)
      # Found rule start, if we've already collected a rule, yield it
      yield r unless r.empty?
      @lineno = cur_lineno
      r = s
    else
      # Collect until end of line, or start of comment or quote
      s = scanner.scan_until(%r{(?:[/\(]\*)|#(?!x)|//|["']|$})
      delimiter = scanner.matched
      if delimiter.length > 0 && s.length > delimiter.length
        # Back up scan head before ending match
        scanner.pos = scanner.pos - delimiter.length

        # Remove matched from end of string
        s = s[0..-(delimiter.length+1)]
      end
      # Otherwise the delimiter sits directly at the scan head and none of the
      # branches above could consume it (a comment opener with no closer).
      # Keep it as literal rule text so the scanner always advances.
      cur_lineno += s.count("\n")
      r += s
    end
  end
  yield r unless r.empty?
end
|
72
|
+
|
73
|
+
##
|
74
|
+
# Parse a rule into an optional rule number, a symbol and an expression
|
75
|
+
#
|
76
|
+
# @param [String] rule
|
77
|
+
# @return [Rule]
|
78
|
+
def ruleParts(rule)
  # Split "[num] sym ::= expr" around the first '::='.
  lhs, rhs = rule.split('::=', 2).map(&:strip)
  # Split the left-hand side around the first ']' into number and symbol.
  id, name = lhs.split(']', 2).map(&:strip)
  # Without a ']' the only piece is the symbol and the number is empty.
  id, name = "", id if name.nil?
  # Drop the first character of the number part (the '[' when one is present).
  id = id[1..-1]
  symbol = name ? name.to_sym : name
  parsed = Rule.new(symbol, id, expression(rhs).first, ebnf: self)
  debug("ruleParts") { parsed.inspect }
  parsed
end
|
87
|
+
|
88
|
+
##
|
89
|
+
# Parse a string into an expression tree and a remaining string
|
90
|
+
#
|
91
|
+
# @example
|
92
|
+
# >>> expression("a b c")
|
93
|
+
# ((seq a b c) '')
|
94
|
+
#
|
95
|
+
# >>> expression("a? b+ c*")
|
96
|
+
# ((seq (opt a) (plus b) (star c)) '')
|
97
|
+
#
|
98
|
+
# >>> expression(" | x xlist")
|
99
|
+
# ((alt (seq) (seq x xlist)) '')
|
100
|
+
#
|
101
|
+
# >>> expression("a | (b - c)")
|
102
|
+
# ((alt a (diff b c)) '')
|
103
|
+
#
|
104
|
+
# >>> expression("a b | c d")
|
105
|
+
# ((alt (seq a b) (seq c d)) '')
|
106
|
+
#
|
107
|
+
# >>> expression("a | b | c")
|
108
|
+
# ((alt a b c) '')
|
109
|
+
#
|
110
|
+
# >>> expression("a) b c")
|
111
|
+
# (a ' b c')
|
112
|
+
#
|
113
|
+
# >>> expression("BaseDecl? PrefixDecl*")
|
114
|
+
# ((seq (opt BaseDecl) (star PrefixDecl)) '')
|
115
|
+
#
|
116
|
+
# >>> expression("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
|
117
|
+
# ((alt NCCHAR1 diff
|
118
|
+
# (range '0-9')
|
119
|
+
# (hex '#x00B7')
|
120
|
+
# (range '#x0300-#x036F')
|
121
|
+
# (range '#x203F-#x2040')) '')
|
122
|
+
#
|
123
|
+
# @param [String] s
|
124
|
+
# @return [Array]
|
125
|
+
def expression(s)
  debug("expression") {"(#{s.inspect})"}
  tree, rest = depth { alt(s) }
  debug {"=> alt returned #{[tree, rest].inspect}"}
  return [tree, rest] if rest.to_s.empty?

  # Look at the next token: a ')' closes this expression and is consumed;
  # anything else is left in the remaining string untouched.
  tok, after = depth { terminal(rest) }
  debug {"=> terminal returned #{[tok, after].inspect}"}
  closes = tok.is_a?(Array) && tok.first == :")"
  [tree, closes ? after : rest]
end
|
136
|
+
|
137
|
+
##
|
138
|
+
# Parse alt
|
139
|
+
# >>> alt("a | b | c")
|
140
|
+
# ((alt a b c) '')
|
141
|
+
# @param [String] s
|
142
|
+
# @return [Array]
|
143
|
+
def alt(s)
  debug("alt") {"(#{s.inspect})"}
  args = []
  while !s.to_s.empty?
    e, s = depth {seq(s)}
    debug {"=> seq returned #{[e, s].inspect}"}
    if e.to_s.empty?
      # An empty alternative is only recorded when it is the first one;
      # after that it ends the alternation.
      break unless args.empty?
      e = [:seq, []] # empty sequence
    end
    args << e
    unless s.to_s.empty?
      # Continue only while the next token is the '|' separator.
      t, ss = depth {terminal(s)}
      break unless t.is_a?(Array) && t.first == :alt
      s = ss
    end
  end
  # A single alternative is returned unwrapped. Use the collected alternative
  # rather than the last `seq` result, which is empty when the loop stopped on
  # a trailing empty alternative (e.g. "a | )").
  args.length > 1 ? [args.unshift(:alt), s] : [args.first, s]
end
|
162
|
+
|
163
|
+
##
|
164
|
+
# parse seq
|
165
|
+
#
|
166
|
+
# >>> seq("a b c")
|
167
|
+
# ((seq a b c) '')
|
168
|
+
#
|
169
|
+
# >>> seq("a b? c")
|
170
|
+
# ((seq a (opt b) c) '')
|
171
|
+
def seq(s)
  debug("seq") {"(#{s.inspect})"}
  items = []
  until s.to_s.empty?
    item, rest = depth { diff(s) }
    debug {"=> diff returned #{[item, rest].inspect}"}
    # Stop at the first position where no element can be parsed; the
    # remaining string is left as it was before that attempt.
    break if item.to_s.empty?
    items << item
    s = rest
  end

  case items.length
  when 0 then ["", s]                 # nothing parsed
  when 1 then [items.first, s]        # a lone element is returned unwrapped
  else [items.unshift(:seq), s]       # two or more elements form a (seq ...)
  end
end
|
192
|
+
|
193
|
+
##
|
194
|
+
# parse diff
|
195
|
+
#
|
196
|
+
# >>> diff("a - b")
|
197
|
+
# ((diff a b) '')
|
198
|
+
def diff(s)
  debug("diff") {"(#{s.inspect})"}
  left, s = depth { postfix(s) }
  debug {"=> postfix returned #{[left, s].inspect}"}
  # Nothing parsed, or nothing left to inspect: no difference operator possible.
  return [left, s] if left.to_s.empty? || s.to_s.empty?

  tok, rest = depth { terminal(s) }
  debug {"diff #{[tok, rest].inspect}"}
  # Only a '-' token turns this into a difference; otherwise the token is not consumed.
  return [left, s] unless tok.is_a?(Array) && tok.first == :diff

  right, rest = primary(rest)
  if right.to_s.empty?
    error("diff", "Syntax Error")
    raise SyntaxError, "diff missing second operand"
  end
  [[:diff, left, right], rest]
end
|
220
|
+
|
221
|
+
##
|
222
|
+
# parse postfix
|
223
|
+
#
|
224
|
+
# >>> postfix("a b c")
|
225
|
+
# (a ' b c')
|
226
|
+
#
|
227
|
+
# >>> postfix("a? b c")
|
228
|
+
# ((opt a) ' b c')
|
229
|
+
def postfix(s)
  debug("postfix") {"(#{s.inspect})"}
  operand, rest = depth { primary(s) }
  debug {"=> primary returned #{[operand, rest].inspect}"}
  return ["", rest] if operand.to_s.empty?
  return [operand, rest] if rest.to_s.empty?

  # A following '?', '*' or '+' token wraps the operand; any other token is
  # left unconsumed in the remaining string.
  op, after = depth { terminal(rest) }
  debug {"=> #{[op, after].inspect}"}
  quantified = op.is_a?(Array) && [:opt, :star, :plus].include?(op.first)
  quantified ? [[op.first, operand], after] : [operand, rest]
end
|
243
|
+
|
244
|
+
##
|
245
|
+
# parse primary
|
246
|
+
#
|
247
|
+
# >>> primary("a b c")
|
248
|
+
# (a ' b c')
|
249
|
+
def primary(s)
  debug("primary") {"(#{s.inspect})"}
  tok, s = depth { terminal(s) }
  debug {"=> terminal returned #{[tok, s].inspect}"}

  # Symbols and strings are primaries as they stand.
  return [tok, s] if tok.is_a?(Symbol) || tok.is_a?(String)

  kind = tok.first
  if kind == :range || kind == :hex
    [tok, s]
  elsif kind == :"("
    # Parenthesised group: parse the nested expression from the text after '('.
    inner, s = depth { expression(s) }
    debug {"=> expression returned #{[inner, s].inspect}"}
    [inner, s]
  else
    # Any other token is not a primary; the remaining string returned is the
    # text after that token.
    ["", s]
  end
end
|
265
|
+
|
266
|
+
##
|
267
|
+
# parse one terminal; return the terminal and the remaining string
|
268
|
+
#
|
269
|
+
# A terminal is represented as a tuple whose 1st item gives the type;
|
270
|
+
# some types have additional info in the tuple.
|
271
|
+
#
|
272
|
+
# @example
|
273
|
+
# >>> terminal("'abc' def")
|
274
|
+
# ('abc' ' def')
|
275
|
+
#
|
276
|
+
# >>> terminal("[0-9]")
|
277
|
+
# ((range '0-9') '')
|
278
|
+
# >>> terminal("#x00B7")
|
279
|
+
# ((hex '#x00B7') '')
|
280
|
+
# >>> terminal ("\[#x0300-#x036F\]")
|
281
|
+
# ((range '#x0300-#x036F') '')
|
282
|
+
# >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
|
283
|
+
# ((range "^<>'{}|^`") '-\[#x00-#x20\]')
|
284
|
+
def terminal(s)
  s = s.strip
  case m = s[0,1]
  when '"', "'" # STRING1 or STRING2
    # Everything up to the next occurrence of the same quote is the literal.
    l, s = s[1..-1].split(m, 2)
    [LL1::Lexer.unescape_string(l), s]
  when '[' # RANGE, O_RANGE
    # The range ends at the first ']' that is not preceded by a backslash.
    l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
    [[:range, LL1::Lexer.unescape_string(l)], s]
  when '#' # HEX
    # Anchored at the start so leading text is never skipped; /m lets the
    # remainder capture span line breaks instead of being cut at the first one.
    md = s.match(/\A(#x\h+)(.*)\z/m)
    unless md
      error("terminal", "unrecognized terminal: #{s.inspect}")
      raise SyntaxError, "unrecognized terminal: #{s.inspect}"
    end
    [[:hex, md[1]], md[2]]
  when /[\w\.]/ # SYMBOL
    # Always matches: the first character is already known to be in the class.
    md = s.match(/\A([\w\.]+)(.*)\z/m)
    [md[1].to_sym, md[2]]
  when '-'
    [[:diff], s[1..-1]]
  when '?'
    [[:opt], s[1..-1]]
  when '|'
    [[:alt], s[1..-1]]
  when '+'
    [[:plus], s[1..-1]]
  when '*'
    [[:star], s[1..-1]]
  when /[\(\)]/ # '(' or ')'
    [[m.to_sym], s[1..-1]]
  else
    error("terminal", "unrecognized terminal: #{s.inspect}")
    raise SyntaxError, "unrecognized terminal: #{s.inspect}"
  end
end
|
319
|
+
end
|
320
|
+
end
|