ebnf 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,229 @@
1
+ require_relative 'isoebnf/meta'
2
+ require 'logger'
3
+
4
+ # ISO EBNF parser
5
+ # Parses ISO EBNF into an array of {EBNF::Rule}.
6
+ module EBNF
7
+ class ISOEBNF
8
+ include EBNF::PEG::Parser
9
+
10
+ # The base for terminal-character, which omits "'", '"', and '?'.
11
+ # Could be more optimized, and one might quibble
12
+ # with the overly-strictly defined character set,
13
+ # but it is correct.
14
+ TERMINAL_CHARACTER_BASE = %r{
15
+ [a-zA-Z0-9] | # letter | decimal digit
16
+ , | # concatenate symbol
17
+ = | # defining symbol
18
+ [\|\/!] | # definition separator symbol
19
+ \*\) | # end comment symbol
20
+ \) | # end group symbol
21
+ \] | # end option symbol
22
+ \} | # end repeat symbol
23
+ \- | # except symbol
24
+ #\' | # first quote symbol
25
+ \* | # repetition symbol
26
+ #\" | # second quote symbol
27
+ #\? | # special sequence symbol
28
+ \(\* | # start comment symbol
29
+ \( | # start group symbol
30
+ \[ | # start option symbol
31
+ \{ | # start repeat symbol
32
+ [;\.] | # terminator symbol
33
+ [:+_%@&$<>^\x20\x23\\`~] # other character
34
+ }x
35
+
36
+ TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}
37
+ FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}
38
+ SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}
39
+ SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
40
+
41
+ # Abstract syntax tree from parse
42
+ #
43
+ # @return [Array<EBNF::Rule>]
44
+ attr_reader :ast
45
+
46
+ # `[14] integer ::= decimal_digit+`
47
+ terminal(:integer, /\d+/) do |value, prod|
48
+ value.to_i
49
+ end
50
+
51
+ # `[15] meta_identifier ::= letter meta_identifier_character*`
52
+ terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
53
+ value.to_sym
54
+ end
55
+
56
+ # `[17] terminal_string ::= ("'" first_terminal_character+ "'")`
57
+ # ` | ('"' second_terminal_character+ '"')`
58
+ terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
59
+ value[1..-2]
60
+ end
61
+
62
# `[20] special_sequence ::= '?' special_sequence_character* '?'`
#
# The rule allows zero or more characters between the question marks, so the
# terminal regexp uses `*`; an empty special sequence (`??`) is accepted.
terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}*\?/)
64
+
65
+ # `[22] terminal_character ::= [a-zA-Z0-9]`
66
+ # ` | [,=;*}#x2d?([{;]`
67
+ # ` | '*)'`
68
+ # ` | '(*'`
69
+ # ` | ']'`
70
+ # ` | other_character`
71
+ terminal(:terminal_character, TERMINAL_CHARACTER)
72
+
73
+ # `[25] empty ::= ''`
74
+ terminal(:empty, //)
75
+
76
+ # `[26] definition_separator_symbol ::= '|' | '/' | '!'`
77
+ terminal(:definition_separator_symbol, /[\|\/!]/)
78
+
79
+ # `[27] terminator_symbol ::= ';' | '.'`
80
+ terminal(:terminator_symbol, /[;\.]/)
81
+
82
+ # `[28] start_option_symbol ::= '['`
83
+ terminal(:start_option_symbol, /\[|(?:\(\/)/)
84
+
85
+ # `[29] end_option_symbol ::= ']'`
86
+ terminal(:end_option_symbol, /\]/)
87
+
88
+ # `[30] start_repeat_symbol ::= '{' | '(:'`
89
+ terminal(:start_repeat_symbol, /{|\(:/)
90
+
91
+ # `[31] end_repeat_symbol ::= '}' | ':)'`
92
+ terminal(:end_repeat_symbol, /}|:\)/)
93
+
94
+ # ## Non-terminal productions
95
+
96
+ # `[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol`
97
+ production(:syntax_rule, clear_packrat: true) do |value, data, callback|
98
+ # value contains an expression.
99
+ # Invoke callback
100
+ sym = value[0][:meta_identifier]
101
+ definitions_list = value[2][:definitions_list]
102
+ callback.call(:rule, EBNF::Rule.new(sym.to_sym, nil, definitions_list))
103
+ nil
104
+ end
105
+
106
+ # Setting `as_hash: true` in the start production makes the value of the form of a hash, rather than an array of hashes.
107
+ #
108
+ # `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*`
109
+ start_production(:definitions_list, as_hash: true)
110
+ production(:definitions_list) do |value|
111
+ if value[:_definitions_list_1].length > 0
112
+ [:alt, value[:single_definition]] + value[:_definitions_list_1]
113
+ else
114
+ value[:single_definition]
115
+ end
116
+ end
117
+ production(:_definitions_list_1) do |value|
118
+ Array(value.first)
119
+ end
120
+ start_production(:_definitions_list_2, as_hash: true)
121
+ production(:_definitions_list_2) do |value|
122
+ if Array(value[:definitions_list]).first == :alt
123
+ value[:definitions_list][1..-1]
124
+ else
125
+ [value[:definitions_list]]
126
+ end
127
+ end
128
+
129
+ # `[4] single_definition ::= term (',' term)*`
130
+ start_production(:single_definition, as_hash: true)
131
+ production(:single_definition) do |value|
132
+ if value[:_single_definition_1].length > 0
133
+ [:seq, value[:term]] + value[:_single_definition_1]
134
+ else
135
+ value[:term]
136
+ end
137
+ end
138
+ production(:_single_definition_1) do |value|
139
+ value.map {|a1| a1.last[:term]}.compact # Keep the term from each (',' term) pair, dropping the ','
140
+ end
141
+
142
+ # `[5] term ::= factor ('-' exception)?`
143
+ start_production(:term, as_hash: true)
144
+ production(:term) do |value|
145
+ if value[:_term_1]
146
+ [:diff, value[:factor], value[:_term_1]]
147
+ else
148
+ value[:factor]
149
+ end
150
+ end
151
+ production(:_term_1) do |value|
152
+ value.last[:exception] if value
153
+ end
154
+
155
+ # `[6] exception ::= factor`
156
+ start_production(:exception, as_hash: true)
157
+ production(:exception) do |value|
158
+ value[:factor]
159
+ end
160
+
161
+ # `[7] factor ::= (integer '*')? primary`
162
+ start_production(:factor, as_hash: true)
163
+ production(:factor) do |value|
164
+ if value[:_factor_1]
165
+ [:rept, value[:_factor_1], value[:_factor_1], value[:primary]]
166
+ else
167
+ value[:primary]
168
+ end
169
+ end
170
+ production(:_factor_2) do |value|
171
+ value.first[:integer]
172
+ end
173
+
174
+ # `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol`
175
+ production(:optional_sequence) do |value|
176
+ [:opt, value[1][:definitions_list]]
177
+ end
178
+
179
+ # `[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol`
180
+ production(:repeated_sequence) do |value|
181
+ [:star, value[1][:definitions_list]]
182
+ end
183
+
184
+ # `[11] grouped_sequence ::= '(' definitions_list ')'`
185
+ production(:grouped_sequence) do |value|
186
+ [:seq, value[1][:definitions_list]]
187
+ end
188
+
189
# ## Parser invocation.
# Parses `input` as ISO EBNF starting from the `:syntax` rule and collects
# every rule reported by the parser callback into {#ast}.
#
# @param [#read, #to_s] input
#   Anything responding to `#read` is read; otherwise `#to_s` is used.
# @param [Hash{Symbol => Object}] options
#   Passed through to `parse`.
# @option options [Integer] :level
#   Trace level. 0(debug), 1(info), 2(warn), 3(error).
#   When this key is present, a logger writing to STDERR is created and
#   stored in `options[:logger]`.
# @param [Proc] block
#   Accepted for interface compatibility; this method does not call it.
# @raise [SyntaxError] when the PEG parser raises `EBNF::PEG::Parser::Error`
def initialize(input, **options, &block)
  # If the `level` option is set, instantiate a logger for collecting trace information.
  if options.key?(:level)
    options[:logger] = Logger.new(STDERR)
    options[:logger].level = options[:level]
    options[:logger].formatter = lambda {|severity, datetime, progname, msg| "#{severity} #{msg}\n"}
  end

  # Read input, if necessary, which will be used in a Scanner.
  @input = input.respond_to?(:read) ? input.read : input.to_s

  @ast = []
  parse(@input,
        :syntax,
        ISOEBNFMeta::RULES,
        whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+},
        **options
  ) do |context, *data|
    # Only `:rule` callbacks carry a value; it has already been turned into
    # a `Rule` object by the `syntax_rule` production.
    rule = data.first if context == :rule
    @ast << rule if rule
  end
rescue EBNF::PEG::Parser::Error => e
  raise SyntaxError, e.message
end
228
+ end
229
+ end
@@ -0,0 +1,75 @@
1
+ # This file is automatically generated by ebnf version 2.0.0
2
+ # Derived from etc/iso-ebnf.ebnf
3
+ module ISOEBNFMeta
4
+ RULES = [
5
+ EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule),
6
+ EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule),
7
+ EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule),
8
+ EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule),
9
+ EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule),
10
+ EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule),
11
+ EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule),
12
+ EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule),
13
+ EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule),
14
+ EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule),
15
+ EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule),
16
+ EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule),
17
+ EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule),
18
+ EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule),
19
+ EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule),
20
+ EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule),
21
+ EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule),
22
+ EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule),
23
+ EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule),
24
+ EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
25
+ EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule),
26
+ EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule),
27
+ EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule),
28
+ EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule),
29
+ EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule),
30
+ EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule),
31
+ EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule),
32
+ EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule),
33
+ EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
34
+ EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule),
35
+ EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule),
36
+ EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule),
37
+ EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
38
+ EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule),
39
+ EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
40
+ EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
41
+ EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule),
42
+ EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
43
+ EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
44
+ EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
45
+ EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
46
+ EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule),
47
+ EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule),
48
+ EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
49
+ EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule),
50
+ EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule),
51
+ EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule),
52
+ EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule),
53
+ EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule),
54
+ EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
55
+ EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
56
+ EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
57
+ EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule),
58
+ EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
59
+ EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule),
60
+ EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule),
61
+ EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
62
+ EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule),
63
+ EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule),
64
+ EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule),
65
+ EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule),
66
+ EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
67
+ EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule),
68
+ EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule),
69
+ EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule),
70
+ EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule),
71
+ EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule),
72
+ EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule),
73
+ ]
74
+ end
75
+
@@ -9,9 +9,7 @@ module EBNF
9
9
  # BRANCH = {
10
10
  # :alt => {
11
11
  # "(" => [:seq, :_alt_1],
12
- # :ENUM => [:seq, :_alt_1],
13
12
  # :HEX => [:seq, :_alt_1],
14
- # :O_ENUM => [:seq, :_alt_1],
15
13
  # :O_RANGE => [:seq, :_alt_1],
16
14
  # :RANGE => [:seq, :_alt_1],
17
15
  # :STRING1 => [:seq, :_alt_1],
@@ -38,8 +36,6 @@ module EBNF
38
36
  # :alt => [
39
37
  # :HEX,
40
38
  # :SYMBOL,
41
- # :ENUM,
42
- # :O_ENUM,
43
39
  # :RANGE,
44
40
  # :O_RANGE,
45
41
  # :STRING1,
@@ -54,7 +50,7 @@ module EBNF
54
50
  #
55
51
  # TERMINALS = ["(", ")", "-",
56
52
  # "@pass", "@terminals",
57
- # :ENUM, :HEX, :LHS, :O_ENUM, :O_RANGE,:POSTFIX,
53
+ # :HEX, :LHS, :O_RANGE,:POSTFIX,
58
54
  # :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
59
55
  # ].freeze
60
56
  #
@@ -214,8 +210,9 @@ module EBNF
214
210
  firsts, follows = 0, 0
215
211
  # add Fi(wi) to Fi(Ai) for every rule Ai → wi
216
212
  #
217
- # For sequences, this is the first rule in the sequence.
218
- # For alts, this is every rule in the sequence
213
+ # * For sequences, this is the first rule in the sequence.
214
+ # * For alts, this is every rule in the sequence
215
+ # * Other rules don't matter, as they don't appear in strict BNF
219
216
  each(:rule) do |ai|
220
217
  # Fi(a w' ) = { a } for every terminal a
221
218
  ai.terminals(ast).each do |t|
@@ -576,15 +576,23 @@ module EBNF::LL1
576
576
  # @option options [Integer] :depth
577
577
  # Recursion depth for indenting output
578
578
  # @yieldreturn [String] additional string appended to `message`.
579
- def debug(*args)
579
+ def debug(*args, &block)
580
580
  return unless @options[:logger]
581
581
  options = args.last.is_a?(Hash) ? args.pop : {}
582
582
  lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
583
583
  level = options.fetch(:level, 0)
584
-
585
584
  depth = options[:depth] || self.depth
586
- args << yield if block_given?
587
- @options[:logger].add(level, "[#{@lineno}]" + (" " * depth) + args.join(" "))
585
+
586
+ if self.respond_to?(:log_debug)
587
+ level = [:debug, :info, :warn, :error, :fatal][level]
588
+ log_debug(*args, **options.merge(level: level, lineno: lineno, depth: depth), &block)
589
+ elsif @options[:logger].respond_to?(:add)
590
+ args << yield if block_given?
591
+ @options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" "))
592
+ elsif @options[:logger].respond_to?(:<<)
593
+ args << yield if block_given?
594
+ @options[:logger] << "[#{lineno}]" + (" " * depth) + args.join(" ")
595
+ end
588
596
  end
589
597
 
590
598
  private
@@ -0,0 +1,320 @@
1
+ module EBNF
2
+ module Native
3
##
# Native parser for EBNF; less accurate, but appropriate when changing EBNF grammar, itself.
#
# Iterate over rule strings.
# A line that starts with '\[' or '@' starts a new rule.
#
# Whitespace and comments (`/* .. */`, `(* .. *)`, `// ..` and `# ..` when
# the `#` is not followed by `x`) are skipped and do not become part of the
# yielded strings. `@lineno` is set to the line on which the most recently
# started rule begins.
#
# A `/*` or `(*` that is never closed is not a comment; it is kept as
# literal rule text so that scanning always makes progress.
#
# @param [StringScanner] scanner
# @yield rule_string
# @yieldparam [String] rule_string
def eachRule(scanner)
  cur_lineno = 1
  r = ''
  until scanner.eos?
    case
    when s = scanner.scan(%r(\s+)m)
      # Eat whitespace
      cur_lineno += s.count("\n")
    when s = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m)
      # Eat comments /* .. */
      cur_lineno += s.count("\n")
      debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
    when s = scanner.scan(%r(\(\*([^\*]|\*[^\)])*\*\))m)
      # Eat comments (* .. *)
      cur_lineno += s.count("\n")
      debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
    when s = scanner.scan(%r((#(?!x)|//).*$))
      # Eat comments // & #
      cur_lineno += s.count("\n")
      debug("eachRule(comment)") { "[#{cur_lineno}] #{s.inspect}" }
    when s = scanner.scan(/\A["']/)
      # Found a quote, scan until end of matching quote
      s += scanner.scan_until(/#{scanner.matched}|$/)
      r += s
    when s = scanner.scan(%r(^@terminals))
      yield(r) unless r.empty?
      @lineno = cur_lineno
      yield(s)
      r = ''
    when s = scanner.scan(/@pass/)
      # Found rule start, if we've already collected a rule, yield it
      yield r unless r.empty?
      @lineno = cur_lineno
      r = s
    when s = scanner.scan(EBNF::Terminals::LHS)
      # Found rule start, if we've already collected a rule, yield it
      yield r unless r.empty?
      @lineno = cur_lineno
      r = s
    else
      # Collect until end of line, or start of comment or quote
      s = scanner.scan_until(%r{(?:[/\(]\*)|#(?!x)|//|["']|$})
      marker = scanner.matched
      if marker.length > 0 && s.length > marker.length
        # Back up scan head before ending match, so that the next pass
        # handles the comment or quote itself
        scanner.pos = scanner.pos - marker.length

        # Remove matched from end of string
        s = s[0..-(marker.length+1)]
      end
      # Otherwise either end of line was reached, or the marker sits at the
      # scan head and none of the branches above consumed it (an
      # unterminated `/*` or `(*`). Backing up in the latter case would
      # repeat this branch forever, so the marker is kept as rule text.
      cur_lineno += s.count("\n")
      r += s
    end
  end
  yield r unless r.empty?
end
72
+
73
##
# Split a rule string into its optional rule number, its symbol and its
# expression, and build a rule from them.
#
# The text before `::=` is divided at the first `]`; when there is no `]`
# the whole left-hand side is taken as the symbol and the number is empty.
#
# @param [String] rule
# @return [Rule]
def ruleParts(rule)
  lhs, rhs = rule.split('::=', 2).map(&:strip)
  number, name = lhs.split(']', 2).map(&:strip)

  # No ']' found: the only piece is the symbol, not a number
  if name.nil?
    name = number
    number = ""
  end

  # Drop the leading '[' of the number
  number = number[1..-1]
  symbol = name ? name.to_sym : nil

  parsed = Rule.new(symbol, number, expression(rhs).first, ebnf: self)
  debug("ruleParts") { parsed.inspect }
  parsed
end
87
+
88
##
# Parse a string into an expression tree and a remaining string.
#
# The string is parsed as an alternation; if the next terminal after it is
# a closing `)`, that parenthesis is consumed as well.
#
# @example
#     >>> expression("a b c")
#     ((seq a b c) '')
#
#     >>> expression("a? b+ c*")
#     ((seq (opt a) (plus b) (star c)) '')
#
#     >>> expression(" | x xlist")
#     ((alt (seq) (seq x xlist)) '')
#
#     >>> expression("a | (b - c)")
#     ((alt a (diff b c)) '')
#
#     >>> expression("a b | c d")
#     ((alt (seq a b) (seq c d)) '')
#
#     >>> expression("a | b | c")
#     ((alt a b c) '')
#
#     >>> expression("a) b c")
#     (a ' b c')
#
#     >>> expression("BaseDecl? PrefixDecl*")
#     ((seq (opt BaseDecl) (star PrefixDecl)) '')
#
#     >>> expression("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
#     ((alt NCCHAR1 diff
#           (range '0-9')
#           (hex '#x00B7')
#           (range '#x0300-#x036F')
#           (range '#x203F-#x2040')) '')
#
# @param [String] s
# @return [Array] the expression tree followed by the unparsed remainder
def expression(s)
  debug("expression") {"(#{s.inspect})"}
  tree, rest = depth { alt(s) }
  debug {"=> alt returned #{[tree, rest].inspect}"}
  return [tree, rest] if rest.to_s.empty?

  token, after = depth { terminal(rest) }
  debug {"=> terminal returned #{[token, after].inspect}"}

  # Only a closing parenthesis is consumed here; anything else is left for the caller
  closes_group = token.is_a?(Array) && token.first == :")"
  [tree, closes_group ? after : rest]
end
136
+
137
##
# Parse alt
#
# Collects sequences separated by `|`. An empty first alternative is
# recorded as an empty sequence. An empty later alternative ends the
# alternation, keeping the alternatives collected so far.
#
#     >>> alt("a | b | c")
#     ((alt a b c) '')
#
# @param [String] s
# @return [Array] the parsed tree followed by the unparsed remainder;
#   the tree is `nil` when `s` is empty
def alt(s)
  debug("alt") {"(#{s.inspect})"}
  args = []
  while !s.to_s.empty?
    e, s = depth {seq(s)}
    debug {"=> seq returned #{[e, s].inspect}"}
    if e.to_s.empty?
      break unless args.empty?
      e = [:seq, []] # empty sequence
    end
    args << e
    unless s.to_s.empty?
      t, ss = depth {terminal(s)}
      break unless t[0] == :alt
      s = ss
    end
  end
  # With a single alternative, return that alternative itself. Using
  # `args.first` rather than the last `e` matters when the loop stopped on an
  # empty trailing alternative (e.g. "a | )"), where `e` is the empty string.
  args.length > 1 ? [args.unshift(:alt), s] : [args.first, s]
end
162
+
163
##
# parse seq
#
# Gathers consecutive `diff` results until one comes back empty or the
# input is exhausted. Two or more items are wrapped in `:seq`, a single item
# is returned bare, and no items yields the empty string.
#
#     >>> seq("a b c")
#     ((seq a b c) '')
#
#     >>> seq("a b? c")
#     ((seq a (opt b) c) '')
#
# @param [String] s
# @return [Array] the parsed tree followed by the unparsed remainder
def seq(s)
  debug("seq") {"(#{s.inspect})"}
  parts = []
  until s.to_s.empty?
    item, rest = depth { diff(s) }
    debug {"=> diff returned #{[item, rest].inspect}"}
    # Nothing parsed: leave the remainder untouched for the caller
    break if item.to_s.empty?
    parts << item
    s = rest
  end

  case parts.length
  when 0 then ["", s]
  when 1 then [parts.first, s]
  else [[:seq, *parts], s]
  end
end
192
+
193
##
# parse diff
#
# Parses a postfix expression and, when it is followed by `-`, a primary to
# subtract from it.
#
#     >>> diff("a - b")
#     ((diff a b) '')
#
# @param [String] s
# @return [Array] the parsed tree followed by the unparsed remainder
# @raise [SyntaxError] when `-` is not followed by a second operand
def diff(s)
  debug("diff") {"(#{s.inspect})"}
  left, rest = depth { postfix(s) }
  debug {"=> postfix returned #{[left, rest].inspect}"}
  return [left, rest] if left.to_s.empty? || rest.to_s.empty?

  token, after = depth { terminal(rest) }
  debug {"diff #{[token, after].inspect}"}
  # Not a difference: hand back the remainder with the peeked terminal unconsumed
  return [left, rest] unless token.is_a?(Array) && token.first == :diff

  right, remainder = primary(after)
  if right.to_s.empty?
    error("diff", "Syntax Error")
    raise SyntaxError, "diff missing second operand"
  end
  [[:diff, left, right], remainder]
end
220
+
221
##
# parse postfix
#
# Parses a primary and wraps it when it is directly followed by one of the
# postfix operators `?`, `*` or `+`.
#
#     >>> postfix("a b c")
#     (a ' b c')
#
#     >>> postfix("a? b c")
#     ((opt a) ' b c')
#
# @param [String] s
# @return [Array] the parsed tree followed by the unparsed remainder
def postfix(s)
  debug("postfix") {"(#{s.inspect})"}
  operand, rest = depth { primary(s) }
  debug {"=> primary returned #{[operand, rest].inspect}"}
  return ["", rest] if operand.to_s.empty?
  return [operand, rest] if rest.to_s.empty?

  token, after = depth { terminal(rest) }
  debug {"=> #{[token, after].inspect}"}

  # Operators come back from `terminal` as one-element arrays
  operator = token.first if token.is_a?(Array)
  case operator
  when :opt, :star, :plus then [[operator, operand], after]
  else [operand, rest]
  end
end
243
+
244
##
# parse primary
#
# Accepts a symbol, a string, a range, a hex value, or a parenthesized
# expression. Any other terminal yields the empty string.
#
#     >>> primary("a b c")
#     (a ' b c')
#
# @param [String] s
# @return [Array] the parsed tree followed by the unparsed remainder
def primary(s)
  debug("primary") {"(#{s.inspect})"}
  token, rest = depth { terminal(s) }
  debug {"=> terminal returned #{[token, rest].inspect}"}
  return [token, rest] if token.is_a?(Symbol) || token.is_a?(String)

  case token.first
  when :range, :hex
    [token, rest]
  when :"("
    # Grouped expression; `expression` consumes the matching ')'
    inner, after = depth { expression(rest) }
    debug {"=> expression returned #{[inner, after].inspect}"}
    [inner, after]
  else
    ["", rest]
  end
end
265
+
266
##
# parse one terminal; return the terminal and the remaining string
#
# A terminal is represented as a tuple whose 1st item gives the type;
# some types have additional info in the tuple. Strings are returned as a
# String and symbols as a Symbol.
#
# The remainder is returned in full, including any text after a line break.
#
# @example
#     >>> terminal("'abc' def")
#     ('abc' ' def')
#
#     >>> terminal("[0-9]")
#     ((range '0-9') '')
#     >>> terminal("#x00B7")
#     ((hex '#x00B7') '')
#     >>> terminal ("\[#x0300-#x036F\]")
#     ((range '#x0300-#x036F') '')
#     >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
#     ((range "^<>'{}|^`") '-\[#x00-#x20\]')
#
# @param [String] s
# @return [Array] the terminal followed by the unparsed remainder
# @raise [SyntaxError] when the leading character starts no known terminal
def terminal(s)
  s = s.strip
  case m = s[0,1]
  when '"', "'" # STRING1 or STRING2
    l, s = s[1..-1].split(m, 2)
    [LL1::Lexer.unescape_string(l), s]
  when '[' # RANGE, O_RANGE
    # Split at the first ']' that is not preceded by a backslash
    l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
    [[:range, LL1::Lexer.unescape_string(l)], s]
  when '#' # HEX
    # Multiline mode with `\z` keeps the whole remainder, not just the
    # rest of the first line
    md = s.match(/(#x\h+)(.*)\z/m)
    l, s = md ? md.captures : [nil, nil]
    [[:hex, l], s]
  when /[\w\.]/ # SYMBOL
    md = s.match(/([\w\.]+)(.*)\z/m)
    l, s = md.captures
    [l.to_sym, s]
  when '-'
    [[:diff], s[1..-1]]
  when '?'
    [[:opt], s[1..-1]]
  when '|'
    [[:alt], s[1..-1]]
  when '+'
    [[:plus], s[1..-1]]
  when '*'
    [[:star], s[1..-1]]
  when /[\(\)]/ # '(' or ')'
    [[m.to_sym], s[1..-1]]
  else
    error("terminal", "unrecognized terminal: #{s.inspect}")
    raise SyntaxError, "unrecognized terminal: #{s.inspect}"
  end
end
319
+ end
320
+ end