ebnf 2.0.0 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,229 @@
1
+ require_relative 'isoebnf/meta'
2
+ require 'logger'
3
+
4
+ # ISO EBNF parser
5
+ # Parses ISO EBNF into an array of {EBNF::Rule}.
6
+ module EBNF
7
+ class ISOEBNF
8
+ include EBNF::PEG::Parser
9
+
10
+ # The base for terminal-character, which omits "'", '"', and '?'.
11
+ # Could be more optimized, and one might quibble
12
+ # with the overly-strictly defined character set,
13
+ # but it is correct.
14
+ TERMINAL_CHARACTER_BASE = %r{
15
+ [a-zA-Z0-9] | # letter | decimal digit
16
+ , | # concatenate symbol
17
+ = | # defining symbol
18
+ [\|\/!] | # definition separator symbol
19
+ \*\) | # end comment symbol
20
+ \) | # end group symbol
21
+ \] | # end option symbol
22
+ \} | # end repeat symbol
23
+ \- | # except symbol
24
+ #\' | # first quote symbol
25
+ \* | # repetition symbol
26
+ #\" | # second quote symbol
27
+ #\? | # special sequence symbol
28
+ \(\* | # start comment symbol
29
+ \( | # start group symbol
30
+ \[ | # start option symbol
31
+ \{ | # start repeat symbol
32
+ [;\.] | # terminator symbol
33
+ [:+_%@&$<>^\x20\x23\\`~] # other character
34
+ }x
35
+
36
+ TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"\?]}
37
+ FIRST_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|["\?]}
38
+ SECOND_TERMINAL_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['\?]}
39
+ SPECIAL_SEQUENCE_CHARACTER = %r{#{TERMINAL_CHARACTER_BASE}|['"]}
40
+
41
+ # Abstract syntax tree from parse
42
+ #
43
+ # @return [Array<EBNF::Rule>]
44
+ attr_reader :ast
45
+
46
+ # `[14] integer ::= decimal_digit+`
47
+ terminal(:integer, /\d+/) do |value, prod|
48
+ value.to_i
49
+ end
50
+
51
+ # `[15] meta_identifier ::= letter meta_identifier_character*`
52
+ terminal(:meta_identifier, /[a-zA-Z][a-zA-Z0-9_]*/) do |value|
53
+ value.to_sym
54
+ end
55
+
56
+ # `[17] terminal_string ::= ("'" first_terminal_character+ "'")`
57
+ # ` | ('"' second_terminal_character+ '"')`
58
+ terminal(:terminal_string, /(?:'#{FIRST_TERMINAL_CHARACTER}+')|(?:"#{SECOND_TERMINAL_CHARACTER}+")/x) do |value|
59
+ value[1..-2]
60
+ end
61
+
62
+ # `[20] special_sequence ::= '?' special_sequence_character* '?'`
63
+ terminal(:special_sequence, /\?#{SPECIAL_SEQUENCE_CHARACTER}+\?/)
64
+
65
+ # `[22] terminal_character ::= [a-zA-Z0-9]`
66
+ # ` | [,=;*}#x2d?([{;]`
67
+ # ` | '*)'`
68
+ # ` | '(*'`
69
+ # ` | ']'`
70
+ # ` | other_character`
71
+ terminal(:terminal_character, TERMINAL_CHARACTER)
72
+
73
+ # `[25] empty ::= ''`
74
+ terminal(:empty, //)
75
+
76
+ # `[26] definition_separator_symbol ::= '|' | '/' | '!'`
77
+ terminal(:definition_separator_symbol, /[\|\/!]/)
78
+
79
+ # `[27] terminator_symbol ::= ';' | '.'`
80
+ terminal(:terminator_symbol, /[;\.]/)
81
+
82
+ # `[28] start_option_symbol ::= '['`
83
+ terminal(:start_option_symbol, /\[|(?:\(\/)/)
84
+
85
+ # `[29] end_option_symbol ::= ']'`
86
+ terminal(:end_option_symbol, /\]/)
87
+
88
+ # `[30] start_repeat_symbol ::= '{' | '(:'`
89
+ terminal(:start_repeat_symbol, /{|\(:/)
90
+
91
+ # `[31] end_repeat_symbol ::= '}' | ':)'`
92
+ terminal(:end_repeat_symbol, /}|:\)/)
93
+
94
# ## Non-terminal productions

# `[2] syntax_rule ::= meta_identifier '=' definitions_list terminator_symbol`
#
# Builds an {EBNF::Rule} from the identifier and the definitions list and passes it
# to the parser callback under the `:rule` context. The handler itself returns nil.
production(:syntax_rule, clear_packrat: true) do |value, _data, callback|
  identifier = value[0][:meta_identifier]
  expression = value[2][:definitions_list]
  callback.call(:rule, EBNF::Rule.new(identifier.to_sym, nil, expression))
  nil
end

# With `as_hash: true` on the start production, the handler receives a single hash
# keyed by component name instead of an array of one-entry hashes.
#
# `[3] definitions_list ::= single_definition (definition_separator_symbol definitions_list)*`
start_production(:definitions_list, as_hash: true)
production(:definitions_list) do |value|
  alternatives = value[:_definitions_list_1]
  if alternatives.empty?
    # A lone definition is returned as-is, without an `:alt` wrapper.
    value[:single_definition]
  else
    [:alt, value[:single_definition]].concat(alternatives)
  end
end
production(:_definitions_list_1) { |value| Array(value.first) }
start_production(:_definitions_list_2, as_hash: true)
production(:_definitions_list_2) do |value|
  nested = value[:definitions_list]
  # A nested `:alt` is flattened into its operands; anything else becomes a one-element list.
  Array(nested).first == :alt ? nested[1..-1] : [nested]
end

# `[4] single_definition ::= term (',' term)*`
start_production(:single_definition, as_hash: true)
production(:single_definition) do |value|
  following = value[:_single_definition_1]
  following.empty? ? value[:term] : [:seq, value[:term]].concat(following)
end
production(:_single_definition_1) do |value|
  # Keep only the `:term` of each `',' term` entry, discarding nil results.
  value.map { |entry| entry.last[:term] }.compact
end

# `[5] term ::= factor ('-' exception)?`
start_production(:term, as_hash: true)
production(:term) do |value|
  excluded = value[:_term_1]
  excluded ? [:diff, value[:factor], excluded] : value[:factor]
end
production(:_term_1) do |value|
  value ? value.last[:exception] : nil
end

# `[6] exception ::= factor`
start_production(:exception, as_hash: true)
production(:exception) { |value| value[:factor] }

# `[7] factor ::= (integer '*')? primary`
start_production(:factor, as_hash: true)
production(:factor) do |value|
  count = value[:_factor_1]
  # The repetition count is used as both bounds of the `:rept` expression.
  count ? [:rept, count, count, value[:primary]] : value[:primary]
end
production(:_factor_2) { |value| value.first[:integer] }

# `[9] optional_sequence ::= start_option_symbol definitions_list end_option_symbol`
production(:optional_sequence) { |value| [:opt, value[1][:definitions_list]] }

# `[10] repeated_sequence ::= start_repeat_symbol definitions_list end_repeat_symbol`
production(:repeated_sequence) { |value| [:star, value[1][:definitions_list]] }

# `[11] grouped_sequence ::= '(' definitions_list ')'`
production(:grouped_sequence) { |value| [:seq, value[1][:definitions_list]] }
188
+
189
# ## Parser invocation.
# Parses `input` immediately on construction and collects every rule reported by
# the parser into {#ast}.
#
# NOTE(review): a block parameter is accepted but this method never yields to it.
#
# @param  [#read, #to_s] input
# @param  [Hash{Symbol => Object}] options
#   Remaining options are forwarded to `parse`.
# @option options [Integer] :level
#   Trace level. 0(debug), 1(info), 2(warn), 3(error).
#   When present, a STDERR logger at this level is installed as `options[:logger]`.
# @return [EBNF::ISOEBNF]
# @raise [SyntaxError] when the underlying PEG parser reports an error
def initialize(input, **options, &block)
  # A `level` option means the caller wants trace output: build a logger for it.
  if options.key?(:level)
    tracer = Logger.new(STDERR)
    tracer.level = options[:level]
    tracer.formatter = ->(severity, _datetime, _progname, msg) { "#{severity} #{msg}\n" }
    options[:logger] = tracer
  end

  # Slurp IO-like input; anything else is coerced to a String.
  @input = input.respond_to?(:read) ? input.read : input.to_s

  parsing_terminals = false
  @ast = []
  parse(
    @input,
    :syntax,
    ISOEBNFMeta::RULES,
    whitespace: %r{([\x09-\x0d\x20]|(?:\(\*(?:(?:\*[^\)])|[^*])*\*\)))+},
    **options
  ) do |context, *data|
    # Only `:rule` callbacks carry something to record.
    next unless context == :rule

    # The payload has already been turned into a `Rule` object.
    rule = data.first
    rule.kind = :terminal if parsing_terminals
    @ast << rule if rule
  end
rescue EBNF::PEG::Parser::Error => e
  raise SyntaxError, e.message
end
228
+ end
229
+ end
@@ -0,0 +1,75 @@
1
+ # This file is automatically generated by ebnf version 2.0.0
2
+ # Derived from etc/iso-ebnf.ebnf
3
+ module ISOEBNFMeta
4
+ RULES = [
5
+ EBNF::Rule.new(:syntax, nil, [:star, :syntax_rule]).extend(EBNF::PEG::Rule),
6
+ EBNF::Rule.new(:syntax_rule, nil, [:seq, :meta_identifier, :defining_symbol, :definitions_list, :terminator_symbol]).extend(EBNF::PEG::Rule),
7
+ EBNF::Rule.new(:definitions_list, nil, [:seq, :single_definition, :_definitions_list_1]).extend(EBNF::PEG::Rule),
8
+ EBNF::Rule.new(:_definitions_list_1, nil, [:star, :_definitions_list_2]).extend(EBNF::PEG::Rule),
9
+ EBNF::Rule.new(:_definitions_list_2, nil, [:seq, :definition_separator_symbol, :definitions_list]).extend(EBNF::PEG::Rule),
10
+ EBNF::Rule.new(:single_definition, nil, [:seq, :term, :_single_definition_1]).extend(EBNF::PEG::Rule),
11
+ EBNF::Rule.new(:_single_definition_1, nil, [:star, :_single_definition_2]).extend(EBNF::PEG::Rule),
12
+ EBNF::Rule.new(:_single_definition_2, nil, [:seq, ",", :term]).extend(EBNF::PEG::Rule),
13
+ EBNF::Rule.new(:term, nil, [:seq, :factor, :_term_1]).extend(EBNF::PEG::Rule),
14
+ EBNF::Rule.new(:_term_1, nil, [:opt, :_term_2]).extend(EBNF::PEG::Rule),
15
+ EBNF::Rule.new(:_term_2, nil, [:seq, "-", :exception]).extend(EBNF::PEG::Rule),
16
+ EBNF::Rule.new(:exception, nil, [:seq, :factor]).extend(EBNF::PEG::Rule),
17
+ EBNF::Rule.new(:factor, nil, [:seq, :_factor_1, :primary]).extend(EBNF::PEG::Rule),
18
+ EBNF::Rule.new(:_factor_1, nil, [:opt, :_factor_2]).extend(EBNF::PEG::Rule),
19
+ EBNF::Rule.new(:_factor_2, nil, [:seq, :integer, "*"]).extend(EBNF::PEG::Rule),
20
+ EBNF::Rule.new(:primary, nil, [:alt, :optional_sequence, :repeated_sequence, :special_sequence, :grouped_sequence, :meta_identifier, :terminal_string, :empty]).extend(EBNF::PEG::Rule),
21
+ EBNF::Rule.new(:optional_sequence, nil, [:seq, :start_option_symbol, :definitions_list, :end_option_symbol]).extend(EBNF::PEG::Rule),
22
+ EBNF::Rule.new(:repeated_sequence, nil, [:seq, :start_repeat_symbol, :definitions_list, :end_repeat_symbol]).extend(EBNF::PEG::Rule),
23
+ EBNF::Rule.new(:grouped_sequence, nil, [:seq, "(", :definitions_list, ")"]).extend(EBNF::PEG::Rule),
24
+ EBNF::Rule.new(:_terminals, nil, [:seq], kind: :terminals).extend(EBNF::PEG::Rule),
25
+ EBNF::Rule.new(:terminal_string, nil, [:alt, :_terminal_string_1, :_terminal_string_2], kind: :terminal).extend(EBNF::PEG::Rule),
26
+ EBNF::Rule.new(:_terminal_string_1, nil, [:seq, "'", :_terminal_string_3, "'"]).extend(EBNF::PEG::Rule),
27
+ EBNF::Rule.new(:_terminal_string_3, nil, [:plus, :first_terminal_character]).extend(EBNF::PEG::Rule),
28
+ EBNF::Rule.new(:_terminal_string_2, nil, [:seq, "\"", :_terminal_string_4, "\""]).extend(EBNF::PEG::Rule),
29
+ EBNF::Rule.new(:_terminal_string_4, nil, [:plus, :second_terminal_character]).extend(EBNF::PEG::Rule),
30
+ EBNF::Rule.new(:meta_identifier, nil, [:seq, :letter, :_meta_identifier_1], kind: :terminal).extend(EBNF::PEG::Rule),
31
+ EBNF::Rule.new(:_meta_identifier_1, nil, [:star, :meta_identifier_character]).extend(EBNF::PEG::Rule),
32
+ EBNF::Rule.new(:integer, nil, [:plus, :decimal_digit], kind: :terminal).extend(EBNF::PEG::Rule),
33
+ EBNF::Rule.new(:special_sequence, nil, [:seq, "?", :_special_sequence_1, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
34
+ EBNF::Rule.new(:_special_sequence_1, nil, [:star, :special_sequence_character]).extend(EBNF::PEG::Rule),
35
+ EBNF::Rule.new(:comment, nil, [:seq, :start_comment_symbol, :_comment_1, :end_comment_symbol], kind: :terminal).extend(EBNF::PEG::Rule),
36
+ EBNF::Rule.new(:_comment_1, nil, [:star, :comment_symbol]).extend(EBNF::PEG::Rule),
37
+ EBNF::Rule.new(:comment_symbol, nil, [:alt, :comment, :commentless_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
38
+ EBNF::Rule.new(:commentless_symbol, nil, [:alt, :terminal_character, :meta_identifier, :integer, :terminal_string, :special_sequence], kind: :terminal).extend(EBNF::PEG::Rule),
39
+ EBNF::Rule.new(:letter, nil, [:range, "a-zA-Z"], kind: :terminal).extend(EBNF::PEG::Rule),
40
+ EBNF::Rule.new(:decimal_digit, nil, [:range, "0-9"], kind: :terminal).extend(EBNF::PEG::Rule),
41
+ EBNF::Rule.new(:meta_identifier_character, nil, [:alt, :letter, :decimal_digit, "_"], kind: :terminal).extend(EBNF::PEG::Rule),
42
+ EBNF::Rule.new(:first_terminal_character, nil, [:diff, :terminal_character, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
43
+ EBNF::Rule.new(:second_terminal_character, nil, [:diff, :terminal_character, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
44
+ EBNF::Rule.new(:special_sequence_character, nil, [:diff, :terminal_character, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
45
+ EBNF::Rule.new(:terminal_character, nil, [:alt, :letter, :decimal_digit, :concatenate_symbol, :defining_symbol, :definition_separator_symbol, :end_comment_symbol, :end_group_symbol, :end_option_symbol, :end_repeat_symbol, :except_symbol, :first_quote_symbol, :repetition_symbol, :second_quote_symbol, :special_sequence_symbol, :start_comment_symbol, :start_group_symbol, :start_option_symbol, :start_repeat_symbol, :terminator_symbol, :other_character], kind: :terminal).extend(EBNF::PEG::Rule),
46
+ EBNF::Rule.new(:other_character, nil, [:alt, :_other_character_1, "\\"], kind: :terminal).extend(EBNF::PEG::Rule),
47
+ EBNF::Rule.new(:_other_character_1, nil, [:range, ":+_%@&$<>^` ̃#x20#x23"], kind: :terminal).extend(EBNF::PEG::Rule),
48
+ EBNF::Rule.new(:gap_separator, nil, [:range, "#x9#xa#xb#xc#xd#x20"], kind: :terminal).extend(EBNF::PEG::Rule),
49
+ EBNF::Rule.new(:_pass, nil, [:alt, :__pass_1, :comment], kind: :pass).extend(EBNF::PEG::Rule),
50
+ EBNF::Rule.new(:__pass_1, nil, [:plus, :gap_separator]).extend(EBNF::PEG::Rule),
51
+ EBNF::Rule.new(:empty, nil, [:seq, ""], kind: :terminal).extend(EBNF::PEG::Rule),
52
+ EBNF::Rule.new(:concatenate_symbol, nil, [:seq, ","], kind: :terminal).extend(EBNF::PEG::Rule),
53
+ EBNF::Rule.new(:repetition_symbol, nil, [:seq, "*"], kind: :terminal).extend(EBNF::PEG::Rule),
54
+ EBNF::Rule.new(:except_symbol, nil, [:seq, "-"], kind: :terminal).extend(EBNF::PEG::Rule),
55
+ EBNF::Rule.new(:first_quote_symbol, nil, [:seq, "'"], kind: :terminal).extend(EBNF::PEG::Rule),
56
+ EBNF::Rule.new(:second_quote_symbol, nil, [:seq, "\""], kind: :terminal).extend(EBNF::PEG::Rule),
57
+ EBNF::Rule.new(:start_comment_symbol, nil, [:seq, "(*"], kind: :terminal).extend(EBNF::PEG::Rule),
58
+ EBNF::Rule.new(:end_comment_symbol, nil, [:seq, "*)"], kind: :terminal).extend(EBNF::PEG::Rule),
59
+ EBNF::Rule.new(:start_group_symbol, nil, [:seq, "("], kind: :terminal).extend(EBNF::PEG::Rule),
60
+ EBNF::Rule.new(:end_group_symbol, nil, [:seq, ")"], kind: :terminal).extend(EBNF::PEG::Rule),
61
+ EBNF::Rule.new(:special_sequence_symbol, nil, [:seq, "?"], kind: :terminal).extend(EBNF::PEG::Rule),
62
+ EBNF::Rule.new(:defining_symbol, nil, [:alt, "=", ":"], kind: :terminal).extend(EBNF::PEG::Rule),
63
+ EBNF::Rule.new(:definition_separator_symbol, nil, [:alt, "|", "/", "!"], kind: :terminal).extend(EBNF::PEG::Rule),
64
+ EBNF::Rule.new(:terminator_symbol, nil, [:alt, ";", "."], kind: :terminal).extend(EBNF::PEG::Rule),
65
+ EBNF::Rule.new(:start_option_symbol, nil, [:seq, "["], kind: :terminal).extend(EBNF::PEG::Rule),
66
+ EBNF::Rule.new(:end_option_symbol, nil, [:seq, "]"], kind: :terminal).extend(EBNF::PEG::Rule),
67
+ EBNF::Rule.new(:start_repeat_symbol, nil, [:alt, "{", "(:"], kind: :terminal).extend(EBNF::PEG::Rule),
68
+ EBNF::Rule.new(:end_repeat_symbol, nil, [:alt, "}", ":)"], kind: :terminal).extend(EBNF::PEG::Rule),
69
+ EBNF::Rule.new(:gap_free_symbol, nil, [:alt, :_gap_free_symbol_1, :terminal_string], kind: :terminal).extend(EBNF::PEG::Rule),
70
+ EBNF::Rule.new(:_gap_free_symbol_1, nil, [:seq, :_gap_free_symbol_3, :terminal_character]).extend(EBNF::PEG::Rule),
71
+ EBNF::Rule.new(:_gap_free_symbol_3, nil, [:not, :_gap_free_symbol_2]).extend(EBNF::PEG::Rule),
72
+ EBNF::Rule.new(:_gap_free_symbol_2, nil, [:range, "'\""], kind: :terminal).extend(EBNF::PEG::Rule),
73
+ ]
74
+ end
75
+
@@ -9,9 +9,7 @@ module EBNF
9
9
  # BRANCH = {
10
10
  # :alt => {
11
11
  # "(" => [:seq, :_alt_1],
12
- # :ENUM => [:seq, :_alt_1],
13
12
  # :HEX => [:seq, :_alt_1],
14
- # :O_ENUM => [:seq, :_alt_1],
15
13
  # :O_RANGE => [:seq, :_alt_1],
16
14
  # :RANGE => [:seq, :_alt_1],
17
15
  # :STRING1 => [:seq, :_alt_1],
@@ -38,8 +36,6 @@ module EBNF
38
36
  # :alt => [
39
37
  # :HEX,
40
38
  # :SYMBOL,
41
- # :ENUM,
42
- # :O_ENUM,
43
39
  # :RANGE,
44
40
  # :O_RANGE,
45
41
  # :STRING1,
@@ -54,7 +50,7 @@ module EBNF
54
50
  #
55
51
  # TERMINALS = ["(", ")", "-",
56
52
  # "@pass", "@terminals",
57
- # :ENUM, :HEX, :LHS, :O_ENUM, :O_RANGE,:POSTFIX,
53
+ # :HEX, :LHS, :O_RANGE,:POSTFIX,
58
54
  # :RANGE, :STRING1, :STRING2, :SYMBOL,"|"
59
55
  # ].freeze
60
56
  #
@@ -214,8 +210,9 @@ module EBNF
214
210
  firsts, follows = 0, 0
215
211
  # add Fi(wi) to Fi(Ai) for every rule Ai → wi
216
212
  #
217
- # For sequences, this is the first rule in the sequence.
218
- # For alts, this is every rule in the sequence
213
+ # * For sequences, this is the first rule in the sequence.
214
+ # * For alts, this is every rule in the sequence
215
+ # * Other rules don't matter, as they don't appear in strict BNF
219
216
  each(:rule) do |ai|
220
217
  # Fi(a w' ) = { a } for every terminal a
221
218
  ai.terminals(ast).each do |t|
@@ -576,15 +576,23 @@ module EBNF::LL1
576
576
  # @option options [Integer] :depth
577
577
  # Recursion depth for indenting output
578
578
  # @yieldreturn [String] additional string appended to `message`.
579
- def debug(*args)
579
+ def debug(*args, &block)
580
580
  return unless @options[:logger]
581
581
  options = args.last.is_a?(Hash) ? args.pop : {}
582
582
  lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
583
583
  level = options.fetch(:level, 0)
584
-
585
584
  depth = options[:depth] || self.depth
586
- args << yield if block_given?
587
- @options[:logger].add(level, "[#{@lineno}]" + (" " * depth) + args.join(" "))
585
+
586
+ if self.respond_to?(:log_debug)
587
+ level = [:debug, :info, :warn, :error, :fatal][level]
588
+ log_debug(*args, **options.merge(level: level, lineno: lineno, depth: depth), &block)
589
+ elsif @options[:logger].respond_to?(:add)
590
+ args << yield if block_given?
591
+ @options[:logger].add(level, "[#{lineno}]" + (" " * depth) + args.join(" "))
592
+ elsif @options[:logger].respond_to?(:<<)
593
+ args << yield if block_given?
594
+ @options[:logger] << "[#{lineno}]" + (" " * depth) + args.join(" ")
595
+ end
588
596
  end
589
597
 
590
598
  private
@@ -0,0 +1,320 @@
1
+ module EBNF
2
+ module Native
3
##
# Native parser for EBNF; less accurate, but appropriate when changing the EBNF grammar itself.
#
# Splits the scanner's input into rule strings and yields them one at a time.
# Whitespace and comments (`/* */`, `(* *)`, `//`, and `#` not followed by `x`) are
# skipped. A new rule begins at `@terminals`, at `@pass`, or wherever
# `EBNF::Terminals::LHS` matches; `@terminals` is yielded on its own.
# `@lineno` is set to the line on which the current rule started.
#
# @param [StringScanner] scanner
# @yield rule_string
# @yieldparam [String] rule_string
def eachRule(scanner)
  line = 1
  pending = ''
  until scanner.eos?
    if (chunk = scanner.scan(%r(\s+)m))
      # Whitespace is dropped; only its newlines are counted.
      line += chunk.count("\n")
    elsif (chunk = scanner.scan(%r(/\*([^\*]|\*[^\/])*\*/)m) ||
                   scanner.scan(%r(\(\*([^\*]|\*[^\)])*\*\))m) ||
                   scanner.scan(%r((#(?!x)|//).*$)))
      # Comments: /* .. */, then (* .. *), then // and # to end of line.
      line += chunk.count("\n")
      debug("eachRule(comment)") { "[#{line}] #{chunk.inspect}" }
    elsif (chunk = scanner.scan(/\A["']/))
      # Opening quote: take everything up to the matching quote (or end of line).
      chunk += scanner.scan_until(/#{scanner.matched}|$/)
      pending += chunk
    elsif (chunk = scanner.scan(%r(^@terminals)))
      # Flush the rule collected so far, then yield the marker by itself.
      yield(pending) unless pending.empty?
      @lineno = line
      yield(chunk)
      pending = ''
    elsif (chunk = scanner.scan(/@pass/) || scanner.scan(EBNF::Terminals::LHS))
      # Start of a new rule: flush the previous one and begin collecting afresh.
      yield pending unless pending.empty?
      @lineno = line
      pending = chunk
    else
      # Collect until end of line, or until the start of a comment or quote.
      chunk = scanner.scan_until(%r{(?:[/\(]\*)|#(?!x)|//|["']|$})
      stop_length = scanner.matched.length
      if stop_length > 0
        # Un-read the stop token so the next iteration handles it,
        # and trim it from the collected text.
        scanner.pos -= stop_length
        chunk = chunk[0, chunk.length - stop_length]
      end
      line += chunk.count("\n")
      pending += chunk
    end
  end
  yield pending unless pending.empty?
end
72
+
73
##
# Split a rule string into an optional rule number, a symbol and an expression,
# and build a Rule from them.
#
# @param [String] rule
# @return [Rule]
def ruleParts(rule)
  lhs, rhs = rule.split('::=', 2).map(&:strip)
  number, name = lhs.split(']', 2).map(&:strip)
  # Without a ']' the whole left-hand side is the symbol and there is no number.
  number, name = "", number if name.nil?
  # Drop the leading '[' (this yields nil when there was no number).
  number = number[1..-1]
  parsed = Rule.new(name && name.to_sym, number, expression(rhs).first, ebnf: self)
  debug("ruleParts") { parsed.inspect }
  parsed
end
87
+
88
##
# Parse a string into an expression tree and a remaining string.
# If the text left over after the alternation starts with ')', that
# closing parenthesis is consumed as well.
#
# @example
#     >>> expression("a b c")
#     ((seq a b c) '')
#
#     >>> expression("a? b+ c*")
#     ((seq (opt a) (plus b) (star c)) '')
#
#     >>> expression(" | x xlist")
#     ((alt (seq) (seq x xlist)) '')
#
#     >>> expression("a | (b - c)")
#     ((alt a (diff b c)) '')
#
#     >>> expression("a b | c d")
#     ((alt (seq a b) (seq c d)) '')
#
#     >>> expression("a | b | c")
#     ((alt a b c) '')
#
#     >>> expression("a) b c")
#     (a ' b c')
#
#     >>> expression("BaseDecl? PrefixDecl*")
#     ((seq (opt BaseDecl) (star PrefixDecl)) '')
#
#     >>> expression("NCCHAR1 | diff | [0-9] | #x00B7 | [#x0300-#x036F] | \[#x203F-#x2040\]")
#     ((alt NCCHAR1 diff
#           (range '0-9')
#           (hex '#x00B7')
#           (range '#x0300-#x036F')
#           (range '#x203F-#x2040')) '')
#
# @param [String] s
# @return [Array] two-element array of the expression and the unparsed remainder
def expression(s)
  debug("expression") { "(#{s.inspect})" }
  tree, rest = depth { alt(s) }
  debug { "=> alt returned #{[tree, rest].inspect}" }
  return [tree, rest] if rest.to_s.empty?

  token, after = depth { terminal(rest) }
  debug { "=> terminal returned #{[token, after].inspect}" }
  closes_group = token.is_a?(Array) && token.first == :")"
  [tree, closes_group ? after : rest]
end
136
+
137
##
# Parse alt
#     >>> alt("a | b | c")
#     ((alt a b c) '')
#
# Sequences are collected for as long as they are separated by '|'. A leading
# empty sequence is kept as an empty `seq`; a later empty sequence ends the scan.
# A single collected alternative is returned unwrapped, even when the scan
# stopped on a trailing empty sequence (e.g. `"a | )"`), so it is never lost.
#
# @param [String] s
# @return [Array] two-element array of the expression and the unparsed remainder
def alt(s)
  debug("alt") { "(#{s.inspect})" }
  args = []
  until s.to_s.empty?
    e, s = depth { seq(s) }
    debug { "=> seq returned #{[e, s].inspect}" }
    if e.to_s.empty?
      break unless args.empty?
      e = [:seq, []] # empty sequence
    end
    args << e
    unless s.to_s.empty?
      t, ss = depth { terminal(s) }
      # Only operator tokens are arrays; continue solely on the '|' operator.
      break unless t.is_a?(Array) && t.first == :alt
      s = ss
    end
  end
  # Return the collected alternative rather than the last (possibly empty) sequence.
  args.length > 1 ? [args.unshift(:alt), s] : [args.first, s]
end
162
+
163
##
# parse seq
#
#     >>> seq("a b c")
#     ((seq a b c) '')
#
#     >>> seq("a b? c")
#     ((seq a (opt b) c) '')
#
# Collects `diff` expressions until one comes back empty or the input runs out.
# A single item is returned unwrapped; no items gives an empty string.
#
# @param [String] s
# @return [Array] two-element array of the expression and the unparsed remainder
def seq(s)
  debug("seq") { "(#{s.inspect})" }
  items = []
  until s.to_s.empty?
    item, rest = depth { diff(s) }
    debug { "=> diff returned #{[item, rest].inspect}" }
    break if item.to_s.empty?

    items << item
    s = rest
  end

  case items.length
  when 0 then ["", s]
  when 1 then items + [s]
  else [items.unshift(:seq), s]
  end
end
192
+
193
##
# parse diff
#
#     >>> diff("a - b")
#     ((diff a b) '')
#
# @param [String] s
# @return [Array] two-element array of the expression and the unparsed remainder
# @raise [SyntaxError] when '-' is not followed by a second operand
def diff(s)
  debug("diff") { "(#{s.inspect})" }
  left, rest = depth { postfix(s) }
  debug { "=> postfix returned #{[left, rest].inspect}" }
  # Nothing parsed, or nothing left to inspect: no difference operator possible.
  return [left, rest] if left.to_s.empty? || rest.to_s.empty?

  token, after = depth { terminal(rest) }
  debug { "diff #{[token, after].inspect}" }
  return [left, rest] unless token.is_a?(Array) && token.first == :diff

  right, rest = primary(after)
  if right.to_s.empty?
    error("diff", "Syntax Error")
    raise SyntaxError, "diff missing second operand"
  end
  [[:diff, left, right], rest]
end
220
+
221
##
# parse postfix
#
#     >>> postfix("a b c")
#     (a ' b c')
#
#     >>> postfix("a? b c")
#     ((opt a) ' b c')
#
# @param [String] s
# @return [Array] two-element array of the expression and the unparsed remainder
def postfix(s)
  debug("postfix") { "(#{s.inspect})" }
  operand, rest = depth { primary(s) }
  debug { "=> primary returned #{[operand, rest].inspect}" }
  return ["", rest] if operand.to_s.empty?
  return [operand, rest] if rest.to_s.empty?

  token, after = depth { terminal(rest) }
  debug { "=> #{[token, after].inspect}" }
  if token.is_a?(Array) && %i[opt star plus].include?(token.first)
    # Wrap the operand in the postfix operator and consume the operator.
    [[token.first, operand], after]
  else
    [operand, rest]
  end
end
243
+
244
##
# parse primary
#
#     >>> primary("a b c")
#     (a ' b c')
#
# Symbols, strings, ranges and hex terminals are returned as they are; an opening
# parenthesis starts a nested expression; any other token gives an empty string.
#
# @param [String] s
# @return [Array] two-element array of the expression and the unparsed remainder
def primary(s)
  debug("primary") { "(#{s.inspect})" }
  token, rest = depth { terminal(s) }
  debug { "=> terminal returned #{[token, rest].inspect}" }

  case token
  when Symbol, String
    [token, rest]
  else
    case token.first
    when :range, :hex
      [token, rest]
    when :"("
      inner, rest = depth { expression(rest) }
      debug { "=> expression returned #{[inner, rest].inspect}" }
      [inner, rest]
    else
      ["", rest]
    end
  end
end
265
+
266
##
# parse one terminal; return the terminal and the remaining string
#
# A terminal is represented as a tuple whose 1st item gives the type;
# some types have additional info in the tuple. Strings are returned as
# String and symbols as Symbol; operators and parentheses are one-element arrays.
#
# HEX and SYMBOL terminals are matched at the start of the (stripped) input and
# the whole remainder, including any later lines, is returned.
#
# @example
#     >>> terminal("'abc' def")
#     ('abc' ' def')
#
#     >>> terminal("[0-9]")
#     ((range '0-9') '')
#     >>> terminal("#x00B7")
#     ((hex '#x00B7') '')
#     >>> terminal ("\[#x0300-#x036F\]")
#     ((range '#x0300-#x036F') '')
#     >>> terminal("\[^<>'{}|^`\]-\[#x00-#x20\]")
#     ((range "^<>'{}|^`") '-\[#x00-#x20\]')
#
# @param [String] s
# @return [Array] two-element array of the terminal and the unparsed remainder
# @raise [SyntaxError] when the input does not start with a recognized terminal
def terminal(s)
  s = s.strip
  case m = s[0, 1]
  when '"', "'" # STRING1 or STRING2
    l, s = s[1..-1].split(m, 2)
    [LL1::Lexer.unescape_string(l), s]
  when '[' # RANGE, O_RANGE
    # Split on the first ']' that is not escaped by a backslash.
    l, s = s[1..-1].split(/(?<=[^\\])\]/, 2)
    [[:range, LL1::Lexer.unescape_string(l)], s]
  when '#' # HEX
    # Anchor at the start so a '#' that is not a hex escape is rejected instead of
    # matching a later '#x..'; /m keeps everything after a newline in the remainder.
    md = s.match(/\A(#x\h+)(.*)\z/m)
    unless md
      error("terminal", "unrecognized terminal: #{s.inspect}")
      raise SyntaxError, "unrecognized terminal: #{s.inspect}"
    end
    [[:hex, md[1]], md[2]]
  when /[\w\.]/ # SYMBOL
    md = s.match(/\A([\w\.]+)(.*)\z/m)
    [md[1].to_sym, md[2]]
  when '-'
    [[:diff], s[1..-1]]
  when '?'
    [[:opt], s[1..-1]]
  when '|'
    [[:alt], s[1..-1]]
  when '+'
    [[:plus], s[1..-1]]
  when '*'
    [[:star], s[1..-1]]
  when /[\(\)]/ # '(' or ')'
    [[m.to_sym], s[1..-1]]
  else
    error("terminal", "unrecognized terminal: #{s.inspect}")
    raise SyntaxError, "unrecognized terminal: #{s.inspect}"
  end
end
319
+ end
320
+ end