lrama 0.5.5 → 0.5.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -28,7 +28,13 @@ module Lrama
28
28
  if lhs.referred_by?(ref_name)
29
29
  '$'
30
30
  else
31
- rhs.find_index {|token| token.referred_by?(ref_name) } + 1
31
+ index = rhs.find_index {|token| token.referred_by?(ref_name) }
32
+
33
+ if index
34
+ index + 1
35
+ else
36
+ raise "'#{ref_name}' is invalid name."
37
+ end
32
38
  end
33
39
  [ref[0], value, ref[2], ref[3], ref[4]]
34
40
  else
data/lib/lrama/lexer.rb CHANGED
@@ -1,332 +1,174 @@
1
1
  require "strscan"
2
- require "lrama/report/duration"
3
2
  require "lrama/lexer/token"
4
3
 
module Lrama
  # Pull-style lexer for grammar files.
  #
  # The caller asks for one token at a time with #next_token and steers the
  # lexer through #status: in :initial mode grammar tokens are produced, in
  # :c_declaration mode raw C code is collected until #end_symbol is reached.
  class Lexer
    # Current mode, :initial or :c_declaration (set by the caller).
    attr_accessor :status
    # Terminator used in :c_declaration mode. It is interpolated into a
    # regexp as-is, so it is a regexp source fragment, not a literal string.
    attr_accessor :end_symbol

    # Each entry is a regexp source fragment, hence the escaped brackets and
    # bar. Keep the %w(...) delimiters: with %w[...] the backslashes in front
    # of "[" and "]" would be consumed as delimiter escapes.
    SYMBOLS = %w(%{ %} %% { } \[ \] : \| ;)
    PERCENT_TOKENS = %w(
      %union
      %token
      %type
      %left
      %right
      %nonassoc
      %expect
      %define
      %require
      %printer
      %lex-param
      %parse-param
      %initial-action
      %precedence
      %prec
      %error-token
    )

    # Compiled once instead of on every #lex_token call.
    SYMBOL_PATTERN = Regexp.new(SYMBOLS.join('|'))
    PERCENT_TOKEN_PATTERN = Regexp.new(PERCENT_TOKENS.join('|'))
    ESCAPED_CHARACTER_PATTERN = /'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/
    IDENTIFIER_PATTERN = /([a-zA-Z_.][-a-zA-Z0-9_.]*)/
    COLON_LOOKAHEAD_PATTERN = /\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/
    # Remainder of a C literal after its opening quote; backslash escapes
    # (including an escaped quote) do not terminate the literal.
    DOUBLE_QUOTED_REST = /(?:\\.|[^"\\])*"/m
    SINGLE_QUOTED_REST = /(?:\\.|[^'\\])*'/m
    private_constant :SYMBOL_PATTERN, :PERCENT_TOKEN_PATTERN, :ESCAPED_CHARACTER_PATTERN,
                     :IDENTIFIER_PATTERN, :COLON_LOOKAHEAD_PATTERN,
                     :DOUBLE_QUOTED_REST, :SINGLE_QUOTED_REST

    # @param text [String] complete grammar source
    def initialize(text)
      @scanner = StringScanner.new(text)
      @head = @scanner.pos # byte offset at which the current line starts
      @line = 1
      @head_line = @line   # position recorded for the token being built
      @head_column = 0
      @status = :initial
      @end_symbol = nil
    end

    # Returns the next token for the current #status.
    #
    # @return [Array, nil] a [type, value] pair; nil at end of input in
    #   :initial mode, and nil for any status other than the two handled here
    def next_token
      case @status
      when :initial
        lex_token
      when :c_declaration
        lex_c_code
      end
    end

    # @return [Integer] 1-based number of the line the scanner is on
    def line
      @line
    end

    # @return [Integer] 0-based byte offset of the scanner within its line
    def column
      @scanner.pos - @head
    end

    # Lexes one grammar token, skipping whitespace, comments and %empty.
    #
    # @return [Array, nil] [type, value], or nil at end of input
    # @raise [RuntimeError] when the input at the current position is not a
    #   known token
    def lex_token
      skip_ignorable

      @head_line = line
      @head_column = column

      case
      when @scanner.eos?
        return
      when @scanner.scan(SYMBOL_PATTERN)
        return [@scanner.matched, @scanner.matched]
      when @scanner.scan(PERCENT_TOKEN_PATTERN)
        return [@scanner.matched, @scanner.matched]
      when @scanner.scan(/<\w+>/)
        return [:TAG, build_token(type: Token::Tag, s_value: @scanner.matched)]
      when @scanner.scan(/'.'/)
        return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
      when @scanner.scan(ESCAPED_CHARACTER_PATTERN)
        return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
      when @scanner.scan(/"/)
        return [:STRING, %Q("#{@scanner.scan_until(/"/)})]
      when @scanner.scan(/\d+/)
        return [:INTEGER, Integer(@scanner.matched)]
      when @scanner.scan(IDENTIFIER_PATTERN)
        token = build_token(type: Token::Ident, s_value: @scanner.matched)
        # An identifier followed by ":" (optionally with a "[name]" alias in
        # between) gets its own token type; the lookahead consumes nothing.
        type = @scanner.check(COLON_LOOKAHEAD_PATTERN) ? :IDENT_COLON : :IDENTIFIER
        return [type, token]
      else
        raise "Unexpected token: #{@scanner.peek(10).inspect} (line #{@head_line}, column #{@head_column})"
      end
    end

    # Collects raw C code up to, but not including, the end symbol.
    #
    # Braces are balanced, so with '}' as end symbol only the brace closing
    # the outermost level terminates the code. Quoted literals are copied
    # verbatim so braces inside them are not counted.
    #
    # @return [Array] [:C_DECLARATION, token]
    # @raise [RuntimeError] when input ends before the end symbol is found
    def lex_c_code
      nested = 0
      code = +''
      # Same pattern the end symbol would produce when interpolated into a
      # regexp literal; built once because it cannot change while we scan.
      terminator = Regexp.new(@end_symbol.to_s)

      until @scanner.eos?
        case
        when @scanner.scan(/{/)
          code << @scanner.matched
          nested += 1
        when @scanner.scan(/}/)
          if nested == 0 && @end_symbol == '}'
            # Leave the closing brace in the input for the next token.
            @scanner.unscan
            return c_declaration(code)
          end

          code << @scanner.matched
          nested -= 1
        when @scanner.check(terminator)
          return c_declaration(code)
        when @scanner.scan(/\n/)
          code << @scanner.matched
          newline
        when @scanner.scan(/"/)
          code << lex_c_literal('"', DOUBLE_QUOTED_REST)
        when @scanner.scan(/'/)
          code << lex_c_literal("'", SINGLE_QUOTED_REST)
        else
          code << @scanner.getch
        end
      end

      # An end symbol such as '\Z' can only match once the input is exhausted.
      return c_declaration(code) if @end_symbol && @scanner.check(terminator)

      raise "Unexpected end of input in C code: end symbol #{@end_symbol.inspect} not found (line #{@line})"
    end

    private

    # Skips everything between tokens: newlines, blanks, both comment styles
    # and the %empty marker.
    def skip_ignorable
      until @scanner.eos?
        case
        when @scanner.scan(/\n/)
          newline
        when @scanner.scan(/\s+/)
          # noop
        when @scanner.scan(/\/\*/)
          lex_comment
        when @scanner.scan(/\/\//)
          # A comment on the last line may not be followed by a newline.
          if @scanner.scan_until(/\n/)
            newline
          else
            @scanner.terminate
          end
        when @scanner.scan(/%empty/)
          # noop
        else
          break
        end
      end
    end

    # Consumes the rest of a /* ... */ comment whose opener was just scanned.
    # Returns silently if the input ends before the comment is closed.
    def lex_comment
      until @scanner.eos?
        case
        when @scanner.scan(/\n/)
          newline
        when @scanner.scan(/\*\//)
          return
        else
          @scanner.getch
        end
      end
    end

    # Reads the rest of a quoted C literal whose opening quote was just
    # scanned and returns the literal text including both quotes. When no
    # closing quote exists (e.g. an apostrophe in a comment) only the quote
    # character itself is returned and nothing more is consumed.
    def lex_c_literal(quote, rest_pattern)
      rest = @scanner.scan(rest_pattern)
      return quote unless rest

      track_newlines(rest)
      quote + rest
    end

    # Updates line and line-start bookkeeping for text that was consumed in
    # one go and may contain newlines.
    def track_newlines(text)
      count = text.count("\n")
      return if count.zero?

      bytes = text.b
      @line += count
      # The current line starts right after the last newline of the text.
      @head = @scanner.pos - (bytes.bytesize - bytes.rindex("\n") - 1)
    end

    def c_declaration(code)
      [:C_DECLARATION, build_token(type: Token::User_code, s_value: code, references: [])]
    end

    # Builds a Token positioned at the start of the most recent #lex_token
    # call; extra keyword options are assigned through the token's writers.
    def build_token(type:, s_value:, **options)
      token = Token.new(type: type, s_value: s_value)
      token.line = @head_line
      token.column = @head_column
      options.each do |attr, value|
        token.public_send("#{attr}=", value)
      end

      token
    end

    # Must be called right after a newline has been consumed: the scanner
    # already points at the first byte of the next line.
    def newline
      @line += 1
      @head = @scanner.pos
    end
  end
end
@@ -0,0 +1,128 @@
1
+ require 'optparse'
2
+
module Lrama
  # Handle option parsing for the command line interface.
  class OptionParser
    # --report values known from Bison.
    BISON_REPORTS = %w[states itemsets lookaheads solved counterexamples cex all none].freeze
    # --report values that are not part of the Bison list.
    EXTRA_REPORTS = %w[verbose].freeze
    # Known --report values that are rejected.
    UNSUPPORTED_REPORTS = %w[cex none].freeze
    ACCEPTED_REPORTS = (BISON_REPORTS + EXTRA_REPORTS - UNSUPPORTED_REPORTS).freeze
    # Reports switched on by "all".
    REPORTS_ENABLED_BY_ALL = (BISON_REPORTS - UNSUPPORTED_REPORTS - %w[all]).freeze
    ACCEPTED_TRACES = %w[
      none locations scan parse automaton bitsets
      closure grammar resource sets muscles tools
      m4-early m4 skeleton time ielr cex all
    ].freeze
    private_constant :BISON_REPORTS, :EXTRA_REPORTS, :UNSUPPORTED_REPORTS,
                     :ACCEPTED_REPORTS, :REPORTS_ENABLED_BY_ALL, :ACCEPTED_TRACES

    def initialize
      @options = Options.new
      @trace = []
      @report = []
    end

    # Parses command line arguments into an Options object.
    #
    # Recognized switches are removed from +argv+; the first remaining
    # argument is the grammar file. When it is "-" the following argument
    # only names the grammar and no file is opened here; otherwise the file
    # is opened for reading and stored in Options#y (the caller owns the
    # handle). Exits via +abort+ when no file argument is present.
    #
    # @param argv [Array<String>] command line arguments (modified in place)
    # @return [Options]
    # @raise [RuntimeError] on an invalid --report or --trace value
    def parse(argv)
      parse_by_option_parser(argv)

      @options.trace_opts = validate_trace(@trace)
      @options.report_opts = validate_report(@report)
      @options.grammar_file = argv.shift

      abort "File should be specified\n" unless @options.grammar_file

      if @options.grammar_file == '-'
        @options.grammar_file = argv.shift
        abort "File name for STDIN should be specified\n" unless @options.grammar_file
      else
        @options.y = File.open(@options.grammar_file, 'r')
      end

      # Default report location: next to the grammar file.
      if !@report.empty? && @options.report_file.nil?
        @options.report_file = sibling_path(@options.grammar_file, ".output")
      end

      # Default header location: next to the output file when one is set,
      # next to the grammar file otherwise.
      if @options.header && !@options.header_file
        @options.header_file = sibling_path(@options.outfile || @options.grammar_file, ".h")
      end

      @options
    end

    private

    # Path in the same directory as +path+ with its extension replaced.
    def sibling_path(path, extension)
      File.dirname(path) + "/" + File.basename(path, ".*") + extension
    end

    # Declares all switches and consumes them from +argv+.
    def parse_by_option_parser(argv)
      ::OptionParser.new do |o|
        o.banner = <<~BANNER
          Lrama is LALR (1) parser generator written by Ruby.

          Usage: lrama [options] FILE
        BANNER
        o.separator ''
        o.separator 'STDIN mode:'
        o.separator 'lrama [options] - FILE read grammar from STDIN'
        o.separator ''
        o.separator 'Tuning the Parser:'
        o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v }
        o.on('-t', 'reserved, do nothing') { }
        o.separator ''
        o.separator 'Output:'
        o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v }
        o.on('-h=[FILE]', 'also produce a header file named FILE (deprecated)') {|v| @options.header = true; @options.header_file = v }
        o.on('-d', 'also produce a header file') { @options.header = true }
        o.on('-r', '--report=THINGS', Array, 'also produce details on the automaton') {|v| @report = v }
        o.on('--report-file=FILE', 'also produce details on the automaton output to a file named FILE') {|v| @options.report_file = v }
        o.on('-o', '--output=FILE', 'leave output to FILE') {|v| @options.outfile = v }
        o.on('--trace=THINGS', Array, 'also output trace logs at runtime') {|v| @trace = v }
        o.on('-v', 'reserved, do nothing') { }
        o.separator ''
        o.separator 'Error Recovery:'
        o.on('-e', 'enable error recovery') { @options.error_recovery = true }
        o.separator ''
        o.separator 'Other options:'
        o.on('-V', '--version', "output version information and exit") { puts "lrama #{Lrama::VERSION}"; exit 0 }
        o.on('--help', "display this help and exit") { puts o; exit 0 }
        o.separator ''
        o.parse!(argv)
      end
    end

    # Turns the --report values into a hash of enabled report kinds.
    # :grammar is always enabled and "all" is expanded into the individual
    # kinds it stands for.
    #
    # @raise [RuntimeError] on an unknown or unsupported value
    def validate_report(report)
      h = { grammar: true }

      report.each do |r|
        raise "Invalid report option \"#{r}\"." unless ACCEPTED_REPORTS.include?(r)

        h[r.to_sym] = true
      end

      if h.delete(:all)
        REPORTS_ENABLED_BY_ALL.each do |r|
          h[r.to_sym] = true
        end
      end

      h
    end

    # Turns the --trace values into a hash of enabled trace kinds.
    #
    # @raise [RuntimeError] on an unknown value
    def validate_trace(trace)
      trace.each_with_object({}) do |t, h|
        raise "Invalid trace option \"#{t}\"." unless ACCEPTED_TRACES.include?(t)

        h[t.to_sym] = true
      end
    end
  end
end
@@ -0,0 +1,23 @@
module Lrama
  # Command line options.
  #
  # Plain value holder: every option is a read/write attribute and
  # #initialize fills in the defaults.
  class Options
    attr_accessor :skeleton, :header, :header_file,
                  :report_file, :outfile,
                  :error_recovery, :grammar_file,
                  :trace_opts, :report_opts, :y

    def initialize
      @skeleton = "bison/yacc.c"
      @header = false
      @header_file = nil
      @report_file = nil
      @outfile = "y.tab.c"
      @error_recovery = false
      @grammar_file = nil
      @trace_opts = nil
      @report_opts = nil
      # IO object; presumably the stream the grammar is read from (verify
      # against the caller).
      @y = STDIN
    end
  end
end