lrama 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,13 @@ module Lrama
           if lhs.referred_by?(ref_name)
             '$'
           else
-            rhs.find_index {|token| token.referred_by?(ref_name) } + 1
+            index = rhs.find_index {|token| token.referred_by?(ref_name) }
+
+            if index
+              index + 1
+            else
+              raise "'#{ref_name}' is invalid name."
+            end
           end
           [ref[0], value, ref[2], ref[3], ref[4]]
         else
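
Context for this hunk: Array#find_index returns nil when no rhs token matches the named reference, so the old one-liner evaluated nil + 1 and crashed with an unhelpful NoMethodError; the rewrite surfaces the offending reference name instead. A minimal standalone sketch of the two behaviors (rhs and ref_name are illustrative stand-ins, not the real token objects):

    rhs = %w[expr PLUS expr]   # stand-ins for grammar tokens
    ref_name = "missing"

    # Before: no match means find_index returns nil, and nil + 1
    # raises NoMethodError with no hint about the cause.
    #   rhs.find_index {|token| token == ref_name } + 1

    # After: the invalid name is reported explicitly.
    index = rhs.find_index {|token| token == ref_name }
    if index
      index + 1
    else
      raise "'#{ref_name}' is invalid name."
    end
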
data/lib/lrama/lexer.rb CHANGED
@@ -1,332 +1,174 @@
 require "strscan"
-require "lrama/report/duration"
 require "lrama/lexer/token"
 
 module Lrama
-  # Lexer for parse.y
   class Lexer
-    include Lrama::Report::Duration
-
-    # States
-    #
-    # See: https://www.gnu.org/software/bison/manual/html_node/Grammar-Outline.html
-    Initial = 0
-    Prologue = 1
-    BisonDeclarations = 2
-    GrammarRules = 3
-    Epilogue = 4
-
-    # Token types
-
-    attr_reader :prologue, :bison_declarations, :grammar_rules, :epilogue,
-                :bison_declarations_tokens, :grammar_rules_tokens
+    attr_accessor :status
+    attr_accessor :end_symbol
+
+    SYMBOLS = %w(%{ %} %% { } \[ \] : \| ;)
+    PERCENT_TOKENS = %w(
+      %union
+      %token
+      %type
+      %left
+      %right
+      %nonassoc
+      %expect
+      %define
+      %require
+      %printer
+      %lex-param
+      %parse-param
+      %initial-action
+      %precedence
+      %prec
+      %error-token
+    )
 
     def initialize(text)
-      @text = text
-      @state = Initial
-      # Array of texts
-      @prologue = []
-      @bison_declarations = []
-      @grammar_rules = []
-      @epilogue = []
-
-      @bison_declarations_tokens = []
-      @grammar_rules_tokens = []
-
-      @debug = false
+      @scanner = StringScanner.new(text)
+      @head = @scanner.pos
+      @line = 1
+      @status = :initial
+      @end_symbol = nil
+    end
 
-      report_duration(:lex) do
-        lex_text
-        lex_bison_declarations_tokens
-        lex_grammar_rules_tokens
+    def next_token
+      case @status
+      when :initial
+        lex_token
+      when :c_declaration
+        lex_c_code
       end
     end
 
-    private
-
-    def create_token(type, s_value, line, column)
-      t = Token.new(type: type, s_value: s_value)
-      t.line = line
-      t.column = column
-
-      return t
+    def line
+      @line
     end
 
-    # TODO: Remove this
-    def lex_text
-      @text.each_line.with_index(1) do |string, lineno|
-        case @state
-        when Initial
-          # Skip until "%{"
-          if string == "%{\n"
-            @state = Prologue
-            @prologue << ["", lineno]
-            next
-          end
-        when Prologue
-          # Between "%{" and "%}"
-          if string == "%}\n"
-            @state = BisonDeclarations
-            @prologue << ["", lineno]
-            next
-          end
-
-          @prologue << [string, lineno]
-        when BisonDeclarations
-          if string == "%%\n"
-            @state = GrammarRules
-            next
-          end
-
-          @bison_declarations << [string, lineno]
-        when GrammarRules
-          # Between "%%" and "%%"
-          if string == "%%\n"
-            @state = Epilogue
-            next
-          end
-
-          @grammar_rules << [string, lineno]
-        when Epilogue
-          @epilogue << [string, lineno]
-        else
-          raise "Unknown state: #{@state}"
-        end
-      end
+    def column
+      @scanner.pos - @head
     end
 
-    # See:
-    # * https://www.gnu.org/software/bison/manual/html_node/Decl-Summary.html
-    # * https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
-    # * https://www.gnu.org/software/bison/manual/html_node/Empty-Rules.html
-    def lex_common(lines, tokens)
-      line = lines.first[1]
-      column = 0
-      ss = StringScanner.new(lines.map(&:first).join)
-
-      while !ss.eos? do
+    def lex_token
+      while !@scanner.eos? do
         case
-        when ss.scan(/\n/)
-          line += 1
-          column = ss.pos
-        when ss.scan(/\s+/)
-          # skip
-        when ss.scan(/;/)
-          tokens << create_token(Token::Semicolon, ss[0], line, ss.pos - column)
-        when ss.scan(/\|/)
-          tokens << create_token(Token::Bar, ss[0], line, ss.pos - column)
-        when ss.scan(/(\d+)/)
-          tokens << create_token(Token::Number, Integer(ss[0]), line, ss.pos - column)
-        when ss.scan(/(<[a-zA-Z0-9_]+>)/)
-          tokens << create_token(Token::Tag, ss[0], line, ss.pos - column)
-        when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]\s*:/)
-          tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
-          tokens << create_token(Token::Named_Ref, ss[2], line, ss.pos - column)
-        when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\s*:/)
-          tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
-        when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
-          tokens << create_token(Token::Ident, ss[0], line, ss.pos - column)
-        when ss.scan(/\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/)
-          tokens << create_token(Token::Named_Ref, ss[1], line, ss.pos - column)
-        when ss.scan(/%expect/)
-          tokens << create_token(Token::P_expect, ss[0], line, ss.pos - column)
-        when ss.scan(/%define/)
-          tokens << create_token(Token::P_define, ss[0], line, ss.pos - column)
-        when ss.scan(/%printer/)
-          tokens << create_token(Token::P_printer, ss[0], line, ss.pos - column)
-        when ss.scan(/%error-token/)
-          tokens << create_token(Token::P_error_token, ss[0], line, ss.pos - column)
-        when ss.scan(/%lex-param/)
-          tokens << create_token(Token::P_lex_param, ss[0], line, ss.pos - column)
-        when ss.scan(/%parse-param/)
-          tokens << create_token(Token::P_parse_param, ss[0], line, ss.pos - column)
-        when ss.scan(/%initial-action/)
-          tokens << create_token(Token::P_initial_action, ss[0], line, ss.pos - column)
-        when ss.scan(/%union/)
-          tokens << create_token(Token::P_union, ss[0], line, ss.pos - column)
-        when ss.scan(/%token/)
-          tokens << create_token(Token::P_token, ss[0], line, ss.pos - column)
-        when ss.scan(/%type/)
-          tokens << create_token(Token::P_type, ss[0], line, ss.pos - column)
-        when ss.scan(/%nonassoc/)
-          tokens << create_token(Token::P_nonassoc, ss[0], line, ss.pos - column)
-        when ss.scan(/%left/)
-          tokens << create_token(Token::P_left, ss[0], line, ss.pos - column)
-        when ss.scan(/%right/)
-          tokens << create_token(Token::P_right, ss[0], line, ss.pos - column)
-        when ss.scan(/%precedence/)
-          tokens << create_token(Token::P_precedence, ss[0], line, ss.pos - column)
-        when ss.scan(/%prec/)
-          tokens << create_token(Token::P_prec, ss[0], line, ss.pos - column)
-        when ss.scan(/{/)
-          token, line = lex_user_code(ss, line, ss.pos - column, lines)
-          tokens << token
-        when ss.scan(/"/)
-          string, line = lex_string(ss, "\"", line, lines)
-          token = create_token(Token::String, string, line, ss.pos - column)
-          tokens << token
-        when ss.scan(/\/\*/)
-          # TODO: Need to keep comment?
-          line = lex_comment(ss, line, lines, "")
-        when ss.scan(/\/\//)
-          line = lex_line_comment(ss, line, "")
-        when ss.scan(/'(.)'/)
-          tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
-        when ss.scan(/'\\(.)'/) # '\\', '\t'
-          tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
-        when ss.scan(/'\\(\d+)'/) # '\13'
-          tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
-        when ss.scan(/%empty/)
-          # skip
+        when @scanner.scan(/\n/)
+          newline
+        when @scanner.scan(/\s+/)
+          # noop
+        when @scanner.scan(/\/\*/)
+          lex_comment
+        when @scanner.scan(/\/\//)
+          @scanner.scan_until(/\n/)
+          newline
+        when @scanner.scan(/%empty/)
+          # noop
         else
-          l = line - lines.first[1]
-          split = ss.string.split("\n")
-          col = ss.pos - split[0...l].join("\n").length
-          raise "Parse error (unknown token): #{split[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{col})"
+          break
         end
       end
-    end
 
-    def lex_bison_declarations_tokens
-      lex_common(@bison_declarations, @bison_declarations_tokens)
+      @head_line = line
+      @head_column = column
+
+      case
+      when @scanner.eos?
+        return
+      when @scanner.scan(/#{SYMBOLS.join('|')}/)
+        return [@scanner.matched, @scanner.matched]
+      when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/)
+        return [@scanner.matched, @scanner.matched]
+      when @scanner.scan(/<\w+>/)
+        return [:TAG, build_token(type: Token::Tag, s_value: @scanner.matched)]
+      when @scanner.scan(/'.'/)
+        return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
+      when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/)
+        return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
+      when @scanner.scan(/"/)
+        return [:STRING, %Q("#{@scanner.scan_until(/"/)})]
+      when @scanner.scan(/\d+/)
+        return [:INTEGER, Integer(@scanner.matched)]
+      when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
+        token = build_token(type: Token::Ident, s_value: @scanner.matched)
+        type =
+          if @scanner.check(/\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/)
+            :IDENT_COLON
+          else
+            :IDENTIFIER
+          end
+        return [type, token]
+      else
+        raise
+      end
     end
 
-    def lex_user_code(ss, line, column, lines)
-      first_line = line
-      first_column = column
-      debug("Enter lex_user_code: #{line}")
-      brace_count = 1
-      str = "{"
-      # Array of [type, $n, tag, first column, last column]
-      # TODO: Is it better to keep string, like "$$", and use gsub?
-      references = []
-
-      while !ss.eos? do
+    def lex_c_code
+      nested = 0
+      code = ''
+      while !@scanner.eos? do
         case
-        when ss.scan(/\n/)
-          line += 1
-        when ss.scan(/"/)
-          string, line = lex_string(ss, "\"", line, lines)
-          str << string
-          next
-        when ss.scan(/'/)
-          string, line = lex_string(ss, "'", line, lines)
-          str << string
-          next
-        when ss.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
-          tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
-          references << [:dollar, "$", tag, str.length, str.length + ss[0].length - 1]
-        when ss.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
-          tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
-          references << [:dollar, Integer(ss[2]), tag, str.length, str.length + ss[0].length - 1]
-        when ss.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_.][-a-zA-Z0-9_.]*)/) # $foo, $expr, $<long>program
-          tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
-          references << [:dollar, ss[2], tag, str.length, str.length + ss[0].length - 1]
-        when ss.scan(/@\$/) # @$
-          references << [:at, "$", nil, str.length, str.length + ss[0].length - 1]
-        when ss.scan(/@(\d+)/) # @1
-          references << [:at, Integer(ss[1]), nil, str.length, str.length + ss[0].length - 1]
-        when ss.scan(/{/)
-          brace_count += 1
-        when ss.scan(/}/)
-          brace_count -= 1
-
-          debug("Return lex_user_code: #{line}")
-          if brace_count == 0
-            str << ss[0]
-            user_code = Token.new(type: Token::User_code, s_value: str.freeze)
-            user_code.line = first_line
-            user_code.column = first_column
-            user_code.references = references
-            return [user_code, line]
+        when @scanner.scan(/{/)
+          code += @scanner.matched
+          nested += 1
+        when @scanner.scan(/}/)
+          if nested == 0 && @end_symbol == '}'
+            @scanner.unscan
+            return [:C_DECLARATION, build_token(type: Token::User_code, s_value: code, references: [])]
+          else
+            code += @scanner.matched
+            nested -= 1
           end
-        when ss.scan(/\/\*/)
-          str << ss[0]
-          line = lex_comment(ss, line, lines, str)
-        when ss.scan(/\/\//)
-          str << ss[0]
-          line = lex_line_comment(ss, line, str)
+        when @scanner.check(/#{@end_symbol}/)
+          return [:C_DECLARATION, build_token(type: Token::User_code, s_value: code, references: [])]
+        when @scanner.scan(/\n/)
+          code += @scanner.matched
+          newline
+        when @scanner.scan(/"/)
+          matched = @scanner.scan_until(/"/)
+          code += %Q("#{matched})
+          @line += matched.count("\n")
+        when @scanner.scan(/'/)
+          matched = @scanner.scan_until(/'/)
+          code += %Q('#{matched})
         else
-          # noop, just consume char
-          str << ss.getch
-          next
+          code += @scanner.getch
         end
-
-        str << ss[0]
       end
-
-      # Reach to end of input but brace does not match
-      l = line - lines.first[1]
-      raise "Parse error (brace mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
+      raise
     end
 
-    def lex_string(ss, terminator, line, lines)
-      debug("Enter lex_string: #{line}")
-
-      str = terminator.dup
-
-      while (c = ss.getch) do
-        str << c
-
-        case c
-        when "\n"
-          line += 1
-        when terminator
-          debug("Return lex_string: #{line}")
-          return [str, line]
-        else
-          # noop
-        end
-      end
-
-      # Reach to end of input but quote does not match
-      l = line - lines.first[1]
-      raise "Parse error (quote mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
-    end
+    private
 
-    # /* */ style comment
-    def lex_comment(ss, line, lines, str)
-      while !ss.eos? do
+    def lex_comment
+      while !@scanner.eos? do
         case
-        when ss.scan(/\n/)
-          line += 1
-        when ss.scan(/\*\//)
-          return line
+        when @scanner.scan(/\n/)
+          @line += 1
+          @head = @scanner.pos + 1
+        when @scanner.scan(/\*\//)
+          return
         else
-          str << ss.getch
-          next
+          @scanner.getch
         end
-
-        str << ss[0]
       end
-
-      # Reach to end of input but quote does not match
-      l = line - lines.first[1]
-      raise "Parse error (comment mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
     end
 
-    # // style comment
-    def lex_line_comment(ss, line, str)
-      while !ss.eos? do
-        case
-        when ss.scan(/\n/)
-          return line + 1
-        else
-          str << ss.getch
-          next
-        end
+    def build_token(type:, s_value:, **options)
+      token = Token.new(type: type, s_value: s_value)
+      token.line = @head_line
+      token.column = @head_column
+      options.each do |attr, value|
+        token.public_send("#{attr}=", value)
       end
 
-      line # Reach to end of input
-    end
-
-    def lex_grammar_rules_tokens
-      lex_common(@grammar_rules, @grammar_rules_tokens)
+      token
     end
 
-    def debug(msg)
-      return unless @debug
-      puts "#{msg}\n"
+    def newline
+      @line += 1
+      @head = @scanner.pos + 1
     end
   end
 end
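
This rewrite replaces the batch lexer (split the file into sections up front, then tokenize each) with a pull-based tokenizer that a parser drives one token at a time, flipping status and end_symbol around embedded C code. A minimal sketch of the new calling convention, assuming the lrama 0.5.7 gem is on the load path (the grammar text is illustrative):

    require "lrama/lexer"

    lexer = Lrama::Lexer.new(<<~GRAMMAR)
      %token NUM
      %%
      program: NUM ;
    GRAMMAR

    # next_token returns [type, value] pairs and nil at end of input.
    while (pair = lexer.next_token)
      type, value = pair
      puts "#{type.inspect} #{value.inspect} (line #{lexer.line})"
    end

    # Around an embedded C block the caller switches modes before pulling,
    # then restores them afterwards:
    #   lexer.status = :c_declaration
    #   lexer.end_symbol = '}'
    #   lexer.next_token          # => [:C_DECLARATION, <User_code token>]
    #   lexer.status = :initial
    #   lexer.end_symbol = nil
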
data/lib/lrama/option_parser.rb ADDED
@@ -0,0 +1,128 @@
+require 'optparse'
+
+module Lrama
+  # Handle option parsing for the command line interface.
+  class OptionParser
+    def initialize
+      @options = Options.new
+      @trace = []
+      @report = []
+    end
+
+    def parse(argv)
+      parse_by_option_parser(argv)
+
+      @options.trace_opts = validate_trace(@trace)
+      @options.report_opts = validate_report(@report)
+      @options.grammar_file = argv.shift
+
+      if !@options.grammar_file
+        abort "File should be specified\n"
+      end
+
+      if @options.grammar_file == '-'
+        @options.grammar_file = argv.shift or abort "File name for STDIN should be specified\n"
+      else
+        @options.y = File.open(@options.grammar_file, 'r')
+      end
+
+      if !@report.empty? && @options.report_file.nil? && @options.grammar_file
+        @options.report_file = File.dirname(@options.grammar_file) + "/" + File.basename(@options.grammar_file, ".*") + ".output"
+      end
+
+      if !@options.header_file && @options.header
+        case
+        when @options.outfile
+          @options.header_file = File.dirname(@options.outfile) + "/" + File.basename(@options.outfile, ".*") + ".h"
+        when @options.grammar_file
+          @options.header_file = File.dirname(@options.grammar_file) + "/" + File.basename(@options.grammar_file, ".*") + ".h"
+        end
+      end
+
+      @options
+    end
+
+    private
+
+    def parse_by_option_parser(argv)
+      ::OptionParser.new do |o|
+        o.banner = <<~BANNER
+          Lrama is LALR (1) parser generator written by Ruby.
+
+          Usage: lrama [options] FILE
+        BANNER
+        o.separator ''
+        o.separator 'STDIN mode:'
+        o.separator 'lrama [options] - FILE  read grammar from STDIN'
+        o.separator ''
+        o.separator 'Tuning the Parser:'
+        o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v }
+        o.on('-t', 'reserved, do nothing') { }
+        o.separator ''
+        o.separator 'Output:'
+        o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v }
+        o.on('-h=[FILE]', 'also produce a header file named FILE (deprecated)') {|v| @options.header = true; @options.header_file = v }
+        o.on('-d', 'also produce a header file') { @options.header = true }
+        o.on('-r', '--report=THINGS', Array, 'also produce details on the automaton') {|v| @report = v }
+        o.on('--report-file=FILE', 'also produce details on the automaton output to a file named FILE') {|v| @options.report_file = v }
+        o.on('-o', '--output=FILE', 'leave output to FILE') {|v| @options.outfile = v }
+        o.on('--trace=THINGS', Array, 'also output trace logs at runtime') {|v| @trace = v }
+        o.on('-v', 'reserved, do nothing') { }
+        o.separator ''
+        o.separator 'Error Recovery:'
+        o.on('-e', 'enable error recovery') {|v| @options.error_recovery = true }
+        o.separator ''
+        o.separator 'Other options:'
+        o.on('-V', '--version', "output version information and exit") {|v| puts "lrama #{Lrama::VERSION}"; exit 0 }
+        o.on('--help', "display this help and exit") {|v| puts o; exit 0 }
+        o.separator ''
+        o.parse!(argv)
+      end
+    end
+
+    def validate_report(report)
+      bison_list = %w[states itemsets lookaheads solved counterexamples cex all none]
+      others = %w[verbose]
+      list = bison_list + others
+      not_supported = %w[cex none]
+      h = { grammar: true }
+
+      report.each do |r|
+        if list.include?(r) && !not_supported.include?(r)
+          h[r.to_sym] = true
+        else
+          raise "Invalid report option \"#{r}\"."
+        end
+      end
+
+      if h[:all]
+        (bison_list - not_supported).each do |r|
+          h[r.to_sym] = true
+        end
+
+        h.delete(:all)
+      end
+
+      return h
+    end
+
+    def validate_trace(trace)
+      list = %w[
+        none locations scan parse automaton bitsets
+        closure grammar resource sets muscles tools
+        m4-early m4 skeleton time ielr cex all
+      ]
+      h = {}
+
+      trace.each do |t|
+        if list.include?(t)
+          h[t.to_sym] = true
+        else
+          raise "Invalid trace option \"#{t}\"."
+        end
+      end
+
+      return h
+    end
+  end
+end
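
With option handling extracted into its own class, the CLI can be exercised without shelling out: parse returns the populated Options value object defined in the next file. A sketch under the assumption that parser.y exists in the current directory (parse opens the grammar file eagerly):

    require "lrama"

    options = Lrama::OptionParser.new.parse(%w[-d --report=states,solved parser.y])
    options.grammar_file   # => "parser.y"
    options.header         # => true  (-d)
    options.header_file    # => "./y.tab.h"  (derived from the default outfile "y.tab.c")
    options.report_file    # => "./parser.output"  (derived because --report was given)
    options.report_opts    # => { grammar: true, states: true, solved: true }

Note the STDIN mode: "lrama [options] - FILE" reads the grammar from STDIN but still takes a file name, which parse uses to derive the report and header paths.
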
data/lib/lrama/options.rb ADDED
@@ -0,0 +1,23 @@
+module Lrama
+  # Command line options.
+  class Options
+    attr_accessor :skeleton, :header, :header_file,
+                  :report_file, :outfile,
+                  :error_recovery, :grammar_file,
+                  :report_file, :trace_opts, :report_opts, :y
+
+    def initialize
+      @skeleton = "bison/yacc.c"
+      @header = false
+      @header_file = nil
+      @report_file = nil
+      @outfile = "y.tab.c"
+      @error_recovery = false
+      @grammar_file = nil
+      @report_file = nil
+      @trace_opts = nil
+      @report_opts = nil
+      @y = STDIN
+    end
+  end
+end