lrama 0.6.2 → 0.6.3

Sign up to get free protection for your applications and to get access to all the features.
data/lib/lrama/grammar.rb CHANGED
@@ -1,16 +1,18 @@
1
+ require "forwardable"
1
2
  require "lrama/grammar/auxiliary"
2
3
  require "lrama/grammar/binding"
3
4
  require "lrama/grammar/code"
4
5
  require "lrama/grammar/counter"
5
6
  require "lrama/grammar/error_token"
7
+ require "lrama/grammar/parameterizing_rule"
6
8
  require "lrama/grammar/percent_code"
7
9
  require "lrama/grammar/precedence"
8
10
  require "lrama/grammar/printer"
9
11
  require "lrama/grammar/reference"
10
12
  require "lrama/grammar/rule"
11
13
  require "lrama/grammar/rule_builder"
12
- require "lrama/grammar/parameterizing_rule"
13
14
  require "lrama/grammar/symbol"
15
+ require "lrama/grammar/symbols"
14
16
  require "lrama/grammar/type"
15
17
  require "lrama/grammar/union"
16
18
  require "lrama/lexer"
@@ -18,14 +20,23 @@ require "lrama/lexer"
18
20
  module Lrama
19
21
  # Grammar is the result of parsing an input grammar file
20
22
  class Grammar
23
+ extend Forwardable
24
+
21
25
  attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
22
26
  attr_accessor :union, :expect,
23
27
  :printers, :error_tokens,
24
28
  :lex_param, :parse_param, :initial_action,
25
- :symbols, :types,
29
+ :after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack,
30
+ :symbols_resolver, :types,
26
31
  :rules, :rule_builders,
27
32
  :sym_to_rules, :no_stdlib
28
33
 
34
+ def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term,
35
+ :find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
36
+ :find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type,
37
+ :fill_printer, :fill_error_token, :sort_by_number!
38
+
39
+
29
40
  def initialize(rule_counter)
30
41
  @rule_counter = rule_counter
31
42
 
@@ -33,7 +44,7 @@ module Lrama
33
44
  @percent_codes = []
34
45
  @printers = []
35
46
  @error_tokens = []
36
- @symbols = []
47
+ @symbols_resolver = Grammar::Symbols::Resolver.new
37
48
  @types = []
38
49
  @rule_builders = []
39
50
  @rules = []
@@ -62,44 +73,6 @@ module Lrama
62
73
  @error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
63
74
  end
64
75
 
65
- def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
66
- if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
67
- if replace
68
- sym.id = id
69
- sym.alias_name = alias_name
70
- sym.tag = tag
71
- end
72
-
73
- return sym
74
- end
75
-
76
- if (sym = @symbols.find {|s| s.id == id })
77
- return sym
78
- end
79
-
80
- sym = Symbol.new(
81
- id: id, alias_name: alias_name, number: nil, tag: tag,
82
- term: true, token_id: token_id, nullable: false
83
- )
84
- @symbols << sym
85
- @terms = nil
86
-
87
- return sym
88
- end
89
-
90
- def add_nterm(id:, alias_name: nil, tag: nil)
91
- return if @symbols.find {|s| s.id == id }
92
-
93
- sym = Symbol.new(
94
- id: id, alias_name: alias_name, number: nil, tag: tag,
95
- term: false, token_id: nil, nullable: nil,
96
- )
97
- @symbols << sym
98
- @nterms = nil
99
-
100
- return sym
101
- end
102
-
103
76
  def add_type(id:, tag:)
104
77
  @types << Type.new(id: id, tag: tag)
105
78
  end
@@ -165,13 +138,9 @@ module Lrama
165
138
  normalize_rules
166
139
  collect_symbols
167
140
  set_lhs_and_rhs
168
- fill_symbol_number
169
141
  fill_default_precedence
142
+ fill_symbols
170
143
  fill_sym_to_rules
171
- fill_nterm_type
172
- fill_symbol_printer
173
- fill_symbol_error_token
174
- @symbols.sort_by!(&:number)
175
144
  compute_nullable
176
145
  compute_first_set
177
146
  end
@@ -180,40 +149,10 @@ module Lrama
180
149
  #
181
150
  # * Validation for no_declared_type_reference
182
151
  def validate!
183
- validate_symbol_number_uniqueness!
184
- validate_symbol_alias_name_uniqueness!
152
+ @symbols_resolver.validate!
185
153
  validate_rule_lhs_is_nterm!
186
154
  end
187
155
 
188
- def find_symbol_by_s_value(s_value)
189
- @symbols.find do |sym|
190
- sym.id.s_value == s_value
191
- end
192
- end
193
-
194
- def find_symbol_by_s_value!(s_value)
195
- find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
196
- end
197
-
198
- def find_symbol_by_id(id)
199
- @symbols.find do |sym|
200
- sym.id == id || sym.alias_name == id.s_value
201
- end
202
- end
203
-
204
- def find_symbol_by_id!(id)
205
- find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
206
- end
207
-
208
- def find_symbol_by_number!(number)
209
- sym = @symbols[number]
210
-
211
- raise "Symbol not found: #{number}" unless sym
212
- raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
213
-
214
- sym
215
- end
216
-
217
156
  def find_rules_by_symbol!(sym)
218
157
  find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
219
158
  end
@@ -222,22 +161,6 @@ module Lrama
222
161
  @sym_to_rules[sym.number]
223
162
  end
224
163
 
225
- def terms_count
226
- terms.count
227
- end
228
-
229
- def terms
230
- @terms ||= @symbols.select(&:term?)
231
- end
232
-
233
- def nterms_count
234
- nterms.count
235
- end
236
-
237
- def nterms
238
- @nterms ||= @symbols.select(&:nterm?)
239
- end
240
-
241
164
  private
242
165
 
243
166
  def compute_nullable
@@ -284,7 +207,7 @@ module Lrama
284
207
  rule.nullable = false
285
208
  end
286
209
 
287
- nterms.select {|r| r.nullable.nil? }.each do |nterm|
210
+ nterms.select {|e| e.nullable.nil? }.each do |nterm|
288
211
  nterm.nullable = false
289
212
  end
290
213
  end
@@ -330,12 +253,6 @@ module Lrama
330
253
  end
331
254
  end
332
255
 
333
- def find_nterm_by_id!(id)
334
- nterms.find do |nterm|
335
- nterm.id == id
336
- end || (raise "Nterm not found: #{id}")
337
- end
338
-
339
256
  def append_special_symbols
340
257
  # YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated
341
258
  # term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2)
@@ -397,79 +314,6 @@ module Lrama
397
314
  end
398
315
  end
399
316
 
400
- # Fill #number and #token_id
401
- def fill_symbol_number
402
- # Character literal in grammar file has
403
- # token id corresponding to ASCII code by default,
404
- # so start token_id from 256.
405
- token_id = 256
406
-
407
- # YYEMPTY = -2
408
- # YYEOF = 0
409
- # YYerror = 1
410
- # YYUNDEF = 2
411
- number = 3
412
-
413
- nterm_token_id = 0
414
- used_numbers = {}
415
-
416
- @symbols.map(&:number).each do |n|
417
- used_numbers[n] = true
418
- end
419
-
420
- (@symbols.select(&:term?) + @symbols.select(&:nterm?)).each do |sym|
421
- while used_numbers[number] do
422
- number += 1
423
- end
424
-
425
- if sym.number.nil?
426
- sym.number = number
427
- number += 1
428
- end
429
-
430
- # If id is Token::Char, it uses ASCII code
431
- if sym.term? && sym.token_id.nil?
432
- if sym.id.is_a?(Lrama::Lexer::Token::Char)
433
- # Ignore ' on the both sides
434
- case sym.id.s_value[1..-2]
435
- when "\\b"
436
- sym.token_id = 8
437
- when "\\f"
438
- sym.token_id = 12
439
- when "\\n"
440
- sym.token_id = 10
441
- when "\\r"
442
- sym.token_id = 13
443
- when "\\t"
444
- sym.token_id = 9
445
- when "\\v"
446
- sym.token_id = 11
447
- when "\""
448
- sym.token_id = 34
449
- when "'"
450
- sym.token_id = 39
451
- when "\\\\"
452
- sym.token_id = 92
453
- when /\A\\(\d+)\z/
454
- sym.token_id = Integer($1, 8)
455
- when /\A(.)\z/
456
- sym.token_id = $1.bytes.first
457
- else
458
- raise "Unknown Char s_value #{sym}"
459
- end
460
- else
461
- sym.token_id = token_id
462
- token_id += 1
463
- end
464
- end
465
-
466
- if sym.nterm? && sym.token_id.nil?
467
- sym.token_id = nterm_token_id
468
- nterm_token_id += 1
469
- end
470
- end
471
- end
472
-
473
317
  def set_lhs_and_rhs
474
318
  @rules.each do |rule|
475
319
  rule.lhs = token_to_symbol(rule._lhs) if rule._lhs
@@ -480,15 +324,6 @@ module Lrama
480
324
  end
481
325
  end
482
326
 
483
- def token_to_symbol(token)
484
- case token
485
- when Lrama::Lexer::Token
486
- find_symbol_by_id!(token)
487
- else
488
- raise "Unknown class: #{token}"
489
- end
490
- end
491
-
492
327
  # Rule inherits precedence from the last term in RHS.
493
328
  #
494
329
  # https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html
@@ -506,6 +341,14 @@ module Lrama
506
341
  end
507
342
  end
508
343
 
344
+ def fill_symbols
345
+ fill_symbol_number
346
+ fill_nterm_type(@types)
347
+ fill_printer(@printers)
348
+ fill_error_token(@error_tokens)
349
+ sort_by_number!
350
+ end
351
+
509
352
  def fill_sym_to_rules
510
353
  @rules.each do |rule|
511
354
  key = rule.lhs.number
@@ -514,68 +357,6 @@ module Lrama
514
357
  end
515
358
  end
516
359
 
517
- # Fill nterm's tag defined by %type decl
518
- def fill_nterm_type
519
- @types.each do |type|
520
- nterm = find_nterm_by_id!(type.id)
521
- nterm.tag = type.tag
522
- end
523
- end
524
-
525
- def fill_symbol_printer
526
- @symbols.each do |sym|
527
- @printers.each do |printer|
528
- printer.ident_or_tags.each do |ident_or_tag|
529
- case ident_or_tag
530
- when Lrama::Lexer::Token::Ident
531
- sym.printer = printer if sym.id == ident_or_tag
532
- when Lrama::Lexer::Token::Tag
533
- sym.printer = printer if sym.tag == ident_or_tag
534
- else
535
- raise "Unknown token type. #{printer}"
536
- end
537
- end
538
- end
539
- end
540
- end
541
-
542
- def fill_symbol_error_token
543
- @symbols.each do |sym|
544
- @error_tokens.each do |error_token|
545
- error_token.ident_or_tags.each do |ident_or_tag|
546
- case ident_or_tag
547
- when Lrama::Lexer::Token::Ident
548
- sym.error_token = error_token if sym.id == ident_or_tag
549
- when Lrama::Lexer::Token::Tag
550
- sym.error_token = error_token if sym.tag == ident_or_tag
551
- else
552
- raise "Unknown token type. #{error_token}"
553
- end
554
- end
555
- end
556
- end
557
- end
558
-
559
- def validate_symbol_number_uniqueness!
560
- invalid = @symbols.group_by(&:number).select do |number, syms|
561
- syms.count > 1
562
- end
563
-
564
- return if invalid.empty?
565
-
566
- raise "Symbol number is duplicated. #{invalid}"
567
- end
568
-
569
- def validate_symbol_alias_name_uniqueness!
570
- invalid = @symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms|
571
- syms.count > 1
572
- end
573
-
574
- return if invalid.empty?
575
-
576
- raise "Symbol alias name is duplicated. #{invalid}"
577
- end
578
-
579
360
  def validate_rule_lhs_is_nterm!
580
361
  errors = []
581
362
 
@@ -38,7 +38,7 @@ module Lrama
38
38
  return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
39
39
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
40
40
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
41
- return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
41
+ return Lrama::Grammar::Reference.new(type: :dollar, number: Integer(scanner[2]), index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
42
42
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
43
43
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
44
44
  return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
@@ -51,11 +51,22 @@ module Lrama
51
51
  when scanner.scan(/@\$/) # @$
52
52
  return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
53
53
  when scanner.scan(/@(\d+)/) # @1
54
- return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
54
+ return Lrama::Grammar::Reference.new(type: :at, number: Integer(scanner[1]), index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
55
55
  when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
56
56
  return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
57
57
  when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
58
58
  return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
59
+
60
+ # $: references
61
+ when scanner.scan(/\$:\$/) # $:$
62
+ return Lrama::Grammar::Reference.new(type: :index, name: "$", first_column: start, last_column: scanner.pos)
63
+ when scanner.scan(/\$:(\d+)/) # $:1
64
+ return Lrama::Grammar::Reference.new(type: :index, number: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
65
+ when scanner.scan(/\$:([a-zA-Z_][a-zA-Z0-9_]*)/) # $:foo, $:expr (named reference without brackets)
66
+ return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
67
+ when scanner.scan(/\$:\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $:[expr.right], $:[expr-right] (named reference with brackets)
68
+ return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
69
+
59
70
  end
60
71
  end
61
72
  end
data/lib/lrama/lexer.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "strscan"
2
+
2
3
  require "lrama/lexer/grammar_file"
3
4
  require "lrama/lexer/location"
4
5
  require "lrama/lexer/token"
@@ -26,6 +27,11 @@ module Lrama
26
27
  %precedence
27
28
  %prec
28
29
  %error-token
30
+ %before-reduce
31
+ %after-reduce
32
+ %after-shift-error-token
33
+ %after-shift
34
+ %after-pop-stack
29
35
  %empty
30
36
  %code
31
37
  %rule
data/lib/lrama/output.rb CHANGED
@@ -16,8 +16,7 @@ module Lrama
16
16
 
17
17
  def initialize(
18
18
  out:, output_file_path:, template_name:, grammar_file_path:,
19
- header_out: nil, header_file_path: nil,
20
- context:, grammar:, error_recovery: false
19
+ context:, grammar:, header_out: nil, header_file_path: nil, error_recovery: false
21
20
  )
22
21
  @out = out
23
22
  @output_file_path = output_file_path
@@ -162,6 +161,61 @@ module Lrama
162
161
  STR
163
162
  end
164
163
 
164
+ def after_shift_function(comment = "")
165
+ return "" unless @grammar.after_shift
166
+
167
+ <<-STR
168
+ #{comment}
169
+ #line #{@grammar.after_shift.line} "#{@grammar_file_path}"
170
+ {#{@grammar.after_shift.s_value}(#{parse_param_name});}
171
+ #line [@oline@] [@ofile@]
172
+ STR
173
+ end
174
+
175
+ def before_reduce_function(comment = "")
176
+ return "" unless @grammar.before_reduce
177
+
178
+ <<-STR
179
+ #{comment}
180
+ #line #{@grammar.before_reduce.line} "#{@grammar_file_path}"
181
+ {#{@grammar.before_reduce.s_value}(yylen#{user_args});}
182
+ #line [@oline@] [@ofile@]
183
+ STR
184
+ end
185
+
186
+ def after_reduce_function(comment = "")
187
+ return "" unless @grammar.after_reduce
188
+
189
+ <<-STR
190
+ #{comment}
191
+ #line #{@grammar.after_reduce.line} "#{@grammar_file_path}"
192
+ {#{@grammar.after_reduce.s_value}(yylen#{user_args});}
193
+ #line [@oline@] [@ofile@]
194
+ STR
195
+ end
196
+
197
+ def after_shift_error_token_function(comment = "")
198
+ return "" unless @grammar.after_shift_error_token
199
+
200
+ <<-STR
201
+ #{comment}
202
+ #line #{@grammar.after_shift_error_token.line} "#{@grammar_file_path}"
203
+ {#{@grammar.after_shift_error_token.s_value}(#{parse_param_name});}
204
+ #line [@oline@] [@ofile@]
205
+ STR
206
+ end
207
+
208
+ def after_pop_stack_function(len, comment = "")
209
+ return "" unless @grammar.after_pop_stack
210
+
211
+ <<-STR
212
+ #{comment}
213
+ #line #{@grammar.after_pop_stack.line} "#{@grammar_file_path}"
214
+ {#{@grammar.after_pop_stack.s_value}(#{len}#{user_args});}
215
+ #line [@oline@] [@ofile@]
216
+ STR
217
+ end
218
+
165
219
  def symbol_actions_for_error_token
166
220
  str = ""
167
221