lrama 0.6.2 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/lrama/grammar.rb CHANGED
@@ -1,16 +1,18 @@
1
+ require "forwardable"
1
2
  require "lrama/grammar/auxiliary"
2
3
  require "lrama/grammar/binding"
3
4
  require "lrama/grammar/code"
4
5
  require "lrama/grammar/counter"
5
6
  require "lrama/grammar/error_token"
7
+ require "lrama/grammar/parameterizing_rule"
6
8
  require "lrama/grammar/percent_code"
7
9
  require "lrama/grammar/precedence"
8
10
  require "lrama/grammar/printer"
9
11
  require "lrama/grammar/reference"
10
12
  require "lrama/grammar/rule"
11
13
  require "lrama/grammar/rule_builder"
12
- require "lrama/grammar/parameterizing_rule"
13
14
  require "lrama/grammar/symbol"
15
+ require "lrama/grammar/symbols"
14
16
  require "lrama/grammar/type"
15
17
  require "lrama/grammar/union"
16
18
  require "lrama/lexer"
@@ -18,14 +20,23 @@ require "lrama/lexer"
18
20
  module Lrama
19
21
  # Grammar is the result of parsing an input grammar file
20
22
  class Grammar
23
+ extend Forwardable
24
+
21
25
  attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
22
26
  attr_accessor :union, :expect,
23
27
  :printers, :error_tokens,
24
28
  :lex_param, :parse_param, :initial_action,
25
- :symbols, :types,
29
+ :after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack,
30
+ :symbols_resolver, :types,
26
31
  :rules, :rule_builders,
27
32
  :sym_to_rules, :no_stdlib
28
33
 
34
+ def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term,
35
+ :find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
36
+ :find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type,
37
+ :fill_printer, :fill_error_token, :sort_by_number!
38
+
39
+
29
40
  def initialize(rule_counter)
30
41
  @rule_counter = rule_counter
31
42
 
@@ -33,7 +44,7 @@ module Lrama
33
44
  @percent_codes = []
34
45
  @printers = []
35
46
  @error_tokens = []
36
- @symbols = []
47
+ @symbols_resolver = Grammar::Symbols::Resolver.new
37
48
  @types = []
38
49
  @rule_builders = []
39
50
  @rules = []
@@ -62,44 +73,6 @@ module Lrama
62
73
  @error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
63
74
  end
64
75
 
65
- def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
66
- if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
67
- if replace
68
- sym.id = id
69
- sym.alias_name = alias_name
70
- sym.tag = tag
71
- end
72
-
73
- return sym
74
- end
75
-
76
- if (sym = @symbols.find {|s| s.id == id })
77
- return sym
78
- end
79
-
80
- sym = Symbol.new(
81
- id: id, alias_name: alias_name, number: nil, tag: tag,
82
- term: true, token_id: token_id, nullable: false
83
- )
84
- @symbols << sym
85
- @terms = nil
86
-
87
- return sym
88
- end
89
-
90
- def add_nterm(id:, alias_name: nil, tag: nil)
91
- return if @symbols.find {|s| s.id == id }
92
-
93
- sym = Symbol.new(
94
- id: id, alias_name: alias_name, number: nil, tag: tag,
95
- term: false, token_id: nil, nullable: nil,
96
- )
97
- @symbols << sym
98
- @nterms = nil
99
-
100
- return sym
101
- end
102
-
103
76
  def add_type(id:, tag:)
104
77
  @types << Type.new(id: id, tag: tag)
105
78
  end
@@ -165,13 +138,9 @@ module Lrama
165
138
  normalize_rules
166
139
  collect_symbols
167
140
  set_lhs_and_rhs
168
- fill_symbol_number
169
141
  fill_default_precedence
142
+ fill_symbols
170
143
  fill_sym_to_rules
171
- fill_nterm_type
172
- fill_symbol_printer
173
- fill_symbol_error_token
174
- @symbols.sort_by!(&:number)
175
144
  compute_nullable
176
145
  compute_first_set
177
146
  end
@@ -180,40 +149,10 @@ module Lrama
180
149
  #
181
150
  # * Validation for no_declared_type_reference
182
151
  def validate!
183
- validate_symbol_number_uniqueness!
184
- validate_symbol_alias_name_uniqueness!
152
+ @symbols_resolver.validate!
185
153
  validate_rule_lhs_is_nterm!
186
154
  end
187
155
 
188
- def find_symbol_by_s_value(s_value)
189
- @symbols.find do |sym|
190
- sym.id.s_value == s_value
191
- end
192
- end
193
-
194
- def find_symbol_by_s_value!(s_value)
195
- find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
196
- end
197
-
198
- def find_symbol_by_id(id)
199
- @symbols.find do |sym|
200
- sym.id == id || sym.alias_name == id.s_value
201
- end
202
- end
203
-
204
- def find_symbol_by_id!(id)
205
- find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
206
- end
207
-
208
- def find_symbol_by_number!(number)
209
- sym = @symbols[number]
210
-
211
- raise "Symbol not found: #{number}" unless sym
212
- raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
213
-
214
- sym
215
- end
216
-
217
156
  def find_rules_by_symbol!(sym)
218
157
  find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
219
158
  end
@@ -222,22 +161,6 @@ module Lrama
222
161
  @sym_to_rules[sym.number]
223
162
  end
224
163
 
225
- def terms_count
226
- terms.count
227
- end
228
-
229
- def terms
230
- @terms ||= @symbols.select(&:term?)
231
- end
232
-
233
- def nterms_count
234
- nterms.count
235
- end
236
-
237
- def nterms
238
- @nterms ||= @symbols.select(&:nterm?)
239
- end
240
-
241
164
  private
242
165
 
243
166
  def compute_nullable
@@ -284,7 +207,7 @@ module Lrama
284
207
  rule.nullable = false
285
208
  end
286
209
 
287
- nterms.select {|r| r.nullable.nil? }.each do |nterm|
210
+ nterms.select {|e| e.nullable.nil? }.each do |nterm|
288
211
  nterm.nullable = false
289
212
  end
290
213
  end
@@ -330,12 +253,6 @@ module Lrama
330
253
  end
331
254
  end
332
255
 
333
- def find_nterm_by_id!(id)
334
- nterms.find do |nterm|
335
- nterm.id == id
336
- end || (raise "Nterm not found: #{id}")
337
- end
338
-
339
256
  def append_special_symbols
340
257
  # YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated
341
258
  # term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2)
@@ -397,79 +314,6 @@ module Lrama
397
314
  end
398
315
  end
399
316
 
400
- # Fill #number and #token_id
401
- def fill_symbol_number
402
- # Character literal in grammar file has
403
- # token id corresponding to ASCII code by default,
404
- # so start token_id from 256.
405
- token_id = 256
406
-
407
- # YYEMPTY = -2
408
- # YYEOF = 0
409
- # YYerror = 1
410
- # YYUNDEF = 2
411
- number = 3
412
-
413
- nterm_token_id = 0
414
- used_numbers = {}
415
-
416
- @symbols.map(&:number).each do |n|
417
- used_numbers[n] = true
418
- end
419
-
420
- (@symbols.select(&:term?) + @symbols.select(&:nterm?)).each do |sym|
421
- while used_numbers[number] do
422
- number += 1
423
- end
424
-
425
- if sym.number.nil?
426
- sym.number = number
427
- number += 1
428
- end
429
-
430
- # If id is Token::Char, it uses ASCII code
431
- if sym.term? && sym.token_id.nil?
432
- if sym.id.is_a?(Lrama::Lexer::Token::Char)
433
- # Ignore ' on the both sides
434
- case sym.id.s_value[1..-2]
435
- when "\\b"
436
- sym.token_id = 8
437
- when "\\f"
438
- sym.token_id = 12
439
- when "\\n"
440
- sym.token_id = 10
441
- when "\\r"
442
- sym.token_id = 13
443
- when "\\t"
444
- sym.token_id = 9
445
- when "\\v"
446
- sym.token_id = 11
447
- when "\""
448
- sym.token_id = 34
449
- when "'"
450
- sym.token_id = 39
451
- when "\\\\"
452
- sym.token_id = 92
453
- when /\A\\(\d+)\z/
454
- sym.token_id = Integer($1, 8)
455
- when /\A(.)\z/
456
- sym.token_id = $1.bytes.first
457
- else
458
- raise "Unknown Char s_value #{sym}"
459
- end
460
- else
461
- sym.token_id = token_id
462
- token_id += 1
463
- end
464
- end
465
-
466
- if sym.nterm? && sym.token_id.nil?
467
- sym.token_id = nterm_token_id
468
- nterm_token_id += 1
469
- end
470
- end
471
- end
472
-
473
317
  def set_lhs_and_rhs
474
318
  @rules.each do |rule|
475
319
  rule.lhs = token_to_symbol(rule._lhs) if rule._lhs
@@ -480,15 +324,6 @@ module Lrama
480
324
  end
481
325
  end
482
326
 
483
- def token_to_symbol(token)
484
- case token
485
- when Lrama::Lexer::Token
486
- find_symbol_by_id!(token)
487
- else
488
- raise "Unknown class: #{token}"
489
- end
490
- end
491
-
492
327
  # Rule inherits precedence from the last term in RHS.
493
328
  #
494
329
  # https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html
@@ -506,6 +341,14 @@ module Lrama
506
341
  end
507
342
  end
508
343
 
344
+ def fill_symbols
345
+ fill_symbol_number
346
+ fill_nterm_type(@types)
347
+ fill_printer(@printers)
348
+ fill_error_token(@error_tokens)
349
+ sort_by_number!
350
+ end
351
+
509
352
  def fill_sym_to_rules
510
353
  @rules.each do |rule|
511
354
  key = rule.lhs.number
@@ -514,68 +357,6 @@ module Lrama
514
357
  end
515
358
  end
516
359
 
517
- # Fill nterm's tag defined by %type decl
518
- def fill_nterm_type
519
- @types.each do |type|
520
- nterm = find_nterm_by_id!(type.id)
521
- nterm.tag = type.tag
522
- end
523
- end
524
-
525
- def fill_symbol_printer
526
- @symbols.each do |sym|
527
- @printers.each do |printer|
528
- printer.ident_or_tags.each do |ident_or_tag|
529
- case ident_or_tag
530
- when Lrama::Lexer::Token::Ident
531
- sym.printer = printer if sym.id == ident_or_tag
532
- when Lrama::Lexer::Token::Tag
533
- sym.printer = printer if sym.tag == ident_or_tag
534
- else
535
- raise "Unknown token type. #{printer}"
536
- end
537
- end
538
- end
539
- end
540
- end
541
-
542
- def fill_symbol_error_token
543
- @symbols.each do |sym|
544
- @error_tokens.each do |error_token|
545
- error_token.ident_or_tags.each do |ident_or_tag|
546
- case ident_or_tag
547
- when Lrama::Lexer::Token::Ident
548
- sym.error_token = error_token if sym.id == ident_or_tag
549
- when Lrama::Lexer::Token::Tag
550
- sym.error_token = error_token if sym.tag == ident_or_tag
551
- else
552
- raise "Unknown token type. #{error_token}"
553
- end
554
- end
555
- end
556
- end
557
- end
558
-
559
- def validate_symbol_number_uniqueness!
560
- invalid = @symbols.group_by(&:number).select do |number, syms|
561
- syms.count > 1
562
- end
563
-
564
- return if invalid.empty?
565
-
566
- raise "Symbol number is duplicated. #{invalid}"
567
- end
568
-
569
- def validate_symbol_alias_name_uniqueness!
570
- invalid = @symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms|
571
- syms.count > 1
572
- end
573
-
574
- return if invalid.empty?
575
-
576
- raise "Symbol alias name is duplicated. #{invalid}"
577
- end
578
-
579
360
  def validate_rule_lhs_is_nterm!
580
361
  errors = []
581
362
 
@@ -38,7 +38,7 @@ module Lrama
38
38
  return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
39
39
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
40
40
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
41
- return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
41
+ return Lrama::Grammar::Reference.new(type: :dollar, number: Integer(scanner[2]), index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
42
42
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
43
43
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
44
44
  return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
@@ -51,11 +51,22 @@ module Lrama
51
51
  when scanner.scan(/@\$/) # @$
52
52
  return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
53
53
  when scanner.scan(/@(\d+)/) # @1
54
- return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
54
+ return Lrama::Grammar::Reference.new(type: :at, number: Integer(scanner[1]), index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
55
55
  when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
56
56
  return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
57
57
  when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
58
58
  return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
59
+
60
+ # $: references
61
+ when scanner.scan(/\$:\$/) # $:$
62
+ return Lrama::Grammar::Reference.new(type: :index, name: "$", first_column: start, last_column: scanner.pos)
63
+ when scanner.scan(/\$:(\d+)/) # $:1
64
+ return Lrama::Grammar::Reference.new(type: :index, number: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
65
+ when scanner.scan(/\$:([a-zA-Z_][a-zA-Z0-9_]*)/) # $:foo, $:expr (named reference without brackets)
66
+ return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
67
+ when scanner.scan(/\$:\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $:[expr.right], $:[expr-right] (named reference with brackets)
68
+ return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
69
+
59
70
  end
60
71
  end
61
72
  end
data/lib/lrama/lexer.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "strscan"
2
+
2
3
  require "lrama/lexer/grammar_file"
3
4
  require "lrama/lexer/location"
4
5
  require "lrama/lexer/token"
@@ -26,6 +27,11 @@ module Lrama
26
27
  %precedence
27
28
  %prec
28
29
  %error-token
30
+ %before-reduce
31
+ %after-reduce
32
+ %after-shift-error-token
33
+ %after-shift
34
+ %after-pop-stack
29
35
  %empty
30
36
  %code
31
37
  %rule
data/lib/lrama/output.rb CHANGED
@@ -16,8 +16,7 @@ module Lrama
16
16
 
17
17
  def initialize(
18
18
  out:, output_file_path:, template_name:, grammar_file_path:,
19
- header_out: nil, header_file_path: nil,
20
- context:, grammar:, error_recovery: false
19
+ context:, grammar:, header_out: nil, header_file_path: nil, error_recovery: false
21
20
  )
22
21
  @out = out
23
22
  @output_file_path = output_file_path
@@ -162,6 +161,61 @@ module Lrama
162
161
  STR
163
162
  end
164
163
 
164
+ def after_shift_function(comment = "")
165
+ return "" unless @grammar.after_shift
166
+
167
+ <<-STR
168
+ #{comment}
169
+ #line #{@grammar.after_shift.line} "#{@grammar_file_path}"
170
+ {#{@grammar.after_shift.s_value}(#{parse_param_name});}
171
+ #line [@oline@] [@ofile@]
172
+ STR
173
+ end
174
+
175
+ def before_reduce_function(comment = "")
176
+ return "" unless @grammar.before_reduce
177
+
178
+ <<-STR
179
+ #{comment}
180
+ #line #{@grammar.before_reduce.line} "#{@grammar_file_path}"
181
+ {#{@grammar.before_reduce.s_value}(yylen#{user_args});}
182
+ #line [@oline@] [@ofile@]
183
+ STR
184
+ end
185
+
186
+ def after_reduce_function(comment = "")
187
+ return "" unless @grammar.after_reduce
188
+
189
+ <<-STR
190
+ #{comment}
191
+ #line #{@grammar.after_reduce.line} "#{@grammar_file_path}"
192
+ {#{@grammar.after_reduce.s_value}(yylen#{user_args});}
193
+ #line [@oline@] [@ofile@]
194
+ STR
195
+ end
196
+
197
+ def after_shift_error_token_function(comment = "")
198
+ return "" unless @grammar.after_shift_error_token
199
+
200
+ <<-STR
201
+ #{comment}
202
+ #line #{@grammar.after_shift_error_token.line} "#{@grammar_file_path}"
203
+ {#{@grammar.after_shift_error_token.s_value}(#{parse_param_name});}
204
+ #line [@oline@] [@ofile@]
205
+ STR
206
+ end
207
+
208
+ def after_pop_stack_function(len, comment = "")
209
+ return "" unless @grammar.after_pop_stack
210
+
211
+ <<-STR
212
+ #{comment}
213
+ #line #{@grammar.after_pop_stack.line} "#{@grammar_file_path}"
214
+ {#{@grammar.after_pop_stack.s_value}(#{len}#{user_args});}
215
+ #line [@oline@] [@ofile@]
216
+ STR
217
+ end
218
+
165
219
  def symbol_actions_for_error_token
166
220
  str = ""
167
221