lrama 0.5.11 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yaml +2 -2
  3. data/Gemfile +1 -1
  4. data/LEGAL.md +1 -0
  5. data/NEWS.md +187 -0
  6. data/README.md +15 -4
  7. data/Steepfile +3 -0
  8. data/lib/lrama/grammar/code/printer_code.rb +1 -1
  9. data/lib/lrama/grammar/code/rule_action.rb +19 -3
  10. data/lib/lrama/grammar/code.rb +19 -7
  11. data/lib/lrama/grammar/parameterizing_rule.rb +6 -0
  12. data/lib/lrama/grammar/parameterizing_rule_builder.rb +34 -0
  13. data/lib/lrama/grammar/parameterizing_rule_resolver.rb +30 -0
  14. data/lib/lrama/grammar/parameterizing_rule_rhs_builder.rb +53 -0
  15. data/lib/lrama/grammar/rule_builder.rb +26 -22
  16. data/lib/lrama/grammar.rb +15 -41
  17. data/lib/lrama/lexer/grammar_file.rb +21 -0
  18. data/lib/lrama/lexer/location.rb +77 -2
  19. data/lib/lrama/lexer/token/instantiate_rule.rb +18 -0
  20. data/lib/lrama/lexer/token/user_code.rb +10 -10
  21. data/lib/lrama/lexer/token.rb +1 -1
  22. data/lib/lrama/lexer.rb +21 -11
  23. data/lib/lrama/parser.rb +619 -454
  24. data/lib/lrama/states_reporter.rb +1 -1
  25. data/lib/lrama/version.rb +1 -1
  26. data/parser.y +95 -30
  27. data/sig/lrama/grammar/code/printer_code.rbs +1 -1
  28. data/sig/lrama/grammar/code.rbs +5 -5
  29. data/sig/lrama/grammar/parameterizing_rule.rbs +10 -0
  30. data/sig/lrama/grammar/parameterizing_rule_builder.rbs +19 -0
  31. data/sig/lrama/grammar/parameterizing_rule_resolver.rbs +16 -0
  32. data/sig/lrama/grammar/parameterizing_rule_rhs_builder.rbs +18 -0
  33. data/sig/lrama/grammar/parameterizing_rules/builder/base.rbs +5 -3
  34. data/sig/lrama/grammar/parameterizing_rules/builder/separated_list.rbs +2 -0
  35. data/sig/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rbs +2 -0
  36. data/sig/lrama/grammar/parameterizing_rules/builder.rbs +4 -3
  37. data/sig/lrama/grammar/rule_builder.rbs +2 -4
  38. data/sig/lrama/lexer/grammar_file.rbs +15 -0
  39. data/sig/lrama/lexer/location.rbs +13 -1
  40. data/sig/lrama/lexer/token/instantiate_rule.rbs +12 -0
  41. metadata +16 -6
  42. data/doc/TODO.md +0 -59
  43. data/lib/lrama/lexer/token/parameterizing.rb +0 -34
  44. data/sig/lrama/lexer/token/parameterizing.rbs +0 -17
data/lib/lrama/grammar.rb CHANGED
@@ -8,6 +8,10 @@ require "lrama/grammar/printer"
8
8
  require "lrama/grammar/reference"
9
9
  require "lrama/grammar/rule"
10
10
  require "lrama/grammar/rule_builder"
11
+ require "lrama/grammar/parameterizing_rule_builder"
12
+ require "lrama/grammar/parameterizing_rule_resolver"
13
+ require "lrama/grammar/parameterizing_rule_rhs_builder"
14
+ require "lrama/grammar/parameterizing_rule"
11
15
  require "lrama/grammar/symbol"
12
16
  require "lrama/grammar/type"
13
17
  require "lrama/grammar/union"
@@ -36,6 +40,7 @@ module Lrama
36
40
  @rule_builders = []
37
41
  @rules = []
38
42
  @sym_to_rules = {}
43
+ @parameterizing_resolver = ParameterizingRuleResolver.new
39
44
  @empty_symbol = nil
40
45
  @eof_symbol = nil
41
46
  @error_symbol = nil
@@ -69,7 +74,7 @@ module Lrama
69
74
  return sym
70
75
  end
71
76
 
72
- if sym = @symbols.find {|s| s.id == id }
77
+ if (sym = @symbols.find {|s| s.id == id })
73
78
  return sym
74
79
  end
75
80
 
@@ -129,6 +134,10 @@ module Lrama
129
134
  @rule_builders << builder
130
135
  end
131
136
 
137
+ def add_parameterizing_rule_builder(builder)
138
+ @parameterizing_resolver.add_parameterizing_rule_builder(builder)
139
+ end
140
+
132
141
  def prologue_first_lineno=(prologue_first_lineno)
133
142
  @aux.prologue_first_lineno = prologue_first_lineno
134
143
  end
@@ -310,7 +319,7 @@ module Lrama
310
319
 
311
320
  def setup_rules
312
321
  @rule_builders.each do |builder|
313
- builder.setup_rules
322
+ builder.setup_rules(@parameterizing_resolver)
314
323
  end
315
324
  end
316
325
 
@@ -350,56 +359,21 @@ module Lrama
350
359
  @accept_symbol = term
351
360
  end
352
361
 
353
- # 1. Add $accept rule to the top of rules
354
- # 2. Extract action in the middle of RHS into new Empty rule
355
- # 3. Append id and extract action then create Rule
356
- #
357
- # Bison 3.8.2 uses different orders for symbol number and rule number
358
- # when a rule has actions in the middle of a rule.
359
- #
360
- # For example,
361
- #
362
- # `program: $@1 top_compstmt`
363
- #
364
- # Rules are ordered like below,
365
- #
366
- # 1 $@1: ε
367
- # 2 program: $@1 top_compstmt
368
- #
369
- # Symbols are ordered like below,
370
- #
371
- # 164 program
372
- # 165 $@1
373
- #
374
362
  def normalize_rules
375
- # 1. Add $accept rule to the top of rules
376
- accept = @accept_symbol
377
- eof = @eof_symbol
363
+ # Add $accept rule to the top of rules
378
364
  lineno = @rule_builders.first ? @rule_builders.first.line : 0
379
- @rules << Rule.new(id: @rule_counter.increment, _lhs: accept.id, _rhs: [@rule_builders.first.lhs, eof.id], token_code: nil, lineno: lineno)
365
+ @rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [@rule_builders.first.lhs, @eof_symbol.id], token_code: nil, lineno: lineno)
380
366
 
381
367
  setup_rules
382
368
 
383
369
  @rule_builders.each do |builder|
384
- # Extract actions in the middle of RHS into new rules.
385
- builder.midrule_action_rules.each do |rule|
386
- @rules << rule
387
- end
388
-
389
370
  builder.rules.each do |rule|
390
- add_nterm(id: rule._lhs)
391
- @rules << rule
392
- end
393
-
394
- builder.parameterizing_rules.each do |rule|
395
371
  add_nterm(id: rule._lhs, tag: rule.lhs_tag)
396
372
  @rules << rule
397
373
  end
398
-
399
- builder.midrule_action_rules.each do |rule|
400
- add_nterm(id: rule._lhs)
401
- end
402
374
  end
375
+
376
+ @rules.sort_by!(&:id)
403
377
  end
404
378
 
405
379
  # Collect symbols from rules
@@ -0,0 +1,21 @@
1
+ module Lrama
2
+ class Lexer
3
+ class GrammarFile
4
+ attr_reader :path, :text
5
+
6
+ def initialize(path, text)
7
+ @path = path
8
+ @text = text
9
+ end
10
+
11
+ def ==(other)
12
+ self.class == other.class &&
13
+ self.path == other.path
14
+ end
15
+
16
+ def lines
17
+ @lines ||= text.split("\n")
18
+ end
19
+ end
20
+ end
21
+ end
@@ -1,9 +1,10 @@
1
1
  module Lrama
2
2
  class Lexer
3
3
  class Location
4
- attr_reader :first_line, :first_column, :last_line, :last_column
4
+ attr_reader :grammar_file, :first_line, :first_column, :last_line, :last_column
5
5
 
6
- def initialize(first_line:, first_column:, last_line:, last_column:)
6
+ def initialize(grammar_file:, first_line:, first_column:, last_line:, last_column:)
7
+ @grammar_file = grammar_file
7
8
  @first_line = first_line
8
9
  @first_column = first_column
9
10
  @last_line = last_line
@@ -12,11 +13,85 @@ module Lrama
12
13
 
13
14
  def ==(other)
14
15
  self.class == other.class &&
16
+ self.grammar_file == other.grammar_file &&
15
17
  self.first_line == other.first_line &&
16
18
  self.first_column == other.first_column &&
17
19
  self.last_line == other.last_line &&
18
20
  self.last_column == other.last_column
19
21
  end
22
+
23
+ def partial_location(left, right)
24
+ offset = -first_column
25
+ new_first_line = -1
26
+ new_first_column = -1
27
+ new_last_line = -1
28
+ new_last_column = -1
29
+
30
+ _text.each.with_index do |line, index|
31
+ new_offset = offset + line.length + 1
32
+
33
+ if offset <= left && left <= new_offset
34
+ new_first_line = first_line + index
35
+ new_first_column = left - offset
36
+ end
37
+
38
+ if offset <= right && right <= new_offset
39
+ new_last_line = first_line + index
40
+ new_last_column = right - offset
41
+ end
42
+
43
+ offset = new_offset
44
+ end
45
+
46
+ Location.new(
47
+ grammar_file: grammar_file,
48
+ first_line: new_first_line, first_column: new_first_column,
49
+ last_line: new_last_line, last_column: new_last_column
50
+ )
51
+ end
52
+
53
+ def to_s
54
+ "#{path} (#{first_line},#{first_column})-(#{last_line},#{last_column})"
55
+ end
56
+
57
+ def generate_error_message(error_message)
58
+ <<~ERROR.chomp
59
+ #{path}:#{first_line}:#{first_column}: #{error_message}
60
+ #{line_with_carets}
61
+ ERROR
62
+ end
63
+
64
+ def line_with_carets
65
+ <<~TEXT
66
+ #{text}
67
+ #{carets}
68
+ TEXT
69
+ end
70
+
71
+ private
72
+
73
+ def path
74
+ grammar_file.path
75
+ end
76
+
77
+ def blanks
78
+ (text[0...first_column] or raise "#{first_column} is invalid").gsub(/[^\t]/, ' ')
79
+ end
80
+
81
+ def carets
82
+ blanks + '^' * (last_column - first_column)
83
+ end
84
+
85
+ def text
86
+ @text ||= _text.join("\n")
87
+ end
88
+
89
+ def _text
90
+ @_text ||=begin
91
+ range = (first_line - 1)...last_line
92
+ grammar_file.lines[range] or raise "#{range} is invalid"
93
+ end
94
+ end
20
95
  end
21
96
  end
22
97
  end
@@ -0,0 +1,18 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class InstantiateRule < Token
5
+ attr_accessor :args
6
+
7
+ def initialize(s_value:, alias_name: nil, location: nil, args: [])
8
+ super s_value: s_value, alias_name: alias_name, location: location
9
+ @args = args
10
+ end
11
+
12
+ def rule_name
13
+ s_value
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
@@ -35,27 +35,27 @@ module Lrama
35
35
  # It need to wrap an identifier with brackets to use ".-" for identifiers
36
36
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
37
37
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
38
- return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
38
+ return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
39
39
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
40
40
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
41
- return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
41
+ return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
42
42
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
43
43
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
44
- return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
45
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
44
+ return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
45
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets)
46
46
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
47
- return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
47
+ return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
48
48
 
49
49
  # @ references
50
50
  # It need to wrap an identifier with brackets to use ".-" for identifiers
51
51
  when scanner.scan(/@\$/) # @$
52
- return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos - 1)
52
+ return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
53
53
  when scanner.scan(/@(\d+)/) # @1
54
- return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
54
+ return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
55
55
  when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
56
- return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
57
- when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
58
- return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
56
+ return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
57
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
58
+ return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
59
59
  end
60
60
  end
61
61
  end
@@ -1,6 +1,6 @@
1
1
  require 'lrama/lexer/token/char'
2
2
  require 'lrama/lexer/token/ident'
3
- require 'lrama/lexer/token/parameterizing'
3
+ require 'lrama/lexer/token/instantiate_rule'
4
4
  require 'lrama/lexer/token/tag'
5
5
  require 'lrama/lexer/token/user_code'
6
6
 
data/lib/lrama/lexer.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "strscan"
2
+ require "lrama/lexer/grammar_file"
2
3
  require "lrama/lexer/location"
3
4
  require "lrama/lexer/token"
4
5
 
@@ -28,10 +29,12 @@ module Lrama
28
29
  %error-token
29
30
  %empty
30
31
  %code
32
+ %rule
31
33
  )
32
34
 
33
- def initialize(text)
34
- @scanner = StringScanner.new(text)
35
+ def initialize(grammar_file)
36
+ @grammar_file = grammar_file
37
+ @scanner = StringScanner.new(grammar_file.text)
35
38
  @head_column = @head = @scanner.pos
36
39
  @head_line = @line = 1
37
40
  @status = :initial
@@ -57,8 +60,9 @@ module Lrama
57
60
 
58
61
  def location
59
62
  Location.new(
63
+ grammar_file: @grammar_file,
60
64
  first_line: @head_line, first_column: @head_column,
61
- last_line: @line, last_column: column
65
+ last_line: line, last_column: column
62
66
  )
63
67
  end
64
68
 
@@ -78,8 +82,7 @@ module Lrama
78
82
  end
79
83
  end
80
84
 
81
- @head_line = line
82
- @head_column = column
85
+ reset_first_position
83
86
 
84
87
  case
85
88
  when @scanner.eos?
@@ -117,6 +120,8 @@ module Lrama
117
120
  def lex_c_code
118
121
  nested = 0
119
122
  code = ''
123
+ reset_first_position
124
+
120
125
  while !@scanner.eos? do
121
126
  case
122
127
  when @scanner.scan(/{/)
@@ -140,12 +145,12 @@ module Lrama
140
145
  @line += @scanner.matched.count("\n")
141
146
  when @scanner.scan(/'.*?'/)
142
147
  code += %Q(#{@scanner.matched})
148
+ when @scanner.scan(/[^\"'\{\}\n]+/)
149
+ code += @scanner.matched
150
+ when @scanner.scan(/#{Regexp.escape(@end_symbol)}/)
151
+ code += @scanner.matched
143
152
  else
144
- if @scanner.scan(/[^\"'\{\}\n#{@end_symbol}]+/)
145
- code += @scanner.matched
146
- else
147
- code += @scanner.getch
148
- end
153
+ code += @scanner.getch
149
154
  end
150
155
  end
151
156
  raise ParseError, "Unexpected code: #{code}."
@@ -166,9 +171,14 @@ module Lrama
166
171
  end
167
172
  end
168
173
 
174
+ def reset_first_position
175
+ @head_line = line
176
+ @head_column = column
177
+ end
178
+
169
179
  def newline
170
180
  @line += 1
171
- @head = @scanner.pos + 1
181
+ @head = @scanner.pos
172
182
  end
173
183
  end
174
184
  end