lrama 0.5.11 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yaml +2 -2
  3. data/Gemfile +1 -1
  4. data/LEGAL.md +1 -0
  5. data/NEWS.md +187 -0
  6. data/README.md +15 -4
  7. data/Steepfile +3 -0
  8. data/lib/lrama/grammar/code/printer_code.rb +1 -1
  9. data/lib/lrama/grammar/code/rule_action.rb +19 -3
  10. data/lib/lrama/grammar/code.rb +19 -7
  11. data/lib/lrama/grammar/parameterizing_rule.rb +6 -0
  12. data/lib/lrama/grammar/parameterizing_rule_builder.rb +34 -0
  13. data/lib/lrama/grammar/parameterizing_rule_resolver.rb +30 -0
  14. data/lib/lrama/grammar/parameterizing_rule_rhs_builder.rb +53 -0
  15. data/lib/lrama/grammar/rule_builder.rb +26 -22
  16. data/lib/lrama/grammar.rb +15 -41
  17. data/lib/lrama/lexer/grammar_file.rb +21 -0
  18. data/lib/lrama/lexer/location.rb +77 -2
  19. data/lib/lrama/lexer/token/instantiate_rule.rb +18 -0
  20. data/lib/lrama/lexer/token/user_code.rb +10 -10
  21. data/lib/lrama/lexer/token.rb +1 -1
  22. data/lib/lrama/lexer.rb +21 -11
  23. data/lib/lrama/parser.rb +619 -454
  24. data/lib/lrama/states_reporter.rb +1 -1
  25. data/lib/lrama/version.rb +1 -1
  26. data/parser.y +95 -30
  27. data/sig/lrama/grammar/code/printer_code.rbs +1 -1
  28. data/sig/lrama/grammar/code.rbs +5 -5
  29. data/sig/lrama/grammar/parameterizing_rule.rbs +10 -0
  30. data/sig/lrama/grammar/parameterizing_rule_builder.rbs +19 -0
  31. data/sig/lrama/grammar/parameterizing_rule_resolver.rbs +16 -0
  32. data/sig/lrama/grammar/parameterizing_rule_rhs_builder.rbs +18 -0
  33. data/sig/lrama/grammar/parameterizing_rules/builder/base.rbs +5 -3
  34. data/sig/lrama/grammar/parameterizing_rules/builder/separated_list.rbs +2 -0
  35. data/sig/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rbs +2 -0
  36. data/sig/lrama/grammar/parameterizing_rules/builder.rbs +4 -3
  37. data/sig/lrama/grammar/rule_builder.rbs +2 -4
  38. data/sig/lrama/lexer/grammar_file.rbs +15 -0
  39. data/sig/lrama/lexer/location.rbs +13 -1
  40. data/sig/lrama/lexer/token/instantiate_rule.rbs +12 -0
  41. metadata +16 -6
  42. data/doc/TODO.md +0 -59
  43. data/lib/lrama/lexer/token/parameterizing.rb +0 -34
  44. data/sig/lrama/lexer/token/parameterizing.rbs +0 -17
data/lib/lrama/grammar.rb CHANGED
@@ -8,6 +8,10 @@ require "lrama/grammar/printer"
8
8
  require "lrama/grammar/reference"
9
9
  require "lrama/grammar/rule"
10
10
  require "lrama/grammar/rule_builder"
11
+ require "lrama/grammar/parameterizing_rule_builder"
12
+ require "lrama/grammar/parameterizing_rule_resolver"
13
+ require "lrama/grammar/parameterizing_rule_rhs_builder"
14
+ require "lrama/grammar/parameterizing_rule"
11
15
  require "lrama/grammar/symbol"
12
16
  require "lrama/grammar/type"
13
17
  require "lrama/grammar/union"
@@ -36,6 +40,7 @@ module Lrama
36
40
  @rule_builders = []
37
41
  @rules = []
38
42
  @sym_to_rules = {}
43
+ @parameterizing_resolver = ParameterizingRuleResolver.new
39
44
  @empty_symbol = nil
40
45
  @eof_symbol = nil
41
46
  @error_symbol = nil
@@ -69,7 +74,7 @@ module Lrama
69
74
  return sym
70
75
  end
71
76
 
72
- if sym = @symbols.find {|s| s.id == id }
77
+ if (sym = @symbols.find {|s| s.id == id })
73
78
  return sym
74
79
  end
75
80
 
@@ -129,6 +134,10 @@ module Lrama
129
134
  @rule_builders << builder
130
135
  end
131
136
 
137
+ def add_parameterizing_rule_builder(builder)
138
+ @parameterizing_resolver.add_parameterizing_rule_builder(builder)
139
+ end
140
+
132
141
  def prologue_first_lineno=(prologue_first_lineno)
133
142
  @aux.prologue_first_lineno = prologue_first_lineno
134
143
  end
@@ -310,7 +319,7 @@ module Lrama
310
319
 
311
320
  def setup_rules
312
321
  @rule_builders.each do |builder|
313
- builder.setup_rules
322
+ builder.setup_rules(@parameterizing_resolver)
314
323
  end
315
324
  end
316
325
 
@@ -350,56 +359,21 @@ module Lrama
350
359
  @accept_symbol = term
351
360
  end
352
361
 
353
- # 1. Add $accept rule to the top of rules
354
- # 2. Extract action in the middle of RHS into new Empty rule
355
- # 3. Append id and extract action then create Rule
356
- #
357
- # Bison 3.8.2 uses different orders for symbol number and rule number
358
- # when a rule has actions in the middle of a rule.
359
- #
360
- # For example,
361
- #
362
- # `program: $@1 top_compstmt`
363
- #
364
- # Rules are ordered like below,
365
- #
366
- # 1 $@1: ε
367
- # 2 program: $@1 top_compstmt
368
- #
369
- # Symbols are ordered like below,
370
- #
371
- # 164 program
372
- # 165 $@1
373
- #
374
362
  def normalize_rules
375
- # 1. Add $accept rule to the top of rules
376
- accept = @accept_symbol
377
- eof = @eof_symbol
363
+ # Add $accept rule to the top of rules
378
364
  lineno = @rule_builders.first ? @rule_builders.first.line : 0
379
- @rules << Rule.new(id: @rule_counter.increment, _lhs: accept.id, _rhs: [@rule_builders.first.lhs, eof.id], token_code: nil, lineno: lineno)
365
+ @rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [@rule_builders.first.lhs, @eof_symbol.id], token_code: nil, lineno: lineno)
380
366
 
381
367
  setup_rules
382
368
 
383
369
  @rule_builders.each do |builder|
384
- # Extract actions in the middle of RHS into new rules.
385
- builder.midrule_action_rules.each do |rule|
386
- @rules << rule
387
- end
388
-
389
370
  builder.rules.each do |rule|
390
- add_nterm(id: rule._lhs)
391
- @rules << rule
392
- end
393
-
394
- builder.parameterizing_rules.each do |rule|
395
371
  add_nterm(id: rule._lhs, tag: rule.lhs_tag)
396
372
  @rules << rule
397
373
  end
398
-
399
- builder.midrule_action_rules.each do |rule|
400
- add_nterm(id: rule._lhs)
401
- end
402
374
  end
375
+
376
+ @rules.sort_by!(&:id)
403
377
  end
404
378
 
405
379
  # Collect symbols from rules
@@ -0,0 +1,21 @@
1
+ module Lrama
2
+ class Lexer
3
+ class GrammarFile
4
+ attr_reader :path, :text
5
+
6
+ def initialize(path, text)
7
+ @path = path
8
+ @text = text
9
+ end
10
+
11
+ def ==(other)
12
+ self.class == other.class &&
13
+ self.path == other.path
14
+ end
15
+
16
+ def lines
17
+ @lines ||= text.split("\n")
18
+ end
19
+ end
20
+ end
21
+ end
@@ -1,9 +1,10 @@
1
1
  module Lrama
2
2
  class Lexer
3
3
  class Location
4
- attr_reader :first_line, :first_column, :last_line, :last_column
4
+ attr_reader :grammar_file, :first_line, :first_column, :last_line, :last_column
5
5
 
6
- def initialize(first_line:, first_column:, last_line:, last_column:)
6
+ def initialize(grammar_file:, first_line:, first_column:, last_line:, last_column:)
7
+ @grammar_file = grammar_file
7
8
  @first_line = first_line
8
9
  @first_column = first_column
9
10
  @last_line = last_line
@@ -12,11 +13,85 @@ module Lrama
12
13
 
13
14
  def ==(other)
14
15
  self.class == other.class &&
16
+ self.grammar_file == other.grammar_file &&
15
17
  self.first_line == other.first_line &&
16
18
  self.first_column == other.first_column &&
17
19
  self.last_line == other.last_line &&
18
20
  self.last_column == other.last_column
19
21
  end
22
+
23
+ def partial_location(left, right)
24
+ offset = -first_column
25
+ new_first_line = -1
26
+ new_first_column = -1
27
+ new_last_line = -1
28
+ new_last_column = -1
29
+
30
+ _text.each.with_index do |line, index|
31
+ new_offset = offset + line.length + 1
32
+
33
+ if offset <= left && left <= new_offset
34
+ new_first_line = first_line + index
35
+ new_first_column = left - offset
36
+ end
37
+
38
+ if offset <= right && right <= new_offset
39
+ new_last_line = first_line + index
40
+ new_last_column = right - offset
41
+ end
42
+
43
+ offset = new_offset
44
+ end
45
+
46
+ Location.new(
47
+ grammar_file: grammar_file,
48
+ first_line: new_first_line, first_column: new_first_column,
49
+ last_line: new_last_line, last_column: new_last_column
50
+ )
51
+ end
52
+
53
+ def to_s
54
+ "#{path} (#{first_line},#{first_column})-(#{last_line},#{last_column})"
55
+ end
56
+
57
+ def generate_error_message(error_message)
58
+ <<~ERROR.chomp
59
+ #{path}:#{first_line}:#{first_column}: #{error_message}
60
+ #{line_with_carets}
61
+ ERROR
62
+ end
63
+
64
+ def line_with_carets
65
+ <<~TEXT
66
+ #{text}
67
+ #{carets}
68
+ TEXT
69
+ end
70
+
71
+ private
72
+
73
+ def path
74
+ grammar_file.path
75
+ end
76
+
77
+ def blanks
78
+ (text[0...first_column] or raise "#{first_column} is invalid").gsub(/[^\t]/, ' ')
79
+ end
80
+
81
+ def carets
82
+ blanks + '^' * (last_column - first_column)
83
+ end
84
+
85
+ def text
86
+ @text ||= _text.join("\n")
87
+ end
88
+
89
+ def _text
90
+ @_text ||=begin
91
+ range = (first_line - 1)...last_line
92
+ grammar_file.lines[range] or raise "#{range} is invalid"
93
+ end
94
+ end
20
95
  end
21
96
  end
22
97
  end
@@ -0,0 +1,18 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class InstantiateRule < Token
5
+ attr_accessor :args
6
+
7
+ def initialize(s_value:, alias_name: nil, location: nil, args: [])
8
+ super s_value: s_value, alias_name: alias_name, location: location
9
+ @args = args
10
+ end
11
+
12
+ def rule_name
13
+ s_value
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
@@ -35,27 +35,27 @@ module Lrama
35
35
  # An identifier needs to be wrapped in brackets to use ".-" in its name
36
36
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
37
37
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
38
- return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
38
+ return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
39
39
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
40
40
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
41
- return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
41
+ return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
42
42
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
43
43
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
44
- return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
45
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
44
+ return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
45
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets)
46
46
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
47
- return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
47
+ return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
48
48
 
49
49
  # @ references
50
50
  # An identifier needs to be wrapped in brackets to use ".-" in its name
51
51
  when scanner.scan(/@\$/) # @$
52
- return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos - 1)
52
+ return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
53
53
  when scanner.scan(/@(\d+)/) # @1
54
- return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
54
+ return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
55
55
  when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
56
- return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
57
- when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
58
- return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
56
+ return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
57
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
58
+ return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
59
59
  end
60
60
  end
61
61
  end
@@ -1,6 +1,6 @@
1
1
  require 'lrama/lexer/token/char'
2
2
  require 'lrama/lexer/token/ident'
3
- require 'lrama/lexer/token/parameterizing'
3
+ require 'lrama/lexer/token/instantiate_rule'
4
4
  require 'lrama/lexer/token/tag'
5
5
  require 'lrama/lexer/token/user_code'
6
6
 
data/lib/lrama/lexer.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "strscan"
2
+ require "lrama/lexer/grammar_file"
2
3
  require "lrama/lexer/location"
3
4
  require "lrama/lexer/token"
4
5
 
@@ -28,10 +29,12 @@ module Lrama
28
29
  %error-token
29
30
  %empty
30
31
  %code
32
+ %rule
31
33
  )
32
34
 
33
- def initialize(text)
34
- @scanner = StringScanner.new(text)
35
+ def initialize(grammar_file)
36
+ @grammar_file = grammar_file
37
+ @scanner = StringScanner.new(grammar_file.text)
35
38
  @head_column = @head = @scanner.pos
36
39
  @head_line = @line = 1
37
40
  @status = :initial
@@ -57,8 +60,9 @@ module Lrama
57
60
 
58
61
  def location
59
62
  Location.new(
63
+ grammar_file: @grammar_file,
60
64
  first_line: @head_line, first_column: @head_column,
61
- last_line: @line, last_column: column
65
+ last_line: line, last_column: column
62
66
  )
63
67
  end
64
68
 
@@ -78,8 +82,7 @@ module Lrama
78
82
  end
79
83
  end
80
84
 
81
- @head_line = line
82
- @head_column = column
85
+ reset_first_position
83
86
 
84
87
  case
85
88
  when @scanner.eos?
@@ -117,6 +120,8 @@ module Lrama
117
120
  def lex_c_code
118
121
  nested = 0
119
122
  code = ''
123
+ reset_first_position
124
+
120
125
  while !@scanner.eos? do
121
126
  case
122
127
  when @scanner.scan(/{/)
@@ -140,12 +145,12 @@ module Lrama
140
145
  @line += @scanner.matched.count("\n")
141
146
  when @scanner.scan(/'.*?'/)
142
147
  code += %Q(#{@scanner.matched})
148
+ when @scanner.scan(/[^\"'\{\}\n]+/)
149
+ code += @scanner.matched
150
+ when @scanner.scan(/#{Regexp.escape(@end_symbol)}/)
151
+ code += @scanner.matched
143
152
  else
144
- if @scanner.scan(/[^\"'\{\}\n#{@end_symbol}]+/)
145
- code += @scanner.matched
146
- else
147
- code += @scanner.getch
148
- end
153
+ code += @scanner.getch
149
154
  end
150
155
  end
151
156
  raise ParseError, "Unexpected code: #{code}."
@@ -166,9 +171,14 @@ module Lrama
166
171
  end
167
172
  end
168
173
 
174
+ def reset_first_position
175
+ @head_line = line
176
+ @head_column = column
177
+ end
178
+
169
179
  def newline
170
180
  @line += 1
171
- @head = @scanner.pos + 1
181
+ @head = @scanner.pos
172
182
  end
173
183
  end
174
184
  end