lrama 0.5.12 → 0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yaml +20 -5
  3. data/Gemfile +1 -1
  4. data/NEWS.md +66 -0
  5. data/README.md +14 -3
  6. data/Steepfile +2 -0
  7. data/lib/lrama/command.rb +17 -3
  8. data/lib/lrama/context.rb +2 -22
  9. data/lib/lrama/grammar/binding.rb +24 -0
  10. data/lib/lrama/grammar/code/printer_code.rb +1 -1
  11. data/lib/lrama/grammar/code/rule_action.rb +2 -2
  12. data/lib/lrama/grammar/code.rb +19 -7
  13. data/lib/lrama/grammar/parameterizing_rule/resolver.rb +39 -0
  14. data/lib/lrama/grammar/parameterizing_rule/rhs.rb +15 -0
  15. data/lib/lrama/grammar/parameterizing_rule/rule.rb +16 -0
  16. data/lib/lrama/grammar/parameterizing_rule.rb +3 -0
  17. data/lib/lrama/grammar/percent_code.rb +3 -3
  18. data/lib/lrama/grammar/rule_builder.rb +69 -31
  19. data/lib/lrama/grammar/type.rb +13 -1
  20. data/lib/lrama/grammar.rb +15 -43
  21. data/lib/lrama/lexer/grammar_file.rb +21 -0
  22. data/lib/lrama/lexer/location.rb +77 -2
  23. data/lib/lrama/lexer/token/instantiate_rule.rb +23 -0
  24. data/lib/lrama/lexer/token/user_code.rb +10 -10
  25. data/lib/lrama/lexer/token.rb +6 -1
  26. data/lib/lrama/lexer.rb +23 -18
  27. data/lib/lrama/output.rb +2 -2
  28. data/lib/lrama/parser.rb +641 -458
  29. data/lib/lrama/states_reporter.rb +1 -1
  30. data/lib/lrama/version.rb +1 -1
  31. data/parser.y +97 -32
  32. data/sig/lrama/grammar/binding.rbs +16 -0
  33. data/sig/lrama/grammar/code/printer_code.rbs +1 -1
  34. data/sig/lrama/grammar/code.rbs +5 -5
  35. data/sig/lrama/grammar/parameterizing_rule/resolver.rbs +21 -0
  36. data/sig/lrama/grammar/parameterizing_rule/rhs.rbs +13 -0
  37. data/sig/lrama/grammar/parameterizing_rule/rule.rbs +14 -0
  38. data/sig/lrama/grammar/parameterizing_rule.rbs +6 -0
  39. data/sig/lrama/grammar/parameterizing_rules/builder/base.rbs +2 -2
  40. data/sig/lrama/grammar/parameterizing_rules/builder.rbs +1 -1
  41. data/sig/lrama/grammar/percent_code.rbs +3 -3
  42. data/sig/lrama/grammar/rule_builder.rbs +9 -8
  43. data/sig/lrama/lexer/grammar_file.rbs +15 -0
  44. data/sig/lrama/lexer/location.rbs +13 -1
  45. data/sig/lrama/lexer/token/instantiate_rule.rbs +14 -0
  46. data/sig/lrama/lexer/token.rbs +1 -0
  47. metadata +17 -5
  48. data/lib/lrama/lexer/token/parameterizing.rb +0 -34
  49. data/sig/lrama/lexer/token/parameterizing.rbs +0 -17
@@ -3,21 +3,22 @@ require 'lrama/grammar/parameterizing_rules/builder'
3
3
  module Lrama
4
4
  class Grammar
5
5
  class RuleBuilder
6
- attr_accessor :lhs, :lhs_tag, :line
7
- attr_reader :rhs, :user_code, :precedence_sym
6
+ attr_accessor :lhs, :line
7
+ attr_reader :lhs_tag, :rhs, :user_code, :precedence_sym
8
8
 
9
- def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, skip_preprocess_references: false)
9
+ def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false)
10
10
  @rule_counter = rule_counter
11
11
  @midrule_action_counter = midrule_action_counter
12
12
  @position_in_original_rule_rhs = position_in_original_rule_rhs
13
13
  @skip_preprocess_references = skip_preprocess_references
14
14
 
15
15
  @lhs = nil
16
+ @lhs_tag = lhs_tag
16
17
  @rhs = []
17
- @lhs_tag = nil
18
18
  @user_code = nil
19
19
  @precedence_sym = nil
20
20
  @line = nil
21
+ @rule_builders_for_parameterizing_rules = []
21
22
  @rule_builders_for_derived_rules = []
22
23
  end
23
24
 
@@ -33,7 +34,7 @@ module Lrama
33
34
 
34
35
  def user_code=(user_code)
35
36
  if !@line
36
- @line = user_code.line
37
+ @line = user_code&.line
37
38
  end
38
39
 
39
40
  flush_user_code
@@ -51,22 +52,14 @@ module Lrama
51
52
  freeze_rhs
52
53
  end
53
54
 
54
- def setup_rules
55
+ def setup_rules(parameterizing_rule_resolver)
55
56
  preprocess_references unless @skip_preprocess_references
56
- process_rhs
57
+ process_rhs(parameterizing_rule_resolver)
57
58
  build_rules
58
59
  end
59
60
 
60
- def parameterizing_rules
61
- @parameterizing_rules
62
- end
63
-
64
- def midrule_action_rules
65
- @midrule_action_rules
66
- end
67
-
68
61
  def rules
69
- @rules
62
+ @parameterizing_rules + @old_parameterizing_rules + @midrule_action_rules + @rules
70
63
  end
71
64
 
72
65
  private
@@ -83,10 +76,13 @@ module Lrama
83
76
  tokens = @replaced_rhs
84
77
 
85
78
  rule = Rule.new(
86
- id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, token_code: user_code,
79
+ id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, lhs_tag: lhs_tag, token_code: user_code,
87
80
  position_in_original_rule_rhs: @position_in_original_rule_rhs, precedence_sym: precedence_sym, lineno: line
88
81
  )
89
82
  @rules = [rule]
83
+ @parameterizing_rules = @rule_builders_for_parameterizing_rules.map do |rule_builder|
84
+ rule_builder.rules
85
+ end.flatten
90
86
  @midrule_action_rules = @rule_builders_for_derived_rules.map do |rule_builder|
91
87
  rule_builder.rules
92
88
  end.flatten
@@ -95,13 +91,13 @@ module Lrama
95
91
  end
96
92
  end
97
93
 
98
- # rhs is a mixture of variety type of tokens like `Ident`, `Parameterizing`, `UserCode` and so on.
94
+ # rhs is a mixture of variety type of tokens like `Ident`, `InstantiateRule`, `UserCode` and so on.
99
95
  # `#process_rhs` replaces some kind of tokens to `Ident` so that all `@replaced_rhs` are `Ident` or `Char`.
100
- def process_rhs
96
+ def process_rhs(parameterizing_rule_resolver)
101
97
  return if @replaced_rhs
102
98
 
103
99
  @replaced_rhs = []
104
- @parameterizing_rules = []
100
+ @old_parameterizing_rules = []
105
101
 
106
102
  rhs.each_with_index do |token, i|
107
103
  case token
@@ -109,22 +105,47 @@ module Lrama
109
105
  @replaced_rhs << token
110
106
  when Lrama::Lexer::Token::Ident
111
107
  @replaced_rhs << token
112
- when Lrama::Lexer::Token::Parameterizing
113
- parameterizing = ParameterizingRules::Builder.new(token, @rule_counter, @lhs_tag, user_code, precedence_sym, line)
114
- parameterizing.build.each do |r|
115
- @parameterizing_rules << r
108
+ when Lrama::Lexer::Token::InstantiateRule
109
+ if parameterizing_rule_resolver.defined?(token)
110
+ parameterizing_rule = parameterizing_rule_resolver.find(token)
111
+ raise "Unexpected token. #{token}" unless parameterizing_rule
112
+
113
+ bindings = Binding.new(parameterizing_rule, token.args)
114
+ lhs_s_value = lhs_s_value(token, bindings)
115
+ if (created_lhs = parameterizing_rule_resolver.created_lhs(lhs_s_value))
116
+ @replaced_rhs << created_lhs
117
+ else
118
+ lhs_token = Lrama::Lexer::Token::Ident.new(s_value: lhs_s_value, location: token.location)
119
+ @replaced_rhs << lhs_token
120
+ parameterizing_rule_resolver.created_lhs_list << lhs_token
121
+ parameterizing_rule.rhs_list.each do |r|
122
+ rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, lhs_tag: token.lhs_tag, skip_preprocess_references: true)
123
+ rule_builder.lhs = lhs_token
124
+ r.symbols.each { |sym| rule_builder.add_rhs(bindings.resolve_symbol(sym)) }
125
+ rule_builder.line = line
126
+ rule_builder.user_code = r.user_code
127
+ rule_builder.precedence_sym = r.precedence_sym
128
+ rule_builder.complete_input
129
+ rule_builder.setup_rules(parameterizing_rule_resolver)
130
+ @rule_builders_for_parameterizing_rules << rule_builder
131
+ end
132
+ end
133
+ else
134
+ # TODO: Delete when the standard library will defined as a grammar file.
135
+ parameterizing_rule = ParameterizingRules::Builder.new(token, @rule_counter, token.lhs_tag, user_code, precedence_sym, line)
136
+ @old_parameterizing_rules = @old_parameterizing_rules + parameterizing_rule.build
137
+ @replaced_rhs << parameterizing_rule.build_token
116
138
  end
117
- @replaced_rhs << parameterizing.build_token
118
139
  when Lrama::Lexer::Token::UserCode
119
140
  prefix = token.referred ? "@" : "$@"
120
141
  new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s)
121
142
  @replaced_rhs << new_token
122
143
 
123
- rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, skip_preprocess_references: true)
144
+ rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, lhs_tag: lhs_tag, skip_preprocess_references: true)
124
145
  rule_builder.lhs = new_token
125
146
  rule_builder.user_code = token
126
147
  rule_builder.complete_input
127
- rule_builder.setup_rules
148
+ rule_builder.setup_rules(parameterizing_rule_resolver)
128
149
 
129
150
  @rule_builders_for_derived_rules << rule_builder
130
151
  else
@@ -133,6 +154,18 @@ module Lrama
133
154
  end
134
155
  end
135
156
 
157
+ def lhs_s_value(token, bindings)
158
+ s_values = token.args.map do |arg|
159
+ resolved = bindings.resolve_symbol(arg)
160
+ if resolved.is_a?(Lexer::Token::InstantiateRule)
161
+ [resolved.s_value, resolved.args.map(&:s_value)]
162
+ else
163
+ resolved.s_value
164
+ end
165
+ end
166
+ "#{token.rule_name}_#{s_values.join('_')}"
167
+ end
168
+
136
169
  def numberize_references
137
170
  # Bison n'th component is 1-origin
138
171
  (rhs + [user_code]).compact.each.with_index(1) do |token, i|
@@ -146,8 +179,13 @@ module Lrama
146
179
  else
147
180
  candidates = rhs.each_with_index.select {|token, i| token.referred_by?(ref_name) }
148
181
 
149
- raise "Referring symbol `#{ref_name}` is duplicated. #{token}" if candidates.size >= 2
150
- raise "Referring symbol `#{ref_name}` is not found. #{token}" unless referring_symbol = candidates.first
182
+ if candidates.size >= 2
183
+ token.invalid_ref(ref, "Referring symbol `#{ref_name}` is duplicated.")
184
+ end
185
+
186
+ unless (referring_symbol = candidates.first)
187
+ token.invalid_ref(ref, "Referring symbol `#{ref_name}` is not found.")
188
+ end
151
189
 
152
190
  ref.index = referring_symbol[1] + 1
153
191
  end
@@ -159,7 +197,7 @@ module Lrama
159
197
  if ref.index
160
198
  # TODO: Prohibit $0 even so Bison allows it?
161
199
  # See: https://www.gnu.org/software/bison/manual/html_node/Actions.html
162
- raise "Can not refer following component. #{ref.index} >= #{i}. #{token}" if ref.index >= i
200
+ token.invalid_ref(ref, "Can not refer following component. #{ref.index} >= #{i}.") if ref.index >= i
163
201
  rhs[ref.index - 1].referred = true
164
202
  end
165
203
  end
@@ -167,7 +205,7 @@ module Lrama
167
205
  end
168
206
 
169
207
  def flush_user_code
170
- if c = @user_code
208
+ if (c = @user_code)
171
209
  @rhs << c
172
210
  @user_code = nil
173
211
  end
@@ -1,6 +1,18 @@
1
1
  module Lrama
2
2
  class Grammar
3
- class Type < Struct.new(:id, :tag, keyword_init: true)
3
+ class Type
4
+ attr_reader :id, :tag
5
+
6
+ def initialize(id:, tag:)
7
+ @id = id
8
+ @tag = tag
9
+ end
10
+
11
+ def ==(other)
12
+ self.class == other.class &&
13
+ self.id == other.id &&
14
+ self.tag == other.tag
15
+ end
4
16
  end
5
17
  end
6
18
  end
data/lib/lrama/grammar.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "lrama/grammar/auxiliary"
2
+ require "lrama/grammar/binding"
2
3
  require "lrama/grammar/code"
3
4
  require "lrama/grammar/counter"
4
5
  require "lrama/grammar/error_token"
@@ -8,6 +9,7 @@ require "lrama/grammar/printer"
8
9
  require "lrama/grammar/reference"
9
10
  require "lrama/grammar/rule"
10
11
  require "lrama/grammar/rule_builder"
12
+ require "lrama/grammar/parameterizing_rule"
11
13
  require "lrama/grammar/symbol"
12
14
  require "lrama/grammar/type"
13
15
  require "lrama/grammar/union"
@@ -36,6 +38,7 @@ module Lrama
36
38
  @rule_builders = []
37
39
  @rules = []
38
40
  @sym_to_rules = {}
41
+ @parameterizing_rule_resolver = ParameterizingRule::Resolver.new
39
42
  @empty_symbol = nil
40
43
  @eof_symbol = nil
41
44
  @error_symbol = nil
@@ -47,7 +50,7 @@ module Lrama
47
50
  end
48
51
 
49
52
  def add_percent_code(id:, code:)
50
- @percent_codes << PercentCode.new(id, code)
53
+ @percent_codes << PercentCode.new(id.s_value, code.s_value)
51
54
  end
52
55
 
53
56
  def add_printer(ident_or_tags:, token_code:, lineno:)
@@ -69,7 +72,7 @@ module Lrama
69
72
  return sym
70
73
  end
71
74
 
72
- if sym = @symbols.find {|s| s.id == id }
75
+ if (sym = @symbols.find {|s| s.id == id })
73
76
  return sym
74
77
  end
75
78
 
@@ -129,6 +132,10 @@ module Lrama
129
132
  @rule_builders << builder
130
133
  end
131
134
 
135
+ def add_parameterizing_rule(rule)
136
+ @parameterizing_rule_resolver.add_parameterizing_rule(rule)
137
+ end
138
+
132
139
  def prologue_first_lineno=(prologue_first_lineno)
133
140
  @aux.prologue_first_lineno = prologue_first_lineno
134
141
  end
@@ -162,7 +169,7 @@ module Lrama
162
169
 
163
170
  # TODO: More validation methods
164
171
  #
165
- # * Validaiton for no_declared_type_reference
172
+ # * Validation for no_declared_type_reference
166
173
  def validate!
167
174
  validate_symbol_number_uniqueness!
168
175
  validate_symbol_alias_name_uniqueness!
@@ -310,7 +317,7 @@ module Lrama
310
317
 
311
318
  def setup_rules
312
319
  @rule_builders.each do |builder|
313
- builder.setup_rules
320
+ builder.setup_rules(@parameterizing_rule_resolver)
314
321
  end
315
322
  end
316
323
 
@@ -350,56 +357,21 @@ module Lrama
350
357
  @accept_symbol = term
351
358
  end
352
359
 
353
- # 1. Add $accept rule to the top of rules
354
- # 2. Extract action in the middle of RHS into new Empty rule
355
- # 3. Append id and extract action then create Rule
356
- #
357
- # Bison 3.8.2 uses different orders for symbol number and rule number
358
- # when a rule has actions in the middle of a rule.
359
- #
360
- # For example,
361
- #
362
- # `program: $@1 top_compstmt`
363
- #
364
- # Rules are ordered like below,
365
- #
366
- # 1 $@1: ε
367
- # 2 program: $@1 top_compstmt
368
- #
369
- # Symbols are ordered like below,
370
- #
371
- # 164 program
372
- # 165 $@1
373
- #
374
360
  def normalize_rules
375
- # 1. Add $accept rule to the top of rules
376
- accept = @accept_symbol
377
- eof = @eof_symbol
361
+ # Add $accept rule to the top of rules
378
362
  lineno = @rule_builders.first ? @rule_builders.first.line : 0
379
- @rules << Rule.new(id: @rule_counter.increment, _lhs: accept.id, _rhs: [@rule_builders.first.lhs, eof.id], token_code: nil, lineno: lineno)
363
+ @rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [@rule_builders.first.lhs, @eof_symbol.id], token_code: nil, lineno: lineno)
380
364
 
381
365
  setup_rules
382
366
 
383
367
  @rule_builders.each do |builder|
384
- # Extract actions in the middle of RHS into new rules.
385
- builder.midrule_action_rules.each do |rule|
386
- @rules << rule
387
- end
388
-
389
368
  builder.rules.each do |rule|
390
- add_nterm(id: rule._lhs)
391
- @rules << rule
392
- end
393
-
394
- builder.parameterizing_rules.each do |rule|
395
369
  add_nterm(id: rule._lhs, tag: rule.lhs_tag)
396
370
  @rules << rule
397
371
  end
398
-
399
- builder.midrule_action_rules.each do |rule|
400
- add_nterm(id: rule._lhs)
401
- end
402
372
  end
373
+
374
+ @rules.sort_by!(&:id)
403
375
  end
404
376
 
405
377
  # Collect symbols from rules
@@ -0,0 +1,21 @@
1
+ module Lrama
2
+ class Lexer
3
+ class GrammarFile
4
+ attr_reader :path, :text
5
+
6
+ def initialize(path, text)
7
+ @path = path
8
+ @text = text.freeze
9
+ end
10
+
11
+ def ==(other)
12
+ self.class == other.class &&
13
+ self.path == other.path
14
+ end
15
+
16
+ def lines
17
+ @lines ||= text.split("\n")
18
+ end
19
+ end
20
+ end
21
+ end
@@ -1,9 +1,10 @@
1
1
  module Lrama
2
2
  class Lexer
3
3
  class Location
4
- attr_reader :first_line, :first_column, :last_line, :last_column
4
+ attr_reader :grammar_file, :first_line, :first_column, :last_line, :last_column
5
5
 
6
- def initialize(first_line:, first_column:, last_line:, last_column:)
6
+ def initialize(grammar_file:, first_line:, first_column:, last_line:, last_column:)
7
+ @grammar_file = grammar_file
7
8
  @first_line = first_line
8
9
  @first_column = first_column
9
10
  @last_line = last_line
@@ -12,11 +13,85 @@ module Lrama
12
13
 
13
14
  def ==(other)
14
15
  self.class == other.class &&
16
+ self.grammar_file == other.grammar_file &&
15
17
  self.first_line == other.first_line &&
16
18
  self.first_column == other.first_column &&
17
19
  self.last_line == other.last_line &&
18
20
  self.last_column == other.last_column
19
21
  end
22
+
23
+ def partial_location(left, right)
24
+ offset = -first_column
25
+ new_first_line = -1
26
+ new_first_column = -1
27
+ new_last_line = -1
28
+ new_last_column = -1
29
+
30
+ _text.each.with_index do |line, index|
31
+ new_offset = offset + line.length + 1
32
+
33
+ if offset <= left && left <= new_offset
34
+ new_first_line = first_line + index
35
+ new_first_column = left - offset
36
+ end
37
+
38
+ if offset <= right && right <= new_offset
39
+ new_last_line = first_line + index
40
+ new_last_column = right - offset
41
+ end
42
+
43
+ offset = new_offset
44
+ end
45
+
46
+ Location.new(
47
+ grammar_file: grammar_file,
48
+ first_line: new_first_line, first_column: new_first_column,
49
+ last_line: new_last_line, last_column: new_last_column
50
+ )
51
+ end
52
+
53
+ def to_s
54
+ "#{path} (#{first_line},#{first_column})-(#{last_line},#{last_column})"
55
+ end
56
+
57
+ def generate_error_message(error_message)
58
+ <<~ERROR.chomp
59
+ #{path}:#{first_line}:#{first_column}: #{error_message}
60
+ #{line_with_carets}
61
+ ERROR
62
+ end
63
+
64
+ def line_with_carets
65
+ <<~TEXT
66
+ #{text}
67
+ #{carets}
68
+ TEXT
69
+ end
70
+
71
+ private
72
+
73
+ def path
74
+ grammar_file.path
75
+ end
76
+
77
+ def blanks
78
+ (text[0...first_column] or raise "#{first_column} is invalid").gsub(/[^\t]/, ' ')
79
+ end
80
+
81
+ def carets
82
+ blanks + '^' * (last_column - first_column)
83
+ end
84
+
85
+ def text
86
+ @text ||= _text.join("\n")
87
+ end
88
+
89
+ def _text
90
+ @_text ||=begin
91
+ range = (first_line - 1)...last_line
92
+ grammar_file.lines[range] or raise "#{range} is invalid"
93
+ end
94
+ end
20
95
  end
21
96
  end
22
97
  end
@@ -0,0 +1,23 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class InstantiateRule < Token
5
+ attr_reader :args, :lhs_tag
6
+
7
+ def initialize(s_value:, alias_name: nil, location: nil, args: [], lhs_tag: nil)
8
+ super s_value: s_value, alias_name: alias_name, location: location
9
+ @args = args
10
+ @lhs_tag = lhs_tag
11
+ end
12
+
13
+ def rule_name
14
+ s_value
15
+ end
16
+
17
+ def args_count
18
+ args.count
19
+ end
20
+ end
21
+ end
22
+ end
23
+ end
@@ -35,27 +35,27 @@ module Lrama
35
35
  # It need to wrap an identifier with brackets to use ".-" for identifiers
36
36
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
37
37
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
38
- return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
38
+ return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
39
39
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
40
40
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
41
- return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
41
+ return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
42
42
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
43
43
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
44
- return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
45
- when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
44
+ return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
45
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets)
46
46
  tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
47
- return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
47
+ return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
48
48
 
49
49
  # @ references
50
50
  # It need to wrap an identifier with brackets to use ".-" for identifiers
51
51
  when scanner.scan(/@\$/) # @$
52
- return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos - 1)
52
+ return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
53
53
  when scanner.scan(/@(\d+)/) # @1
54
- return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
54
+ return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
55
55
  when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
56
- return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
57
- when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
58
- return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
56
+ return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
57
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
58
+ return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
59
59
  end
60
60
  end
61
61
  end
@@ -1,6 +1,6 @@
1
1
  require 'lrama/lexer/token/char'
2
2
  require 'lrama/lexer/token/ident'
3
- require 'lrama/lexer/token/parameterizing'
3
+ require 'lrama/lexer/token/instantiate_rule'
4
4
  require 'lrama/lexer/token/tag'
5
5
  require 'lrama/lexer/token/user_code'
6
6
 
@@ -46,6 +46,11 @@ module Lrama
46
46
  def last_column
47
47
  location.last_column
48
48
  end
49
+
50
+ def invalid_ref(ref, message)
51
+ location = self.location.partial_location(ref.first_column, ref.last_column)
52
+ raise location.generate_error_message(message)
53
+ end
49
54
  end
50
55
  end
51
56
  end
data/lib/lrama/lexer.rb CHANGED
@@ -1,12 +1,12 @@
1
1
  require "strscan"
2
+ require "lrama/lexer/grammar_file"
2
3
  require "lrama/lexer/location"
3
4
  require "lrama/lexer/token"
4
5
 
5
6
  module Lrama
6
7
  class Lexer
7
- attr_reader :head_line, :head_column
8
- attr_accessor :status
9
- attr_accessor :end_symbol
8
+ attr_reader :head_line, :head_column, :line
9
+ attr_accessor :status, :end_symbol
10
10
 
11
11
  SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';']
12
12
  PERCENT_TOKENS = %w(
@@ -28,10 +28,12 @@ module Lrama
28
28
  %error-token
29
29
  %empty
30
30
  %code
31
+ %rule
31
32
  )
32
33
 
33
- def initialize(text)
34
- @scanner = StringScanner.new(text)
34
+ def initialize(grammar_file)
35
+ @grammar_file = grammar_file
36
+ @scanner = StringScanner.new(grammar_file.text)
35
37
  @head_column = @head = @scanner.pos
36
38
  @head_line = @line = 1
37
39
  @status = :initial
@@ -47,18 +49,15 @@ module Lrama
47
49
  end
48
50
  end
49
51
 
50
- def line
51
- @line
52
- end
53
-
54
52
  def column
55
53
  @scanner.pos - @head
56
54
  end
57
55
 
58
56
  def location
59
57
  Location.new(
58
+ grammar_file: @grammar_file,
60
59
  first_line: @head_line, first_column: @head_column,
61
- last_line: @line, last_column: column
60
+ last_line: line, last_column: column
62
61
  )
63
62
  end
64
63
 
@@ -78,8 +77,7 @@ module Lrama
78
77
  end
79
78
  end
80
79
 
81
- @head_line = line
82
- @head_column = column
80
+ reset_first_position
83
81
 
84
82
  case
85
83
  when @scanner.eos?
@@ -117,6 +115,8 @@ module Lrama
117
115
  def lex_c_code
118
116
  nested = 0
119
117
  code = ''
118
+ reset_first_position
119
+
120
120
  while !@scanner.eos? do
121
121
  case
122
122
  when @scanner.scan(/{/)
@@ -140,12 +140,12 @@ module Lrama
140
140
  @line += @scanner.matched.count("\n")
141
141
  when @scanner.scan(/'.*?'/)
142
142
  code += %Q(#{@scanner.matched})
143
+ when @scanner.scan(/[^\"'\{\}\n]+/)
144
+ code += @scanner.matched
145
+ when @scanner.scan(/#{Regexp.escape(@end_symbol)}/)
146
+ code += @scanner.matched
143
147
  else
144
- if @scanner.scan(/[^\"'\{\}\n#{@end_symbol}]+/)
145
- code += @scanner.matched
146
- else
147
- code += @scanner.getch
148
- end
148
+ code += @scanner.getch
149
149
  end
150
150
  end
151
151
  raise ParseError, "Unexpected code: #{code}."
@@ -166,9 +166,14 @@ module Lrama
166
166
  end
167
167
  end
168
168
 
169
+ def reset_first_position
170
+ @head_line = line
171
+ @head_column = column
172
+ end
173
+
169
174
  def newline
170
175
  @line += 1
171
- @head = @scanner.pos + 1
176
+ @head = @scanner.pos
172
177
  end
173
178
  end
174
179
  end
data/lib/lrama/output.rb CHANGED
@@ -352,9 +352,9 @@ module Lrama
352
352
  # b4_percent_code_get
353
353
  def percent_code(name)
354
354
  @grammar.percent_codes.select do |percent_code|
355
- percent_code.id.s_value == name
355
+ percent_code.name == name
356
356
  end.map do |percent_code|
357
- percent_code.code.s_value
357
+ percent_code.code
358
358
  end.join
359
359
  end
360
360