lrama 0.6.9 → 0.6.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (131) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yaml +24 -1
  3. data/.gitignore +2 -0
  4. data/Gemfile +6 -3
  5. data/NEWS.md +269 -14
  6. data/README.md +41 -4
  7. data/Rakefile +2 -0
  8. data/Steepfile +9 -17
  9. data/doc/development/compressed_state_table/main.md +635 -0
  10. data/doc/development/compressed_state_table/parse.output +174 -0
  11. data/doc/development/compressed_state_table/parse.y +22 -0
  12. data/doc/development/compressed_state_table/parser.rb +282 -0
  13. data/exe/lrama +1 -0
  14. data/lib/lrama/bitmap.rb +3 -1
  15. data/lib/lrama/command.rb +8 -14
  16. data/lib/lrama/context.rb +11 -9
  17. data/lib/lrama/counterexamples/derivation.rb +8 -5
  18. data/lib/lrama/counterexamples/example.rb +9 -4
  19. data/lib/lrama/counterexamples/path.rb +6 -0
  20. data/lib/lrama/counterexamples/production_path.rb +2 -0
  21. data/lib/lrama/counterexamples/start_path.rb +2 -0
  22. data/lib/lrama/counterexamples/state_item.rb +2 -0
  23. data/lib/lrama/counterexamples/transition_path.rb +2 -0
  24. data/lib/lrama/counterexamples/triple.rb +2 -0
  25. data/lib/lrama/counterexamples.rb +36 -24
  26. data/lib/lrama/diagnostics.rb +36 -0
  27. data/lib/lrama/digraph.rb +2 -0
  28. data/lib/lrama/grammar/auxiliary.rb +2 -0
  29. data/lib/lrama/grammar/binding.rb +12 -1
  30. data/lib/lrama/grammar/code/destructor_code.rb +2 -0
  31. data/lib/lrama/grammar/code/initial_action_code.rb +2 -0
  32. data/lib/lrama/grammar/code/no_reference_code.rb +2 -0
  33. data/lib/lrama/grammar/code/printer_code.rb +2 -0
  34. data/lib/lrama/grammar/code/rule_action.rb +7 -3
  35. data/lib/lrama/grammar/code.rb +7 -5
  36. data/lib/lrama/grammar/counter.rb +2 -0
  37. data/lib/lrama/grammar/destructor.rb +2 -0
  38. data/lib/lrama/grammar/error_token.rb +2 -0
  39. data/lib/lrama/grammar/parameterizing_rule/resolver.rb +7 -1
  40. data/lib/lrama/grammar/parameterizing_rule/rhs.rb +6 -3
  41. data/lib/lrama/grammar/parameterizing_rule/rule.rb +6 -0
  42. data/lib/lrama/grammar/parameterizing_rule.rb +2 -0
  43. data/lib/lrama/grammar/percent_code.rb +2 -0
  44. data/lib/lrama/grammar/precedence.rb +2 -0
  45. data/lib/lrama/grammar/printer.rb +2 -0
  46. data/lib/lrama/grammar/reference.rb +2 -0
  47. data/lib/lrama/grammar/rule.rb +10 -3
  48. data/lib/lrama/grammar/rule_builder.rb +64 -65
  49. data/lib/lrama/grammar/symbol.rb +2 -0
  50. data/lib/lrama/grammar/symbols/resolver.rb +9 -1
  51. data/lib/lrama/grammar/symbols.rb +2 -0
  52. data/lib/lrama/grammar/type.rb +2 -0
  53. data/lib/lrama/grammar/union.rb +2 -0
  54. data/lib/lrama/grammar.rb +53 -32
  55. data/lib/lrama/grammar_validator.rb +37 -0
  56. data/lib/lrama/lexer/grammar_file.rb +2 -0
  57. data/lib/lrama/lexer/location.rb +2 -0
  58. data/lib/lrama/lexer/token/char.rb +2 -0
  59. data/lib/lrama/lexer/token/ident.rb +2 -0
  60. data/lib/lrama/lexer/token/instantiate_rule.rb +2 -0
  61. data/lib/lrama/lexer/token/tag.rb +2 -0
  62. data/lib/lrama/lexer/token/user_code.rb +4 -2
  63. data/lib/lrama/lexer/token.rb +7 -5
  64. data/lib/lrama/lexer.rb +12 -8
  65. data/lib/lrama/{warning.rb → logger.rb} +5 -13
  66. data/lib/lrama/option_parser.rb +58 -33
  67. data/lib/lrama/options.rb +5 -2
  68. data/lib/lrama/output.rb +38 -69
  69. data/lib/lrama/parser.rb +677 -773
  70. data/lib/lrama/report/duration.rb +2 -0
  71. data/lib/lrama/report/profile.rb +2 -0
  72. data/lib/lrama/report.rb +4 -2
  73. data/lib/lrama/state/reduce.rb +4 -2
  74. data/lib/lrama/state/reduce_reduce_conflict.rb +2 -0
  75. data/lib/lrama/state/resolved_conflict.rb +3 -1
  76. data/lib/lrama/state/shift.rb +2 -0
  77. data/lib/lrama/state/shift_reduce_conflict.rb +2 -0
  78. data/lib/lrama/state.rb +7 -5
  79. data/lib/lrama/states/item.rb +5 -3
  80. data/lib/lrama/states.rb +18 -46
  81. data/lib/lrama/states_reporter.rb +60 -19
  82. data/lib/lrama/trace_reporter.rb +30 -0
  83. data/lib/lrama/version.rb +3 -1
  84. data/lib/lrama.rb +22 -17
  85. data/lrama.gemspec +3 -1
  86. data/parser.y +129 -237
  87. data/rbs_collection.lock.yaml +10 -2
  88. data/sig/lrama/counterexamples/derivation.rbs +33 -0
  89. data/sig/lrama/counterexamples/example.rbs +45 -0
  90. data/sig/lrama/counterexamples/path.rbs +21 -0
  91. data/sig/lrama/counterexamples/production_path.rbs +11 -0
  92. data/sig/lrama/counterexamples/start_path.rbs +13 -0
  93. data/sig/lrama/counterexamples/state_item.rbs +10 -0
  94. data/sig/lrama/counterexamples/transition_path.rbs +11 -0
  95. data/sig/lrama/counterexamples/triple.rbs +20 -0
  96. data/sig/lrama/counterexamples.rbs +29 -0
  97. data/sig/lrama/grammar/auxiliary.rbs +10 -0
  98. data/sig/lrama/grammar/binding.rbs +4 -0
  99. data/sig/lrama/grammar/code/destructor_code.rbs +3 -4
  100. data/sig/lrama/grammar/code/initial_action_code.rbs +15 -0
  101. data/sig/lrama/grammar/code/no_reference_code.rbs +15 -0
  102. data/sig/lrama/grammar/code/printer_code.rbs +3 -4
  103. data/sig/lrama/grammar/code/rule_action.rbs +19 -0
  104. data/sig/lrama/grammar/code.rbs +3 -3
  105. data/sig/lrama/grammar/destructor.rbs +3 -1
  106. data/sig/lrama/grammar/error_token.rbs +4 -2
  107. data/sig/lrama/grammar/parameterizing_rule/resolver.rbs +2 -1
  108. data/sig/lrama/grammar/parameterizing_rule/rhs.rbs +1 -1
  109. data/sig/lrama/grammar/precedence.rbs +3 -1
  110. data/sig/lrama/grammar/printer.rbs +3 -1
  111. data/sig/lrama/grammar/rule.rbs +35 -3
  112. data/sig/lrama/grammar/rule_builder.rbs +10 -9
  113. data/sig/lrama/grammar/symbol.rbs +6 -6
  114. data/sig/lrama/grammar/symbols/resolver.rbs +24 -5
  115. data/sig/lrama/grammar/type.rbs +2 -2
  116. data/sig/lrama/grammar/union.rbs +12 -0
  117. data/sig/lrama/grammar.rbs +104 -1
  118. data/sig/lrama/options.rbs +3 -2
  119. data/sig/lrama/state/reduce.rbs +20 -0
  120. data/sig/lrama/state/reduce_reduce_conflict.rbs +13 -0
  121. data/sig/lrama/state/resolved_conflict.rbs +14 -0
  122. data/sig/lrama/state/shift.rbs +14 -0
  123. data/sig/lrama/state/shift_reduce_conflict.rbs +13 -0
  124. data/sig/lrama/state.rbs +79 -0
  125. data/sig/lrama/states/item.rbs +30 -0
  126. data/sig/lrama/states.rbs +101 -0
  127. data/template/bison/yacc.c +24 -19
  128. metadata +32 -6
  129. data/sample/calc.output +0 -263
  130. data/sample/calc.y +0 -101
  131. data/sample/parse.y +0 -59
@@ -1,12 +1,15 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Lrama
2
4
  class Grammar
3
5
  class RuleBuilder
4
6
  attr_accessor :lhs, :line
5
7
  attr_reader :lhs_tag, :rhs, :user_code, :precedence_sym
6
8
 
7
- def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false)
9
+ def initialize(rule_counter, midrule_action_counter, parameterizing_rule_resolver, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false)
8
10
  @rule_counter = rule_counter
9
11
  @midrule_action_counter = midrule_action_counter
12
+ @parameterizing_rule_resolver = parameterizing_rule_resolver
10
13
  @position_in_original_rule_rhs = position_in_original_rule_rhs
11
14
  @skip_preprocess_references = skip_preprocess_references
12
15
 
@@ -19,16 +22,12 @@ module Lrama
19
22
  @rules = []
20
23
  @rule_builders_for_parameterizing_rules = []
21
24
  @rule_builders_for_derived_rules = []
22
- @rule_builders_for_inline_rules = []
23
25
  @parameterizing_rules = []
24
- @inline_rules = []
25
26
  @midrule_action_rules = []
26
27
  end
27
28
 
28
29
  def add_rhs(rhs)
29
- if !@line
30
- @line = rhs.line
31
- end
30
+ @line ||= rhs.line
32
31
 
33
32
  flush_user_code
34
33
 
@@ -36,9 +35,7 @@ module Lrama
36
35
  end
37
36
 
38
37
  def user_code=(user_code)
39
- if !@line
40
- @line = user_code&.line
41
- end
38
+ @line ||= user_code&.line
42
39
 
43
40
  flush_user_code
44
41
 
@@ -55,18 +52,41 @@ module Lrama
55
52
  freeze_rhs
56
53
  end
57
54
 
58
- def setup_rules(parameterizing_rule_resolver)
55
+ def setup_rules
59
56
  preprocess_references unless @skip_preprocess_references
60
- if rhs.any? { |token| parameterizing_rule_resolver.find_inline(token) }
61
- resolve_inline(parameterizing_rule_resolver)
62
- else
63
- process_rhs(parameterizing_rule_resolver)
64
- end
57
+ process_rhs
65
58
  build_rules
66
59
  end
67
60
 
68
61
  def rules
69
- @parameterizing_rules + @inline_rules + @midrule_action_rules + @rules
62
+ @parameterizing_rules + @midrule_action_rules + @rules
63
+ end
64
+
65
+ def has_inline_rules?
66
+ rhs.any? { |token| @parameterizing_rule_resolver.find_inline(token) }
67
+ end
68
+
69
+ def resolve_inline_rules
70
+ resolved_builders = [] #: Array[RuleBuilder]
71
+ rhs.each_with_index do |token, i|
72
+ if (inline_rule = @parameterizing_rule_resolver.find_inline(token))
73
+ inline_rule.rhs_list.each do |inline_rhs|
74
+ rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, lhs_tag: lhs_tag)
75
+ if token.is_a?(Lexer::Token::InstantiateRule)
76
+ resolve_inline_rhs(rule_builder, inline_rhs, i, Binding.new(inline_rule, token.args))
77
+ else
78
+ resolve_inline_rhs(rule_builder, inline_rhs, i)
79
+ end
80
+ rule_builder.lhs = lhs
81
+ rule_builder.line = line
82
+ rule_builder.precedence_sym = precedence_sym
83
+ rule_builder.user_code = replace_inline_user_code(inline_rhs, i)
84
+ resolved_builders << rule_builder
85
+ end
86
+ break
87
+ end
88
+ end
89
+ resolved_builders
70
90
  end
71
91
 
72
92
  private
@@ -82,31 +102,25 @@ module Lrama
82
102
  def build_rules
83
103
  tokens = @replaced_rhs
84
104
 
85
- if tokens
86
- rule = Rule.new(
87
- id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, lhs_tag: lhs_tag, token_code: user_code,
88
- position_in_original_rule_rhs: @position_in_original_rule_rhs, precedence_sym: precedence_sym, lineno: line
89
- )
90
- @rules = [rule]
91
- @parameterizing_rules = @rule_builders_for_parameterizing_rules.map do |rule_builder|
92
- rule_builder.rules
93
- end.flatten
94
- @midrule_action_rules = @rule_builders_for_derived_rules.map do |rule_builder|
95
- rule_builder.rules
96
- end.flatten
97
- @midrule_action_rules.each do |r|
98
- r.original_rule = rule
99
- end
100
- else
101
- @inline_rules = @rule_builders_for_inline_rules.map do |rule_builder|
102
- rule_builder.rules
103
- end.flatten
105
+ rule = Rule.new(
106
+ id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, lhs_tag: lhs_tag, token_code: user_code,
107
+ position_in_original_rule_rhs: @position_in_original_rule_rhs, precedence_sym: precedence_sym, lineno: line
108
+ )
109
+ @rules = [rule]
110
+ @parameterizing_rules = @rule_builders_for_parameterizing_rules.map do |rule_builder|
111
+ rule_builder.rules
112
+ end.flatten
113
+ @midrule_action_rules = @rule_builders_for_derived_rules.map do |rule_builder|
114
+ rule_builder.rules
115
+ end.flatten
116
+ @midrule_action_rules.each do |r|
117
+ r.original_rule = rule
104
118
  end
105
119
  end
106
120
 
107
121
  # rhs is a mixture of variety type of tokens like `Ident`, `InstantiateRule`, `UserCode` and so on.
108
122
  # `#process_rhs` replaces some kind of tokens to `Ident` so that all `@replaced_rhs` are `Ident` or `Char`.
109
- def process_rhs(parameterizing_rule_resolver)
123
+ def process_rhs
110
124
  return if @replaced_rhs
111
125
 
112
126
  @replaced_rhs = []
@@ -118,26 +132,26 @@ module Lrama
118
132
  when Lrama::Lexer::Token::Ident
119
133
  @replaced_rhs << token
120
134
  when Lrama::Lexer::Token::InstantiateRule
121
- parameterizing_rule = parameterizing_rule_resolver.find_rule(token)
135
+ parameterizing_rule = @parameterizing_rule_resolver.find_rule(token)
122
136
  raise "Unexpected token. #{token}" unless parameterizing_rule
123
137
 
124
138
  bindings = Binding.new(parameterizing_rule, token.args)
125
139
  lhs_s_value = lhs_s_value(token, bindings)
126
- if (created_lhs = parameterizing_rule_resolver.created_lhs(lhs_s_value))
140
+ if (created_lhs = @parameterizing_rule_resolver.created_lhs(lhs_s_value))
127
141
  @replaced_rhs << created_lhs
128
142
  else
129
143
  lhs_token = Lrama::Lexer::Token::Ident.new(s_value: lhs_s_value, location: token.location)
130
144
  @replaced_rhs << lhs_token
131
- parameterizing_rule_resolver.created_lhs_list << lhs_token
145
+ @parameterizing_rule_resolver.created_lhs_list << lhs_token
132
146
  parameterizing_rule.rhs_list.each do |r|
133
- rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, lhs_tag: token.lhs_tag || parameterizing_rule.tag)
147
+ rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, lhs_tag: token.lhs_tag || parameterizing_rule.tag)
134
148
  rule_builder.lhs = lhs_token
135
149
  r.symbols.each { |sym| rule_builder.add_rhs(bindings.resolve_symbol(sym)) }
136
150
  rule_builder.line = line
137
151
  rule_builder.precedence_sym = r.precedence_sym
138
152
  rule_builder.user_code = r.resolve_user_code(bindings)
139
153
  rule_builder.complete_input
140
- rule_builder.setup_rules(parameterizing_rule_resolver)
154
+ rule_builder.setup_rules
141
155
  @rule_builders_for_parameterizing_rules << rule_builder
142
156
  end
143
157
  end
@@ -147,11 +161,11 @@ module Lrama
147
161
  new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s)
148
162
  @replaced_rhs << new_token
149
163
 
150
- rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, lhs_tag: tag, skip_preprocess_references: true)
164
+ rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, i, lhs_tag: tag, skip_preprocess_references: true)
151
165
  rule_builder.lhs = new_token
152
166
  rule_builder.user_code = token
153
167
  rule_builder.complete_input
154
- rule_builder.setup_rules(parameterizing_rule_resolver)
168
+ rule_builder.setup_rules
155
169
 
156
170
  @rule_builders_for_derived_rules << rule_builder
157
171
  else
@@ -172,27 +186,10 @@ module Lrama
172
186
  "#{token.rule_name}_#{s_values.join('_')}"
173
187
  end
174
188
 
175
- def resolve_inline(parameterizing_rule_resolver)
176
- rhs.each_with_index do |token, i|
177
- if inline_rule = parameterizing_rule_resolver.find_inline(token)
178
- inline_rule.rhs_list.each_with_index do |inline_rhs|
179
- rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, lhs_tag: lhs_tag, skip_preprocess_references: true)
180
- resolve_inline_rhs(rule_builder, inline_rhs, i)
181
- rule_builder.lhs = lhs
182
- rule_builder.line = line
183
- rule_builder.user_code = replace_inline_user_code(inline_rhs, i)
184
- rule_builder.complete_input
185
- rule_builder.setup_rules(parameterizing_rule_resolver)
186
- @rule_builders_for_inline_rules << rule_builder
187
- end
188
- end
189
- end
190
- end
191
-
192
- def resolve_inline_rhs(rule_builder, inline_rhs, index)
189
+ def resolve_inline_rhs(rule_builder, inline_rhs, index, bindings = nil)
193
190
  rhs.each_with_index do |token, i|
194
191
  if index == i
195
- inline_rhs.symbols.each { |sym| rule_builder.add_rhs(sym) }
192
+ inline_rhs.symbols.each { |sym| rule_builder.add_rhs(bindings.nil? ? sym : bindings.resolve_symbol(sym)) }
196
193
  else
197
194
  rule_builder.add_rhs(token)
198
195
  end
@@ -204,6 +201,11 @@ module Lrama
204
201
  return user_code if user_code.nil?
205
202
 
206
203
  code = user_code.s_value.gsub(/\$#{index + 1}/, inline_rhs.user_code.s_value)
204
+ user_code.references.each do |ref|
205
+ next if ref.index.nil? || ref.index <= index # nil is a case for `$$`
206
+ code = code.gsub(/\$#{ref.index}/, "$#{ref.index + (inline_rhs.symbols.count-1)}")
207
+ code = code.gsub(/@#{ref.index}/, "@#{ref.index + (inline_rhs.symbols.count-1)}")
208
+ end
207
209
  Lrama::Lexer::Token::UserCode.new(s_value: code, location: user_code.location)
208
210
  end
209
211
 
@@ -238,9 +240,6 @@ module Lrama
238
240
  end
239
241
 
240
242
  if ref.number
241
- # TODO: When Inlining is implemented, for example, if `$1` is expanded to multiple RHS tokens,
242
- # `$2` needs to access `$2 + n` to actually access it. So, after the Inlining implementation,
243
- # it needs resolves from number to index.
244
243
  ref.index = ref.number
245
244
  end
246
245
 
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Symbol is both of nterm and term
2
4
  # `number` is both for nterm and term
3
5
  # `token_id` is tokentype for term, internal sequence number for nterm
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Lrama
2
4
  class Grammar
3
5
  class Symbols
@@ -42,7 +44,9 @@ module Lrama
42
44
  end
43
45
 
44
46
  def add_nterm(id:, alias_name: nil, tag: nil)
45
- return if find_symbol_by_id(id)
47
+ if (sym = find_symbol_by_id(id))
48
+ return sym
49
+ end
46
50
 
47
51
  @symbols = nil
48
52
  nterm = Symbol.new(
@@ -53,6 +57,10 @@ module Lrama
53
57
  nterm
54
58
  end
55
59
 
60
+ def find_term_by_s_value(s_value)
61
+ terms.find { |s| s.id.s_value == s_value }
62
+ end
63
+
56
64
  def find_symbol_by_s_value(s_value)
57
65
  symbols.find { |s| s.id.s_value == s_value }
58
66
  end
@@ -1 +1,3 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative "symbols/resolver"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Lrama
2
4
  class Grammar
3
5
  class Type
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Lrama
2
4
  class Grammar
3
5
  class Union < Struct.new(:code, :lineno, keyword_init: true)
data/lib/lrama/grammar.rb CHANGED
@@ -1,43 +1,40 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "forwardable"
2
- require "lrama/grammar/auxiliary"
3
- require "lrama/grammar/binding"
4
- require "lrama/grammar/code"
5
- require "lrama/grammar/counter"
6
- require "lrama/grammar/destructor"
7
- require "lrama/grammar/error_token"
8
- require "lrama/grammar/parameterizing_rule"
9
- require "lrama/grammar/percent_code"
10
- require "lrama/grammar/precedence"
11
- require "lrama/grammar/printer"
12
- require "lrama/grammar/reference"
13
- require "lrama/grammar/rule"
14
- require "lrama/grammar/rule_builder"
15
- require "lrama/grammar/symbol"
16
- require "lrama/grammar/symbols"
17
- require "lrama/grammar/type"
18
- require "lrama/grammar/union"
19
- require "lrama/lexer"
4
+ require_relative "grammar/auxiliary"
5
+ require_relative "grammar/binding"
6
+ require_relative "grammar/code"
7
+ require_relative "grammar/counter"
8
+ require_relative "grammar/destructor"
9
+ require_relative "grammar/error_token"
10
+ require_relative "grammar/parameterizing_rule"
11
+ require_relative "grammar/percent_code"
12
+ require_relative "grammar/precedence"
13
+ require_relative "grammar/printer"
14
+ require_relative "grammar/reference"
15
+ require_relative "grammar/rule"
16
+ require_relative "grammar/rule_builder"
17
+ require_relative "grammar/symbol"
18
+ require_relative "grammar/symbols"
19
+ require_relative "grammar/type"
20
+ require_relative "grammar/union"
21
+ require_relative "lexer"
20
22
 
21
23
  module Lrama
22
24
  # Grammar is the result of parsing an input grammar file
23
25
  class Grammar
24
26
  extend Forwardable
25
27
 
26
- attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
27
- attr_accessor :union, :expect,
28
- :printers, :error_tokens,
29
- :lex_param, :parse_param, :initial_action,
28
+ attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux, :parameterizing_rule_resolver
29
+ attr_accessor :union, :expect, :printers, :error_tokens, :lex_param, :parse_param, :initial_action,
30
30
  :after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack,
31
- :symbols_resolver, :types,
32
- :rules, :rule_builders,
33
- :sym_to_rules, :no_stdlib
31
+ :symbols_resolver, :types, :rules, :rule_builders, :sym_to_rules, :no_stdlib, :locations
34
32
 
35
- def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term,
33
+ def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term, :find_term_by_s_value,
36
34
  :find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
37
35
  :find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type,
38
36
  :fill_printer, :fill_destructor, :fill_error_token, :sort_by_number!
39
37
 
40
-
41
38
  def initialize(rule_counter)
42
39
  @rule_counter = rule_counter
43
40
 
@@ -59,10 +56,15 @@ module Lrama
59
56
  @accept_symbol = nil
60
57
  @aux = Auxiliary.new
61
58
  @no_stdlib = false
59
+ @locations = false
62
60
 
63
61
  append_special_symbols
64
62
  end
65
63
 
64
+ def create_rule_builder(rule_counter, midrule_action_counter)
65
+ RuleBuilder.new(rule_counter, midrule_action_counter, @parameterizing_rule_resolver)
66
+ end
67
+
66
68
  def add_percent_code(id:, code:)
67
69
  @percent_codes << PercentCode.new(id.s_value, code.s_value)
68
70
  end
@@ -141,6 +143,7 @@ module Lrama
141
143
  end
142
144
 
143
145
  def prepare
146
+ resolve_inline_rules
144
147
  normalize_rules
145
148
  collect_symbols
146
149
  set_lhs_and_rhs
@@ -149,6 +152,7 @@ module Lrama
149
152
  fill_sym_to_rules
150
153
  compute_nullable
151
154
  compute_first_set
155
+ set_locations
152
156
  end
153
157
 
154
158
  # TODO: More validation methods
@@ -255,7 +259,7 @@ module Lrama
255
259
 
256
260
  def setup_rules
257
261
  @rule_builders.each do |builder|
258
- builder.setup_rules(@parameterizing_rule_resolver)
262
+ builder.setup_rules
259
263
  end
260
264
  end
261
265
 
@@ -289,10 +293,23 @@ module Lrama
289
293
  @accept_symbol = term
290
294
  end
291
295
 
296
+ def resolve_inline_rules
297
+ while @rule_builders.any? {|r| r.has_inline_rules? } do
298
+ @rule_builders = @rule_builders.flat_map do |builder|
299
+ if builder.has_inline_rules?
300
+ builder.resolve_inline_rules
301
+ else
302
+ builder
303
+ end
304
+ end
305
+ end
306
+ end
307
+
292
308
  def normalize_rules
293
309
  # Add $accept rule to the top of rules
294
- lineno = @rule_builders.first ? @rule_builders.first.line : 0
295
- @rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [@rule_builders.first.lhs, @eof_symbol.id], token_code: nil, lineno: lineno)
310
+ rule_builder = @rule_builders.first # : RuleBuilder
311
+ lineno = rule_builder ? rule_builder.line : 0
312
+ @rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [rule_builder.lhs, @eof_symbol.id], token_code: nil, lineno: lineno)
296
313
 
297
314
  setup_rules
298
315
 
@@ -365,17 +382,21 @@ module Lrama
365
382
  end
366
383
 
367
384
  def validate_rule_lhs_is_nterm!
368
- errors = []
385
+ errors = [] #: Array[String]
369
386
 
370
387
  rules.each do |rule|
371
388
  next if rule.lhs.nterm?
372
389
 
373
- errors << "[BUG] LHS of #{rule} (line: #{rule.lineno}) is term. It should be nterm."
390
+ errors << "[BUG] LHS of #{rule.display_name} (line: #{rule.lineno}) is term. It should be nterm."
374
391
  end
375
392
 
376
393
  return if errors.empty?
377
394
 
378
395
  raise errors.join("\n")
379
396
  end
397
+
398
+ def set_locations
399
+ @locations = @locations || @rules.any? {|rule| rule.contains_at_reference? }
400
+ end
380
401
  end
381
402
  end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Lrama
4
+ class GrammarValidator
5
+ def initialize(grammar, states, logger)
6
+ @grammar = grammar
7
+ @states = states
8
+ @logger = logger
9
+ end
10
+
11
+ def valid?
12
+ conflicts_within_threshold?
13
+ end
14
+
15
+ private
16
+
17
+ def conflicts_within_threshold?
18
+ return true unless @grammar.expect
19
+
20
+ [sr_conflicts_within_threshold(@grammar.expect), rr_conflicts_within_threshold(0)].all?
21
+ end
22
+
23
+ def sr_conflicts_within_threshold(expected)
24
+ return true if expected == @states.sr_conflicts_count
25
+
26
+ @logger.error("shift/reduce conflicts: #{@states.sr_conflicts_count} found, #{expected} expected")
27
+ false
28
+ end
29
+
30
+ def rr_conflicts_within_threshold(expected)
31
+ return true if expected == @states.rr_conflicts_count
32
+
33
+ @logger.error("reduce/reduce conflicts: #{@states.rr_conflicts_count} found, #{expected} expected")
34
+ false
35
+ end
36
+ end
37
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Lrama
2
4
  class Lexer
3
5
  class GrammarFile
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Lrama
2
4
  class Lexer
3
5
  class Location
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Lrama
2
4
  class Lexer
3
5
  class Token
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Lrama
2
4
  class Lexer
3
5
  class Token
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Lrama
2
4
  class Lexer
3
5
  class Token
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Lrama
2
4
  class Lexer
3
5
  class Token
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "strscan"
2
4
 
3
5
  module Lrama
@@ -14,9 +16,9 @@ module Lrama
14
16
 
15
17
  def _references
16
18
  scanner = StringScanner.new(s_value)
17
- references = []
19
+ references = [] #: Array[Grammar::Reference]
18
20
 
19
- while !scanner.eos? do
21
+ until scanner.eos? do
20
22
  case
21
23
  when reference = scan_reference(scanner)
22
24
  references << reference
@@ -1,8 +1,10 @@
1
- require 'lrama/lexer/token/char'
2
- require 'lrama/lexer/token/ident'
3
- require 'lrama/lexer/token/instantiate_rule'
4
- require 'lrama/lexer/token/tag'
5
- require 'lrama/lexer/token/user_code'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'token/char'
4
+ require_relative 'token/ident'
5
+ require_relative 'token/instantiate_rule'
6
+ require_relative 'token/tag'
7
+ require_relative 'token/user_code'
6
8
 
7
9
  module Lrama
8
10
  class Lexer
data/lib/lrama/lexer.rb CHANGED
@@ -1,19 +1,22 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "strscan"
2
4
 
3
- require "lrama/lexer/grammar_file"
4
- require "lrama/lexer/location"
5
- require "lrama/lexer/token"
5
+ require_relative "lexer/grammar_file"
6
+ require_relative "lexer/location"
7
+ require_relative "lexer/token"
6
8
 
7
9
  module Lrama
8
10
  class Lexer
9
11
  attr_reader :head_line, :head_column, :line
10
12
  attr_accessor :status, :end_symbol
11
13
 
12
- SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';']
14
+ SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';'].freeze
13
15
  PERCENT_TOKENS = %w(
14
16
  %union
15
17
  %token
16
18
  %type
19
+ %nterm
17
20
  %left
18
21
  %right
19
22
  %nonassoc
@@ -38,7 +41,8 @@ module Lrama
38
41
  %rule
39
42
  %no-stdlib
40
43
  %inline
41
- )
44
+ %locations
45
+ ).freeze
42
46
 
43
47
  def initialize(grammar_file)
44
48
  @grammar_file = grammar_file
@@ -71,7 +75,7 @@ module Lrama
71
75
  end
72
76
 
73
77
  def lex_token
74
- while !@scanner.eos? do
78
+ until @scanner.eos? do
75
79
  case
76
80
  when @scanner.scan(/\n/)
77
81
  newline
@@ -126,7 +130,7 @@ module Lrama
126
130
  code = ''
127
131
  reset_first_position
128
132
 
129
- while !@scanner.eos? do
133
+ until @scanner.eos? do
130
134
  case
131
135
  when @scanner.scan(/{/)
132
136
  code += @scanner.matched
@@ -163,7 +167,7 @@ module Lrama
163
167
  private
164
168
 
165
169
  def lex_comment
166
- while !@scanner.eos? do
170
+ until @scanner.eos? do
167
171
  case
168
172
  when @scanner.scan(/\n/)
169
173
  newline
@@ -1,25 +1,17 @@
1
- module Lrama
2
- class Warning
3
- attr_reader :errors, :warns
1
+ # frozen_string_literal: true
4
2
 
3
+ module Lrama
4
+ class Logger
5
5
  def initialize(out = STDERR)
6
6
  @out = out
7
- @errors = []
8
- @warns = []
9
- end
10
-
11
- def error(message)
12
- @out << message << "\n"
13
- @errors << message
14
7
  end
15
8
 
16
9
  def warn(message)
17
10
  @out << message << "\n"
18
- @warns << message
19
11
  end
20
12
 
21
- def has_error?
22
- !@errors.empty?
13
+ def error(message)
14
+ @out << message << "\n"
23
15
  end
24
16
  end
25
17
  end