lrama 0.6.0 → 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yaml +27 -7
  3. data/Gemfile +1 -1
  4. data/NEWS.md +55 -0
  5. data/Steepfile +2 -3
  6. data/lib/lrama/command.rb +25 -3
  7. data/lib/lrama/context.rb +3 -23
  8. data/lib/lrama/counterexamples/example.rb +2 -2
  9. data/lib/lrama/grammar/binding.rb +24 -0
  10. data/lib/lrama/grammar/code/rule_action.rb +1 -1
  11. data/lib/lrama/grammar/code.rb +1 -1
  12. data/lib/lrama/grammar/parameterizing_rule/resolver.rb +47 -0
  13. data/lib/lrama/grammar/parameterizing_rule/rhs.rb +15 -0
  14. data/lib/lrama/grammar/parameterizing_rule/rule.rb +16 -0
  15. data/lib/lrama/grammar/parameterizing_rule.rb +3 -6
  16. data/lib/lrama/grammar/percent_code.rb +3 -3
  17. data/lib/lrama/grammar/rule.rb +2 -2
  18. data/lib/lrama/grammar/rule_builder.rb +60 -31
  19. data/lib/lrama/grammar/stdlib.y +80 -0
  20. data/lib/lrama/grammar/type.rb +13 -1
  21. data/lib/lrama/grammar.rb +18 -11
  22. data/lib/lrama/lexer/grammar_file.rb +1 -1
  23. data/lib/lrama/lexer/token/instantiate_rule.rb +7 -2
  24. data/lib/lrama/lexer/token.rb +5 -0
  25. data/lib/lrama/lexer.rb +3 -7
  26. data/lib/lrama/output.rb +2 -2
  27. data/lib/lrama/parser.rb +508 -467
  28. data/lib/lrama/states/item.rb +17 -13
  29. data/lib/lrama/states_reporter.rb +8 -10
  30. data/lib/lrama/version.rb +1 -1
  31. data/parser.y +12 -13
  32. data/sig/lrama/grammar/binding.rbs +16 -0
  33. data/sig/lrama/grammar/parameterizing_rule/resolver.rbs +22 -0
  34. data/sig/lrama/grammar/parameterizing_rule/rhs.rbs +13 -0
  35. data/sig/lrama/grammar/parameterizing_rule/rule.rbs +14 -0
  36. data/sig/lrama/grammar/parameterizing_rule.rbs +0 -4
  37. data/sig/lrama/grammar/percent_code.rbs +3 -3
  38. data/sig/lrama/grammar/rule_builder.rbs +9 -6
  39. data/sig/lrama/lexer/token/instantiate_rule.rbs +4 -2
  40. data/sig/lrama/lexer/token.rbs +1 -0
  41. metadata +12 -23
  42. data/lib/lrama/grammar/parameterizing_rule_builder.rb +0 -34
  43. data/lib/lrama/grammar/parameterizing_rule_resolver.rb +0 -30
  44. data/lib/lrama/grammar/parameterizing_rule_rhs_builder.rb +0 -53
  45. data/lib/lrama/grammar/parameterizing_rules/builder/base.rb +0 -36
  46. data/lib/lrama/grammar/parameterizing_rules/builder/list.rb +0 -28
  47. data/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb +0 -28
  48. data/lib/lrama/grammar/parameterizing_rules/builder/option.rb +0 -28
  49. data/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb +0 -39
  50. data/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb +0 -34
  51. data/lib/lrama/grammar/parameterizing_rules/builder.rb +0 -60
  52. data/sig/lrama/grammar/parameterizing_rule_builder.rbs +0 -19
  53. data/sig/lrama/grammar/parameterizing_rule_resolver.rbs +0 -16
  54. data/sig/lrama/grammar/parameterizing_rule_rhs_builder.rbs +0 -18
  55. data/sig/lrama/grammar/parameterizing_rules/builder/base.rbs +0 -28
  56. data/sig/lrama/grammar/parameterizing_rules/builder/list.rbs +0 -10
  57. data/sig/lrama/grammar/parameterizing_rules/builder/nonempty_list.rbs +0 -10
  58. data/sig/lrama/grammar/parameterizing_rules/builder/option.rbs +0 -10
  59. data/sig/lrama/grammar/parameterizing_rules/builder/separated_list.rbs +0 -13
  60. data/sig/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rbs +0 -13
  61. data/sig/lrama/grammar/parameterizing_rules/builder.rbs +0 -24
@@ -1,23 +1,22 @@
1
- require 'lrama/grammar/parameterizing_rules/builder'
2
-
3
1
  module Lrama
4
2
  class Grammar
5
3
  class RuleBuilder
6
- attr_accessor :lhs, :lhs_tag, :line
7
- attr_reader :rhs, :user_code, :precedence_sym
4
+ attr_accessor :lhs, :line
5
+ attr_reader :lhs_tag, :rhs, :user_code, :precedence_sym
8
6
 
9
- def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, skip_preprocess_references: false)
7
+ def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false)
10
8
  @rule_counter = rule_counter
11
9
  @midrule_action_counter = midrule_action_counter
12
10
  @position_in_original_rule_rhs = position_in_original_rule_rhs
13
11
  @skip_preprocess_references = skip_preprocess_references
14
12
 
15
13
  @lhs = nil
14
+ @lhs_tag = lhs_tag
16
15
  @rhs = []
17
- @lhs_tag = nil
18
16
  @user_code = nil
19
17
  @precedence_sym = nil
20
18
  @line = nil
19
+ @rule_builders_for_parameterizing_rules = []
21
20
  @rule_builders_for_derived_rules = []
22
21
  end
23
22
 
@@ -33,7 +32,7 @@ module Lrama
33
32
 
34
33
  def user_code=(user_code)
35
34
  if !@line
36
- @line = user_code.line
35
+ @line = user_code&.line
37
36
  end
38
37
 
39
38
  flush_user_code
@@ -51,9 +50,9 @@ module Lrama
51
50
  freeze_rhs
52
51
  end
53
52
 
54
- def setup_rules(parameterizing_resolver)
53
+ def setup_rules(parameterizing_rule_resolver)
55
54
  preprocess_references unless @skip_preprocess_references
56
- process_rhs(parameterizing_resolver)
55
+ process_rhs(parameterizing_rule_resolver)
57
56
  build_rules
58
57
  end
59
58
 
@@ -75,10 +74,13 @@ module Lrama
75
74
  tokens = @replaced_rhs
76
75
 
77
76
  rule = Rule.new(
78
- id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, token_code: user_code,
77
+ id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, lhs_tag: lhs_tag, token_code: user_code,
79
78
  position_in_original_rule_rhs: @position_in_original_rule_rhs, precedence_sym: precedence_sym, lineno: line
80
79
  )
81
80
  @rules = [rule]
81
+ @parameterizing_rules = @rule_builders_for_parameterizing_rules.map do |rule_builder|
82
+ rule_builder.rules
83
+ end.flatten
82
84
  @midrule_action_rules = @rule_builders_for_derived_rules.map do |rule_builder|
83
85
  rule_builder.rules
84
86
  end.flatten
@@ -89,11 +91,10 @@ module Lrama
89
91
 
90
92
  # rhs is a mixture of various types of tokens, such as `Ident`, `InstantiateRule`, `UserCode` and so on.
91
93
  # `#process_rhs` replaces some kinds of tokens with `Ident` so that every element of `@replaced_rhs` is an `Ident` or `Char`.
92
- def process_rhs(parameterizing_resolver)
94
+ def process_rhs(parameterizing_rule_resolver)
93
95
  return if @replaced_rhs
94
96
 
95
97
  @replaced_rhs = []
96
- @parameterizing_rules = []
97
98
 
98
99
  rhs.each_with_index do |token, i|
99
100
  case token
@@ -102,26 +103,39 @@ module Lrama
102
103
  when Lrama::Lexer::Token::Ident
103
104
  @replaced_rhs << token
104
105
  when Lrama::Lexer::Token::InstantiateRule
105
- if parameterizing_resolver.defined?(token.rule_name)
106
- parameterizing = parameterizing_resolver.build_rules(token, @rule_counter, @lhs_tag, line)
107
- @parameterizing_rules = @parameterizing_rules + parameterizing.map(&:rules).flatten
108
- @replaced_rhs = @replaced_rhs + parameterizing.map(&:token).flatten.uniq
106
+ parameterizing_rule = parameterizing_rule_resolver.find(token)
107
+ raise "Unexpected token. #{token}" unless parameterizing_rule
108
+
109
+ bindings = Binding.new(parameterizing_rule, token.args)
110
+ lhs_s_value = lhs_s_value(token, bindings)
111
+ if (created_lhs = parameterizing_rule_resolver.created_lhs(lhs_s_value))
112
+ @replaced_rhs << created_lhs
109
113
  else
110
- # TODO: Delete when the standard library will defined as a grammar file.
111
- parameterizing = ParameterizingRules::Builder.new(token, @rule_counter, @lhs_tag, user_code, precedence_sym, line)
112
- @parameterizing_rules = @parameterizing_rules + parameterizing.build
113
- @replaced_rhs << parameterizing.build_token
114
+ lhs_token = Lrama::Lexer::Token::Ident.new(s_value: lhs_s_value, location: token.location)
115
+ @replaced_rhs << lhs_token
116
+ parameterizing_rule_resolver.created_lhs_list << lhs_token
117
+ parameterizing_rule.rhs_list.each do |r|
118
+ rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, lhs_tag: token.lhs_tag, skip_preprocess_references: true)
119
+ rule_builder.lhs = lhs_token
120
+ r.symbols.each { |sym| rule_builder.add_rhs(bindings.resolve_symbol(sym)) }
121
+ rule_builder.line = line
122
+ rule_builder.user_code = r.user_code
123
+ rule_builder.precedence_sym = r.precedence_sym
124
+ rule_builder.complete_input
125
+ rule_builder.setup_rules(parameterizing_rule_resolver)
126
+ @rule_builders_for_parameterizing_rules << rule_builder
127
+ end
114
128
  end
115
129
  when Lrama::Lexer::Token::UserCode
116
130
  prefix = token.referred ? "@" : "$@"
117
131
  new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s)
118
132
  @replaced_rhs << new_token
119
133
 
120
- rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, skip_preprocess_references: true)
134
+ rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, lhs_tag: lhs_tag, skip_preprocess_references: true)
121
135
  rule_builder.lhs = new_token
122
136
  rule_builder.user_code = token
123
137
  rule_builder.complete_input
124
- rule_builder.setup_rules(parameterizing_resolver)
138
+ rule_builder.setup_rules(parameterizing_rule_resolver)
125
139
 
126
140
  @rule_builders_for_derived_rules << rule_builder
127
141
  else
@@ -130,6 +144,18 @@ module Lrama
130
144
  end
131
145
  end
132
146
 
147
+ def lhs_s_value(token, bindings)
148
+ s_values = token.args.map do |arg|
149
+ resolved = bindings.resolve_symbol(arg)
150
+ if resolved.is_a?(Lexer::Token::InstantiateRule)
151
+ [resolved.s_value, resolved.args.map(&:s_value)]
152
+ else
153
+ resolved.s_value
154
+ end
155
+ end
156
+ "#{token.rule_name}_#{s_values.join('_')}"
157
+ end
158
+
133
159
  def numberize_references
134
160
  # Bison n'th component is 1-origin
135
161
  (rhs + [user_code]).compact.each.with_index(1) do |token, i|
@@ -137,23 +163,26 @@ module Lrama
137
163
 
138
164
  token.references.each do |ref|
139
165
  ref_name = ref.name
140
- if ref_name && ref_name != '$'
141
- if lhs.referred_by?(ref_name)
166
+
167
+ if ref_name
168
+ if ref_name == '$'
142
169
  ref.name = '$'
143
170
  else
144
- candidates = rhs.each_with_index.select {|token, i| token.referred_by?(ref_name) }
171
+ candidates = ([lhs] + rhs).each_with_index.select {|token, _i| token.referred_by?(ref_name) }
145
172
 
146
173
  if candidates.size >= 2
147
- location = token.location.partial_location(ref.first_column, ref.last_column)
148
- raise location.generate_error_message("Referring symbol `#{ref_name}` is duplicated.")
174
+ token.invalid_ref(ref, "Referring symbol `#{ref_name}` is duplicated.")
149
175
  end
150
176
 
151
177
  unless (referring_symbol = candidates.first)
152
- location = token.location.partial_location(ref.first_column, ref.last_column)
153
- raise location.generate_error_message("Referring symbol `#{ref_name}` is not found.")
178
+ token.invalid_ref(ref, "Referring symbol `#{ref_name}` is not found.")
154
179
  end
155
180
 
156
- ref.index = referring_symbol[1] + 1
181
+ if referring_symbol[1] == 0 # Refers to LHS
182
+ ref.name = '$'
183
+ else
184
+ ref.index = referring_symbol[1]
185
+ end
157
186
  end
158
187
  end
159
188
 
@@ -163,7 +192,7 @@ module Lrama
163
192
  if ref.index
164
193
  # TODO: Prohibit $0 even though Bison allows it?
165
194
  # See: https://www.gnu.org/software/bison/manual/html_node/Actions.html
166
- raise "Can not refer following component. #{ref.index} >= #{i}. #{token}" if ref.index >= i
195
+ token.invalid_ref(ref, "Can not refer following component. #{ref.index} >= #{i}.") if ref.index >= i
167
196
  rhs[ref.index - 1].referred = true
168
197
  end
169
198
  end
@@ -0,0 +1,80 @@
1
+ /**********************************************************************
2
+
3
+ stdlib.y
4
+
5
+ This is lrama's standard library. It provides a number of
6
+ parameterizing rule definitions, such as options and lists,
7
+ that should be useful in a number of situations.
8
+
9
+ **********************************************************************/
10
+
11
+ /*
12
+ * program: option(number)
13
+ *
14
+ * =>
15
+ *
16
+ * program: option_number
17
+ * option_number: %empty
18
+ * option_number: number
19
+ */
20
+ %rule option(X): /* empty */
21
+ | X
22
+ ;
23
+
24
+ /*
25
+ * program: list(number)
26
+ *
27
+ * =>
28
+ *
29
+ * program: list_number
30
+ * list_number: %empty
31
+ * list_number: list_number number
32
+ */
33
+ %rule list(X): /* empty */
34
+ | list(X) X
35
+ ;
36
+
37
+ /*
38
+ * program: nonempty_list(number)
39
+ *
40
+ * =>
41
+ *
42
+ * program: nonempty_list_number
43
+ * nonempty_list_number: number
44
+ * nonempty_list_number: nonempty_list_number number
45
+ */
46
+ %rule nonempty_list(X): X
47
+ | nonempty_list(X) X
48
+ ;
49
+
50
+ /*
51
+ * program: separated_nonempty_list(comma, number)
52
+ *
53
+ * =>
54
+ *
55
+ * program: separated_nonempty_list_comma_number
56
+ * separated_nonempty_list_comma_number: number
57
+ * separated_nonempty_list_comma_number: separated_nonempty_list_comma_number comma number
58
+ */
59
+ %rule separated_nonempty_list(separator, X): X
60
+ | separated_nonempty_list(separator, X) separator X
61
+ ;
62
+
63
+ /*
64
+ * program: separated_list(comma, number)
65
+ *
66
+ * =>
67
+ *
68
+ * program: separated_list_comma_number
69
+ * separated_list_comma_number: option_separated_nonempty_list_comma_number
70
+ * option_separated_nonempty_list_comma_number: %empty
71
+ * option_separated_nonempty_list_comma_number: separated_nonempty_list_comma_number
72
+ * separated_nonempty_list_comma_number: number
73
+ * separated_nonempty_list_comma_number: separated_nonempty_list_comma_number comma number
74
+ */
75
+ %rule separated_list(separator, X): option(separated_nonempty_list(separator, X))
76
+ ;
77
+
78
+ %%
79
+
80
+ %union{};
@@ -1,6 +1,18 @@
1
1
  module Lrama
2
2
  class Grammar
3
- class Type < Struct.new(:id, :tag, keyword_init: true)
3
+ class Type
4
+ attr_reader :id, :tag
5
+
6
+ def initialize(id:, tag:)
7
+ @id = id
8
+ @tag = tag
9
+ end
10
+
11
+ def ==(other)
12
+ self.class == other.class &&
13
+ self.id == other.id &&
14
+ self.tag == other.tag
15
+ end
4
16
  end
5
17
  end
6
18
  end
data/lib/lrama/grammar.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "lrama/grammar/auxiliary"
2
+ require "lrama/grammar/binding"
2
3
  require "lrama/grammar/code"
3
4
  require "lrama/grammar/counter"
4
5
  require "lrama/grammar/error_token"
@@ -8,9 +9,6 @@ require "lrama/grammar/printer"
8
9
  require "lrama/grammar/reference"
9
10
  require "lrama/grammar/rule"
10
11
  require "lrama/grammar/rule_builder"
11
- require "lrama/grammar/parameterizing_rule_builder"
12
- require "lrama/grammar/parameterizing_rule_resolver"
13
- require "lrama/grammar/parameterizing_rule_rhs_builder"
14
12
  require "lrama/grammar/parameterizing_rule"
15
13
  require "lrama/grammar/symbol"
16
14
  require "lrama/grammar/type"
@@ -26,7 +24,7 @@ module Lrama
26
24
  :lex_param, :parse_param, :initial_action,
27
25
  :symbols, :types,
28
26
  :rules, :rule_builders,
29
- :sym_to_rules
27
+ :sym_to_rules, :no_stdlib
30
28
 
31
29
  def initialize(rule_counter)
32
30
  @rule_counter = rule_counter
@@ -40,19 +38,20 @@ module Lrama
40
38
  @rule_builders = []
41
39
  @rules = []
42
40
  @sym_to_rules = {}
43
- @parameterizing_resolver = ParameterizingRuleResolver.new
41
+ @parameterizing_rule_resolver = ParameterizingRule::Resolver.new
44
42
  @empty_symbol = nil
45
43
  @eof_symbol = nil
46
44
  @error_symbol = nil
47
45
  @undef_symbol = nil
48
46
  @accept_symbol = nil
49
47
  @aux = Auxiliary.new
48
+ @no_stdlib = false
50
49
 
51
50
  append_special_symbols
52
51
  end
53
52
 
54
53
  def add_percent_code(id:, code:)
55
- @percent_codes << PercentCode.new(id, code)
54
+ @percent_codes << PercentCode.new(id.s_value, code.s_value)
56
55
  end
57
56
 
58
57
  def add_printer(ident_or_tags:, token_code:, lineno:)
@@ -134,8 +133,16 @@ module Lrama
134
133
  @rule_builders << builder
135
134
  end
136
135
 
137
- def add_parameterizing_rule_builder(builder)
138
- @parameterizing_resolver.add_parameterizing_rule_builder(builder)
136
+ def add_parameterizing_rule(rule)
137
+ @parameterizing_rule_resolver.add_parameterizing_rule(rule)
138
+ end
139
+
140
+ def parameterizing_rules
141
+ @parameterizing_rule_resolver.rules
142
+ end
143
+
144
+ def insert_before_parameterizing_rules(rules)
145
+ @parameterizing_rule_resolver.rules = rules + @parameterizing_rule_resolver.rules
139
146
  end
140
147
 
141
148
  def prologue_first_lineno=(prologue_first_lineno)
@@ -171,7 +178,7 @@ module Lrama
171
178
 
172
179
  # TODO: More validation methods
173
180
  #
174
- # * Validaiton for no_declared_type_reference
181
+ # * Validation for no_declared_type_reference
175
182
  def validate!
176
183
  validate_symbol_number_uniqueness!
177
184
  validate_symbol_alias_name_uniqueness!
@@ -236,7 +243,7 @@ module Lrama
236
243
  def compute_nullable
237
244
  @rules.each do |rule|
238
245
  case
239
- when rule.rhs.empty?
246
+ when rule.empty_rule?
240
247
  rule.nullable = true
241
248
  when rule.rhs.any?(&:term)
242
249
  rule.nullable = false
@@ -319,7 +326,7 @@ module Lrama
319
326
 
320
327
  def setup_rules
321
328
  @rule_builders.each do |builder|
322
- builder.setup_rules(@parameterizing_resolver)
329
+ builder.setup_rules(@parameterizing_rule_resolver)
323
330
  end
324
331
  end
325
332
 
@@ -5,7 +5,7 @@ module Lrama
5
5
 
6
6
  def initialize(path, text)
7
7
  @path = path
8
- @text = text
8
+ @text = text.freeze
9
9
  end
10
10
 
11
11
  def ==(other)
@@ -2,16 +2,21 @@ module Lrama
2
2
  class Lexer
3
3
  class Token
4
4
  class InstantiateRule < Token
5
- attr_accessor :args
5
+ attr_reader :args, :lhs_tag
6
6
 
7
- def initialize(s_value:, alias_name: nil, location: nil, args: [])
7
+ def initialize(s_value:, alias_name: nil, location: nil, args: [], lhs_tag: nil)
8
8
  super s_value: s_value, alias_name: alias_name, location: location
9
9
  @args = args
10
+ @lhs_tag = lhs_tag
10
11
  end
11
12
 
12
13
  def rule_name
13
14
  s_value
14
15
  end
16
+
17
+ def args_count
18
+ args.count
19
+ end
15
20
  end
16
21
  end
17
22
  end
@@ -46,6 +46,11 @@ module Lrama
46
46
  def last_column
47
47
  location.last_column
48
48
  end
49
+
50
+ def invalid_ref(ref, message)
51
+ location = self.location.partial_location(ref.first_column, ref.last_column)
52
+ raise location.generate_error_message(message)
53
+ end
49
54
  end
50
55
  end
51
56
  end
data/lib/lrama/lexer.rb CHANGED
@@ -5,9 +5,8 @@ require "lrama/lexer/token"
5
5
 
6
6
  module Lrama
7
7
  class Lexer
8
- attr_reader :head_line, :head_column
9
- attr_accessor :status
10
- attr_accessor :end_symbol
8
+ attr_reader :head_line, :head_column, :line
9
+ attr_accessor :status, :end_symbol
11
10
 
12
11
  SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';']
13
12
  PERCENT_TOKENS = %w(
@@ -30,6 +29,7 @@ module Lrama
30
29
  %empty
31
30
  %code
32
31
  %rule
32
+ %no-stdlib
33
33
  )
34
34
 
35
35
  def initialize(grammar_file)
@@ -50,10 +50,6 @@ module Lrama
50
50
  end
51
51
  end
52
52
 
53
- def line
54
- @line
55
- end
56
-
57
53
  def column
58
54
  @scanner.pos - @head
59
55
  end
data/lib/lrama/output.rb CHANGED
@@ -352,9 +352,9 @@ module Lrama
352
352
  # b4_percent_code_get
353
353
  def percent_code(name)
354
354
  @grammar.percent_codes.select do |percent_code|
355
- percent_code.id.s_value == name
355
+ percent_code.name == name
356
356
  end.map do |percent_code|
357
- percent_code.code.s_value
357
+ percent_code.code
358
358
  end.join
359
359
  end
360
360