lrama 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yaml +27 -7
  3. data/Gemfile +1 -1
  4. data/NEWS.md +55 -0
  5. data/Steepfile +2 -3
  6. data/lib/lrama/command.rb +25 -3
  7. data/lib/lrama/context.rb +3 -23
  8. data/lib/lrama/counterexamples/example.rb +2 -2
  9. data/lib/lrama/grammar/binding.rb +24 -0
  10. data/lib/lrama/grammar/code/rule_action.rb +1 -1
  11. data/lib/lrama/grammar/code.rb +1 -1
  12. data/lib/lrama/grammar/parameterizing_rule/resolver.rb +47 -0
  13. data/lib/lrama/grammar/parameterizing_rule/rhs.rb +15 -0
  14. data/lib/lrama/grammar/parameterizing_rule/rule.rb +16 -0
  15. data/lib/lrama/grammar/parameterizing_rule.rb +3 -6
  16. data/lib/lrama/grammar/percent_code.rb +3 -3
  17. data/lib/lrama/grammar/rule.rb +2 -2
  18. data/lib/lrama/grammar/rule_builder.rb +60 -31
  19. data/lib/lrama/grammar/stdlib.y +80 -0
  20. data/lib/lrama/grammar/type.rb +13 -1
  21. data/lib/lrama/grammar.rb +18 -11
  22. data/lib/lrama/lexer/grammar_file.rb +1 -1
  23. data/lib/lrama/lexer/token/instantiate_rule.rb +7 -2
  24. data/lib/lrama/lexer/token.rb +5 -0
  25. data/lib/lrama/lexer.rb +3 -7
  26. data/lib/lrama/output.rb +2 -2
  27. data/lib/lrama/parser.rb +508 -467
  28. data/lib/lrama/states/item.rb +17 -13
  29. data/lib/lrama/states_reporter.rb +8 -10
  30. data/lib/lrama/version.rb +1 -1
  31. data/parser.y +12 -13
  32. data/sig/lrama/grammar/binding.rbs +16 -0
  33. data/sig/lrama/grammar/parameterizing_rule/resolver.rbs +22 -0
  34. data/sig/lrama/grammar/parameterizing_rule/rhs.rbs +13 -0
  35. data/sig/lrama/grammar/parameterizing_rule/rule.rbs +14 -0
  36. data/sig/lrama/grammar/parameterizing_rule.rbs +0 -4
  37. data/sig/lrama/grammar/percent_code.rbs +3 -3
  38. data/sig/lrama/grammar/rule_builder.rbs +9 -6
  39. data/sig/lrama/lexer/token/instantiate_rule.rbs +4 -2
  40. data/sig/lrama/lexer/token.rbs +1 -0
  41. metadata +12 -23
  42. data/lib/lrama/grammar/parameterizing_rule_builder.rb +0 -34
  43. data/lib/lrama/grammar/parameterizing_rule_resolver.rb +0 -30
  44. data/lib/lrama/grammar/parameterizing_rule_rhs_builder.rb +0 -53
  45. data/lib/lrama/grammar/parameterizing_rules/builder/base.rb +0 -36
  46. data/lib/lrama/grammar/parameterizing_rules/builder/list.rb +0 -28
  47. data/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb +0 -28
  48. data/lib/lrama/grammar/parameterizing_rules/builder/option.rb +0 -28
  49. data/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb +0 -39
  50. data/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb +0 -34
  51. data/lib/lrama/grammar/parameterizing_rules/builder.rb +0 -60
  52. data/sig/lrama/grammar/parameterizing_rule_builder.rbs +0 -19
  53. data/sig/lrama/grammar/parameterizing_rule_resolver.rbs +0 -16
  54. data/sig/lrama/grammar/parameterizing_rule_rhs_builder.rbs +0 -18
  55. data/sig/lrama/grammar/parameterizing_rules/builder/base.rbs +0 -28
  56. data/sig/lrama/grammar/parameterizing_rules/builder/list.rbs +0 -10
  57. data/sig/lrama/grammar/parameterizing_rules/builder/nonempty_list.rbs +0 -10
  58. data/sig/lrama/grammar/parameterizing_rules/builder/option.rbs +0 -10
  59. data/sig/lrama/grammar/parameterizing_rules/builder/separated_list.rbs +0 -13
  60. data/sig/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rbs +0 -13
  61. data/sig/lrama/grammar/parameterizing_rules/builder.rbs +0 -24
@@ -1,23 +1,22 @@
1
- require 'lrama/grammar/parameterizing_rules/builder'
2
-
3
1
  module Lrama
4
2
  class Grammar
5
3
  class RuleBuilder
6
- attr_accessor :lhs, :lhs_tag, :line
7
- attr_reader :rhs, :user_code, :precedence_sym
4
+ attr_accessor :lhs, :line
5
+ attr_reader :lhs_tag, :rhs, :user_code, :precedence_sym
8
6
 
9
- def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, skip_preprocess_references: false)
7
+ def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false)
10
8
  @rule_counter = rule_counter
11
9
  @midrule_action_counter = midrule_action_counter
12
10
  @position_in_original_rule_rhs = position_in_original_rule_rhs
13
11
  @skip_preprocess_references = skip_preprocess_references
14
12
 
15
13
  @lhs = nil
14
+ @lhs_tag = lhs_tag
16
15
  @rhs = []
17
- @lhs_tag = nil
18
16
  @user_code = nil
19
17
  @precedence_sym = nil
20
18
  @line = nil
19
+ @rule_builders_for_parameterizing_rules = []
21
20
  @rule_builders_for_derived_rules = []
22
21
  end
23
22
 
@@ -33,7 +32,7 @@ module Lrama
33
32
 
34
33
  def user_code=(user_code)
35
34
  if !@line
36
- @line = user_code.line
35
+ @line = user_code&.line
37
36
  end
38
37
 
39
38
  flush_user_code
@@ -51,9 +50,9 @@ module Lrama
51
50
  freeze_rhs
52
51
  end
53
52
 
54
- def setup_rules(parameterizing_resolver)
53
+ def setup_rules(parameterizing_rule_resolver)
55
54
  preprocess_references unless @skip_preprocess_references
56
- process_rhs(parameterizing_resolver)
55
+ process_rhs(parameterizing_rule_resolver)
57
56
  build_rules
58
57
  end
59
58
 
@@ -75,10 +74,13 @@ module Lrama
75
74
  tokens = @replaced_rhs
76
75
 
77
76
  rule = Rule.new(
78
- id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, token_code: user_code,
77
+ id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, lhs_tag: lhs_tag, token_code: user_code,
79
78
  position_in_original_rule_rhs: @position_in_original_rule_rhs, precedence_sym: precedence_sym, lineno: line
80
79
  )
81
80
  @rules = [rule]
81
+ @parameterizing_rules = @rule_builders_for_parameterizing_rules.map do |rule_builder|
82
+ rule_builder.rules
83
+ end.flatten
82
84
  @midrule_action_rules = @rule_builders_for_derived_rules.map do |rule_builder|
83
85
  rule_builder.rules
84
86
  end.flatten
@@ -89,11 +91,10 @@ module Lrama
89
91
 
90
92
  # rhs is a mixture of various types of tokens, such as `Ident`, `InstantiateRule`, `UserCode` and so on.
91
93
  # `#process_rhs` replaces some kinds of tokens with `Ident` so that all `@replaced_rhs` are `Ident` or `Char`.
92
- def process_rhs(parameterizing_resolver)
94
+ def process_rhs(parameterizing_rule_resolver)
93
95
  return if @replaced_rhs
94
96
 
95
97
  @replaced_rhs = []
96
- @parameterizing_rules = []
97
98
 
98
99
  rhs.each_with_index do |token, i|
99
100
  case token
@@ -102,26 +103,39 @@ module Lrama
102
103
  when Lrama::Lexer::Token::Ident
103
104
  @replaced_rhs << token
104
105
  when Lrama::Lexer::Token::InstantiateRule
105
- if parameterizing_resolver.defined?(token.rule_name)
106
- parameterizing = parameterizing_resolver.build_rules(token, @rule_counter, @lhs_tag, line)
107
- @parameterizing_rules = @parameterizing_rules + parameterizing.map(&:rules).flatten
108
- @replaced_rhs = @replaced_rhs + parameterizing.map(&:token).flatten.uniq
106
+ parameterizing_rule = parameterizing_rule_resolver.find(token)
107
+ raise "Unexpected token. #{token}" unless parameterizing_rule
108
+
109
+ bindings = Binding.new(parameterizing_rule, token.args)
110
+ lhs_s_value = lhs_s_value(token, bindings)
111
+ if (created_lhs = parameterizing_rule_resolver.created_lhs(lhs_s_value))
112
+ @replaced_rhs << created_lhs
109
113
  else
110
- # TODO: Delete when the standard library will defined as a grammar file.
111
- parameterizing = ParameterizingRules::Builder.new(token, @rule_counter, @lhs_tag, user_code, precedence_sym, line)
112
- @parameterizing_rules = @parameterizing_rules + parameterizing.build
113
- @replaced_rhs << parameterizing.build_token
114
+ lhs_token = Lrama::Lexer::Token::Ident.new(s_value: lhs_s_value, location: token.location)
115
+ @replaced_rhs << lhs_token
116
+ parameterizing_rule_resolver.created_lhs_list << lhs_token
117
+ parameterizing_rule.rhs_list.each do |r|
118
+ rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, lhs_tag: token.lhs_tag, skip_preprocess_references: true)
119
+ rule_builder.lhs = lhs_token
120
+ r.symbols.each { |sym| rule_builder.add_rhs(bindings.resolve_symbol(sym)) }
121
+ rule_builder.line = line
122
+ rule_builder.user_code = r.user_code
123
+ rule_builder.precedence_sym = r.precedence_sym
124
+ rule_builder.complete_input
125
+ rule_builder.setup_rules(parameterizing_rule_resolver)
126
+ @rule_builders_for_parameterizing_rules << rule_builder
127
+ end
114
128
  end
115
129
  when Lrama::Lexer::Token::UserCode
116
130
  prefix = token.referred ? "@" : "$@"
117
131
  new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s)
118
132
  @replaced_rhs << new_token
119
133
 
120
- rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, skip_preprocess_references: true)
134
+ rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, lhs_tag: lhs_tag, skip_preprocess_references: true)
121
135
  rule_builder.lhs = new_token
122
136
  rule_builder.user_code = token
123
137
  rule_builder.complete_input
124
- rule_builder.setup_rules(parameterizing_resolver)
138
+ rule_builder.setup_rules(parameterizing_rule_resolver)
125
139
 
126
140
  @rule_builders_for_derived_rules << rule_builder
127
141
  else
@@ -130,6 +144,18 @@ module Lrama
130
144
  end
131
145
  end
132
146
 
147
+ def lhs_s_value(token, bindings)
148
+ s_values = token.args.map do |arg|
149
+ resolved = bindings.resolve_symbol(arg)
150
+ if resolved.is_a?(Lexer::Token::InstantiateRule)
151
+ [resolved.s_value, resolved.args.map(&:s_value)]
152
+ else
153
+ resolved.s_value
154
+ end
155
+ end
156
+ "#{token.rule_name}_#{s_values.join('_')}"
157
+ end
158
+
133
159
  def numberize_references
134
160
  # Bison n'th component is 1-origin
135
161
  (rhs + [user_code]).compact.each.with_index(1) do |token, i|
@@ -137,23 +163,26 @@ module Lrama
137
163
 
138
164
  token.references.each do |ref|
139
165
  ref_name = ref.name
140
- if ref_name && ref_name != '$'
141
- if lhs.referred_by?(ref_name)
166
+
167
+ if ref_name
168
+ if ref_name == '$'
142
169
  ref.name = '$'
143
170
  else
144
- candidates = rhs.each_with_index.select {|token, i| token.referred_by?(ref_name) }
171
+ candidates = ([lhs] + rhs).each_with_index.select {|token, _i| token.referred_by?(ref_name) }
145
172
 
146
173
  if candidates.size >= 2
147
- location = token.location.partial_location(ref.first_column, ref.last_column)
148
- raise location.generate_error_message("Referring symbol `#{ref_name}` is duplicated.")
174
+ token.invalid_ref(ref, "Referring symbol `#{ref_name}` is duplicated.")
149
175
  end
150
176
 
151
177
  unless (referring_symbol = candidates.first)
152
- location = token.location.partial_location(ref.first_column, ref.last_column)
153
- raise location.generate_error_message("Referring symbol `#{ref_name}` is not found.")
178
+ token.invalid_ref(ref, "Referring symbol `#{ref_name}` is not found.")
154
179
  end
155
180
 
156
- ref.index = referring_symbol[1] + 1
181
+ if referring_symbol[1] == 0 # Refers to LHS
182
+ ref.name = '$'
183
+ else
184
+ ref.index = referring_symbol[1]
185
+ end
157
186
  end
158
187
  end
159
188
 
@@ -163,7 +192,7 @@ module Lrama
163
192
  if ref.index
164
193
  # TODO: Prohibit $0 even though Bison allows it?
165
194
  # See: https://www.gnu.org/software/bison/manual/html_node/Actions.html
166
- raise "Can not refer following component. #{ref.index} >= #{i}. #{token}" if ref.index >= i
195
+ token.invalid_ref(ref, "Can not refer following component. #{ref.index} >= #{i}.") if ref.index >= i
167
196
  rhs[ref.index - 1].referred = true
168
197
  end
169
198
  end
@@ -0,0 +1,80 @@
1
+ /**********************************************************************
2
+
3
+ stdlib.y
4
+
5
+ This is lrama's standard library. It provides a number of
6
+ parameterizing rule definitions, such as options and lists,
7
+ that should be useful in a number of situations.
8
+
9
+ **********************************************************************/
10
+
11
+ /*
12
+ * program: option(number)
13
+ *
14
+ * =>
15
+ *
16
+ * program: option_number
17
+ * option_number: %empty
18
+ * option_number: number
19
+ */
20
+ %rule option(X): /* empty */
21
+ | X
22
+ ;
23
+
24
+ /*
25
+ * program: list(number)
26
+ *
27
+ * =>
28
+ *
29
+ * program: list_number
30
+ * list_number: %empty
31
+ * list_number: list_number number
32
+ */
33
+ %rule list(X): /* empty */
34
+ | list(X) X
35
+ ;
36
+
37
+ /*
38
+ * program: nonempty_list(number)
39
+ *
40
+ * =>
41
+ *
42
+ * program: nonempty_list_number
43
+ * nonempty_list_number: number
44
+ * nonempty_list_number: nonempty_list_number number
45
+ */
46
+ %rule nonempty_list(X): X
47
+ | nonempty_list(X) X
48
+ ;
49
+
50
+ /*
51
+ * program: separated_nonempty_list(comma, number)
52
+ *
53
+ * =>
54
+ *
55
+ * program: separated_nonempty_list_comma_number
56
+ * separated_nonempty_list_comma_number: number
57
+ * separated_nonempty_list_comma_number: separated_nonempty_list_comma_number comma number
58
+ */
59
+ %rule separated_nonempty_list(separator, X): X
60
+ | separated_nonempty_list(separator, X) separator X
61
+ ;
62
+
63
+ /*
64
+ * program: separated_list(comma, number)
65
+ *
66
+ * =>
67
+ *
68
+ * program: separated_list_comma_number
69
+ * separated_list_comma_number: option_separated_nonempty_list_comma_number
70
+ * option_separated_nonempty_list_comma_number: %empty
71
+ * option_separated_nonempty_list_comma_number: separated_nonempty_list_comma_number
72
+ * separated_nonempty_list_comma_number: number
73
+ * separated_nonempty_list_comma_number: separated_nonempty_list_comma_number comma number
74
+ */
75
+ %rule separated_list(separator, X): option(separated_nonempty_list(separator, X))
76
+ ;
77
+
78
+ %%
79
+
80
+ %union{};
@@ -1,6 +1,18 @@
1
1
  module Lrama
2
2
  class Grammar
3
- class Type < Struct.new(:id, :tag, keyword_init: true)
3
+ class Type
4
+ attr_reader :id, :tag
5
+
6
+ def initialize(id:, tag:)
7
+ @id = id
8
+ @tag = tag
9
+ end
10
+
11
+ def ==(other)
12
+ self.class == other.class &&
13
+ self.id == other.id &&
14
+ self.tag == other.tag
15
+ end
4
16
  end
5
17
  end
6
18
  end
data/lib/lrama/grammar.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "lrama/grammar/auxiliary"
2
+ require "lrama/grammar/binding"
2
3
  require "lrama/grammar/code"
3
4
  require "lrama/grammar/counter"
4
5
  require "lrama/grammar/error_token"
@@ -8,9 +9,6 @@ require "lrama/grammar/printer"
8
9
  require "lrama/grammar/reference"
9
10
  require "lrama/grammar/rule"
10
11
  require "lrama/grammar/rule_builder"
11
- require "lrama/grammar/parameterizing_rule_builder"
12
- require "lrama/grammar/parameterizing_rule_resolver"
13
- require "lrama/grammar/parameterizing_rule_rhs_builder"
14
12
  require "lrama/grammar/parameterizing_rule"
15
13
  require "lrama/grammar/symbol"
16
14
  require "lrama/grammar/type"
@@ -26,7 +24,7 @@ module Lrama
26
24
  :lex_param, :parse_param, :initial_action,
27
25
  :symbols, :types,
28
26
  :rules, :rule_builders,
29
- :sym_to_rules
27
+ :sym_to_rules, :no_stdlib
30
28
 
31
29
  def initialize(rule_counter)
32
30
  @rule_counter = rule_counter
@@ -40,19 +38,20 @@ module Lrama
40
38
  @rule_builders = []
41
39
  @rules = []
42
40
  @sym_to_rules = {}
43
- @parameterizing_resolver = ParameterizingRuleResolver.new
41
+ @parameterizing_rule_resolver = ParameterizingRule::Resolver.new
44
42
  @empty_symbol = nil
45
43
  @eof_symbol = nil
46
44
  @error_symbol = nil
47
45
  @undef_symbol = nil
48
46
  @accept_symbol = nil
49
47
  @aux = Auxiliary.new
48
+ @no_stdlib = false
50
49
 
51
50
  append_special_symbols
52
51
  end
53
52
 
54
53
  def add_percent_code(id:, code:)
55
- @percent_codes << PercentCode.new(id, code)
54
+ @percent_codes << PercentCode.new(id.s_value, code.s_value)
56
55
  end
57
56
 
58
57
  def add_printer(ident_or_tags:, token_code:, lineno:)
@@ -134,8 +133,16 @@ module Lrama
134
133
  @rule_builders << builder
135
134
  end
136
135
 
137
- def add_parameterizing_rule_builder(builder)
138
- @parameterizing_resolver.add_parameterizing_rule_builder(builder)
136
+ def add_parameterizing_rule(rule)
137
+ @parameterizing_rule_resolver.add_parameterizing_rule(rule)
138
+ end
139
+
140
+ def parameterizing_rules
141
+ @parameterizing_rule_resolver.rules
142
+ end
143
+
144
+ def insert_before_parameterizing_rules(rules)
145
+ @parameterizing_rule_resolver.rules = rules + @parameterizing_rule_resolver.rules
139
146
  end
140
147
 
141
148
  def prologue_first_lineno=(prologue_first_lineno)
@@ -171,7 +178,7 @@ module Lrama
171
178
 
172
179
  # TODO: More validation methods
173
180
  #
174
- # * Validaiton for no_declared_type_reference
181
+ # * Validation for no_declared_type_reference
175
182
  def validate!
176
183
  validate_symbol_number_uniqueness!
177
184
  validate_symbol_alias_name_uniqueness!
@@ -236,7 +243,7 @@ module Lrama
236
243
  def compute_nullable
237
244
  @rules.each do |rule|
238
245
  case
239
- when rule.rhs.empty?
246
+ when rule.empty_rule?
240
247
  rule.nullable = true
241
248
  when rule.rhs.any?(&:term)
242
249
  rule.nullable = false
@@ -319,7 +326,7 @@ module Lrama
319
326
 
320
327
  def setup_rules
321
328
  @rule_builders.each do |builder|
322
- builder.setup_rules(@parameterizing_resolver)
329
+ builder.setup_rules(@parameterizing_rule_resolver)
323
330
  end
324
331
  end
325
332
 
@@ -5,7 +5,7 @@ module Lrama
5
5
 
6
6
  def initialize(path, text)
7
7
  @path = path
8
- @text = text
8
+ @text = text.freeze
9
9
  end
10
10
 
11
11
  def ==(other)
@@ -2,16 +2,21 @@ module Lrama
2
2
  class Lexer
3
3
  class Token
4
4
  class InstantiateRule < Token
5
- attr_accessor :args
5
+ attr_reader :args, :lhs_tag
6
6
 
7
- def initialize(s_value:, alias_name: nil, location: nil, args: [])
7
+ def initialize(s_value:, alias_name: nil, location: nil, args: [], lhs_tag: nil)
8
8
  super s_value: s_value, alias_name: alias_name, location: location
9
9
  @args = args
10
+ @lhs_tag = lhs_tag
10
11
  end
11
12
 
12
13
  def rule_name
13
14
  s_value
14
15
  end
16
+
17
+ def args_count
18
+ args.count
19
+ end
15
20
  end
16
21
  end
17
22
  end
@@ -46,6 +46,11 @@ module Lrama
46
46
  def last_column
47
47
  location.last_column
48
48
  end
49
+
50
+ def invalid_ref(ref, message)
51
+ location = self.location.partial_location(ref.first_column, ref.last_column)
52
+ raise location.generate_error_message(message)
53
+ end
49
54
  end
50
55
  end
51
56
  end
data/lib/lrama/lexer.rb CHANGED
@@ -5,9 +5,8 @@ require "lrama/lexer/token"
5
5
 
6
6
  module Lrama
7
7
  class Lexer
8
- attr_reader :head_line, :head_column
9
- attr_accessor :status
10
- attr_accessor :end_symbol
8
+ attr_reader :head_line, :head_column, :line
9
+ attr_accessor :status, :end_symbol
11
10
 
12
11
  SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';']
13
12
  PERCENT_TOKENS = %w(
@@ -30,6 +29,7 @@ module Lrama
30
29
  %empty
31
30
  %code
32
31
  %rule
32
+ %no-stdlib
33
33
  )
34
34
 
35
35
  def initialize(grammar_file)
@@ -50,10 +50,6 @@ module Lrama
50
50
  end
51
51
  end
52
52
 
53
- def line
54
- @line
55
- end
56
-
57
53
  def column
58
54
  @scanner.pos - @head
59
55
  end
data/lib/lrama/output.rb CHANGED
@@ -352,9 +352,9 @@ module Lrama
352
352
  # b4_percent_code_get
353
353
  def percent_code(name)
354
354
  @grammar.percent_codes.select do |percent_code|
355
- percent_code.id.s_value == name
355
+ percent_code.name == name
356
356
  end.map do |percent_code|
357
- percent_code.code.s_value
357
+ percent_code.code
358
358
  end.join
359
359
  end
360
360