lrama 0.5.8 → 0.5.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yaml +6 -1
  3. data/.gitignore +7 -4
  4. data/Gemfile +10 -6
  5. data/README.md +3 -3
  6. data/Rakefile +15 -7
  7. data/Steepfile +15 -1
  8. data/lib/lrama/command.rb +6 -1
  9. data/lib/lrama/context.rb +1 -3
  10. data/lib/lrama/counterexamples/path.rb +0 -46
  11. data/lib/lrama/counterexamples/production_path.rb +17 -0
  12. data/lib/lrama/counterexamples/start_path.rb +21 -0
  13. data/lib/lrama/counterexamples/transition_path.rb +17 -0
  14. data/lib/lrama/counterexamples.rb +3 -0
  15. data/lib/lrama/grammar/code/initial_action_code.rb +28 -0
  16. data/lib/lrama/grammar/code/no_reference_code.rb +24 -0
  17. data/lib/lrama/grammar/code/printer_code.rb +34 -0
  18. data/lib/lrama/grammar/code/rule_action.rb +62 -0
  19. data/lib/lrama/grammar/code.rb +9 -93
  20. data/lib/lrama/grammar/counter.rb +15 -0
  21. data/lib/lrama/grammar/error_token.rb +3 -3
  22. data/lib/lrama/grammar/parameterizing_rules/builder/base.rb +28 -0
  23. data/lib/lrama/grammar/parameterizing_rules/builder/list.rb +20 -0
  24. data/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb +20 -0
  25. data/lib/lrama/grammar/parameterizing_rules/builder/option.rb +20 -0
  26. data/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb +28 -0
  27. data/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb +27 -0
  28. data/lib/lrama/grammar/parameterizing_rules/builder.rb +43 -0
  29. data/lib/lrama/grammar/percent_code.rb +12 -0
  30. data/lib/lrama/grammar/printer.rb +3 -3
  31. data/lib/lrama/grammar/reference.rb +7 -16
  32. data/lib/lrama/grammar/rule.rb +18 -2
  33. data/lib/lrama/grammar/rule_builder.rb +179 -0
  34. data/lib/lrama/grammar/symbol.rb +2 -2
  35. data/lib/lrama/grammar.rb +132 -302
  36. data/lib/lrama/lexer/location.rb +22 -0
  37. data/lib/lrama/lexer/token/char.rb +8 -0
  38. data/lib/lrama/lexer/token/ident.rb +8 -0
  39. data/lib/lrama/lexer/token/parameterizing.rb +34 -0
  40. data/lib/lrama/lexer/token/tag.rb +12 -0
  41. data/lib/lrama/lexer/token/user_code.rb +64 -0
  42. data/lib/lrama/lexer/token.rb +23 -63
  43. data/lib/lrama/lexer.rb +38 -37
  44. data/lib/lrama/option_parser.rb +2 -1
  45. data/lib/lrama/options.rb +2 -2
  46. data/lib/lrama/output.rb +11 -2
  47. data/lib/lrama/parser.rb +607 -488
  48. data/lib/lrama/report/profile.rb +1 -12
  49. data/lib/lrama/version.rb +1 -1
  50. data/parser.y +177 -96
  51. data/rbs_collection.lock.yaml +17 -1
  52. data/rbs_collection.yaml +1 -0
  53. data/sample/calc.y +3 -1
  54. data/sample/parse.y +5 -1
  55. data/sig/lrama/grammar/code/printer_code.rbs +15 -0
  56. data/sig/lrama/grammar/code.rbs +24 -0
  57. data/sig/lrama/grammar/counter.rbs +11 -0
  58. data/sig/lrama/grammar/parameterizing_rules/builder.rbs +10 -0
  59. data/sig/lrama/grammar/percent_code.rbs +10 -0
  60. data/sig/lrama/grammar/precedence.rbs +11 -0
  61. data/sig/lrama/grammar/printer.rbs +11 -0
  62. data/sig/lrama/grammar/reference.rbs +22 -0
  63. data/sig/lrama/grammar/rule.rbs +13 -0
  64. data/sig/lrama/grammar/rule_builder.rbs +41 -0
  65. data/sig/lrama/grammar.rbs +5 -0
  66. data/sig/lrama/lexer/location.rbs +14 -0
  67. data/sig/lrama/lexer/token/char.rbs +8 -0
  68. data/sig/lrama/lexer/token/ident.rbs +8 -0
  69. data/sig/lrama/lexer/token/parameterizing.rbs +15 -0
  70. data/sig/lrama/lexer/token/tag.rbs +9 -0
  71. data/sig/lrama/lexer/token/user_code.rbs +16 -0
  72. data/sig/lrama/lexer/token.rbs +22 -0
  73. data/sig/stdlib/strscan/string_scanner.rbs +5 -0
  74. data/template/bison/_yacc.h +2 -2
  75. data/template/bison/yacc.c +5 -2
  76. metadata +44 -4
  77. data/lib/lrama/lexer/token/type.rb +0 -8
  78. data/sig/lrama/lexer/token/type.rbs +0 -17
@@ -0,0 +1,20 @@
1
module Lrama
  class Grammar
    class ParameterizingRules
      class Builder
        # Builds the rules that expand a `nonempty_list` parameterizing rule.
        #
        # With X standing for `@token.s_value`, #build returns, in order:
        #
        #   lhs             : nonempty_list_X
        #   nonempty_list_X : X
        #   nonempty_list_X : nonempty_list_X X
        #
        # The instance variables read here are not assigned in this class;
        # presumably Base#initialize sets them up (Base is defined elsewhere).
        class NonemptyList < Base
          # Returns the three generated Rule objects as an Array. Every rule
          # carries the same token_code, precedence_sym and lineno; ids are
          # taken from @rule_counter in the order the rules are listed above.
          def build
            validate_argument_number!

            list_token = Lrama::Lexer::Token::Ident.new(s_value: "nonempty_list_#{@token.s_value}")
            common = { token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line }

            [
              Rule.new(id: @rule_counter.increment, _lhs: @lhs, _rhs: [list_token], **common),
              Rule.new(id: @rule_counter.increment, _lhs: list_token, _rhs: [@token], **common),
              Rule.new(id: @rule_counter.increment, _lhs: list_token, _rhs: [list_token, @token], **common),
            ]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module Lrama
  class Grammar
    class ParameterizingRules
      class Builder
        # Builds the rules that expand an `option` parameterizing rule.
        #
        # With X standing for `@token.s_value`, #build returns, in order:
        #
        #   lhs      : option_X
        #   option_X : (empty)
        #   option_X : X
        #
        # The instance variables read here are not assigned in this class;
        # presumably Base#initialize sets them up (Base is defined elsewhere).
        class Option < Base
          # Returns the three generated Rule objects as an Array. Every rule
          # carries the same token_code, precedence_sym and lineno; ids are
          # taken from @rule_counter in the order the rules are listed above.
          def build
            validate_argument_number!

            rules = []
            option_token = Lrama::Lexer::Token::Ident.new(s_value: "option_#{@token.s_value}")
            rules << Rule.new(id: @rule_counter.increment, _lhs: @lhs, _rhs: [option_token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line)
            rules << Rule.new(id: @rule_counter.increment, _lhs: option_token, _rhs: [], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line)
            # The non-empty alternative must receive @user_code like its siblings.
            # It previously read the misspelled, never-assigned @ser_code, which
            # evaluates to nil and silently dropped the action code for this rule.
            rules << Rule.new(id: @rule_counter.increment, _lhs: option_token, _rhs: [@token], token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line)
            rules
          end
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module Lrama
  class Grammar
    class ParameterizingRules
      class Builder
        # Builds the rules that expand a two-argument `separated_list`
        # parameterizing rule.
        #
        # With SEP = @args[0], X = @args[1] and the generated nonterminal named
        # after X only (`separated_list_<X.s_value>`), #build returns, in order:
        #
        #   lhs              : separated_list_X
        #   separated_list_X : (empty)
        #   separated_list_X : X
        #   separated_list_X : separated_list_X SEP X
        class SeparatedList < Base
          # Forwards all arguments to Base#initialize unchanged, then picks the
          # separator and element out of @args and records that exactly two
          # arguments are expected. @args itself is presumably populated by
          # Base (not visible here).
          def initialize(token, rule_counter, lhs, user_code, precedence_sym, line)
            super
            @separator, @token = @args[0], @args[1]
            @expected_argument_num = 2
          end

          # Returns the four generated Rule objects as an Array. Every rule
          # carries the same token_code, precedence_sym and lineno; ids are
          # taken from @rule_counter in the order the rules are listed above.
          def build
            validate_argument_number!

            list_token = Lrama::Lexer::Token::Ident.new(s_value: "separated_list_#{@token.s_value}")
            common = { token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line }

            [
              Rule.new(id: @rule_counter.increment, _lhs: @lhs, _rhs: [list_token], **common),
              Rule.new(id: @rule_counter.increment, _lhs: list_token, _rhs: [], **common),
              Rule.new(id: @rule_counter.increment, _lhs: list_token, _rhs: [@token], **common),
              Rule.new(id: @rule_counter.increment, _lhs: list_token, _rhs: [list_token, @separator, @token], **common),
            ]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module Lrama
  class Grammar
    class ParameterizingRules
      class Builder
        # Builds the rules that expand a two-argument `separated_nonempty_list`
        # parameterizing rule.
        #
        # With SEP = @args[0], X = @args[1] and the generated nonterminal named
        # after X only (`separated_nonempty_list_<X.s_value>`), #build returns,
        # in order:
        #
        #   lhs                       : separated_nonempty_list_X
        #   separated_nonempty_list_X : X
        #   separated_nonempty_list_X : separated_nonempty_list_X SEP X
        class SeparatedNonemptyList < Base
          # Forwards all arguments to Base#initialize unchanged, then picks the
          # separator and element out of @args and records that exactly two
          # arguments are expected. @args itself is presumably populated by
          # Base (not visible here).
          def initialize(token, rule_counter, lhs, user_code, precedence_sym, line)
            super
            @separator, @token = @args[0], @args[1]
            @expected_argument_num = 2
          end

          # Returns the three generated Rule objects as an Array. Every rule
          # carries the same token_code, precedence_sym and lineno; ids are
          # taken from @rule_counter in the order the rules are listed above.
          def build
            validate_argument_number!

            list_token = Lrama::Lexer::Token::Ident.new(s_value: "separated_nonempty_list_#{@token.s_value}")
            common = { token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line }

            [
              Rule.new(id: @rule_counter.increment, _lhs: @lhs, _rhs: [list_token], **common),
              Rule.new(id: @rule_counter.increment, _lhs: list_token, _rhs: [@token], **common),
              Rule.new(id: @rule_counter.increment, _lhs: list_token, _rhs: [list_token, @separator, @token], **common),
            ]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require 'lrama/grammar/parameterizing_rules/builder/base'
2
+ require 'lrama/grammar/parameterizing_rules/builder/list'
3
+ require 'lrama/grammar/parameterizing_rules/builder/nonempty_list'
4
+ require 'lrama/grammar/parameterizing_rules/builder/option'
5
+ require 'lrama/grammar/parameterizing_rules/builder/separated_nonempty_list'
6
+ require 'lrama/grammar/parameterizing_rules/builder/separated_list'
7
+
8
module Lrama
  class Grammar
    class ParameterizingRules
      # Entry point for expanding a parameterizing rule: looks up the concrete
      # builder class registered for the token's name and delegates to it.
      class Builder
        # Rule name (as a Symbol) => builder class. The "?", "+" and "*" keys
        # map to the same classes as :option, :nonempty_list and :list.
        RULES = {
          option: Option,
          "?": Option,
          nonempty_list: NonemptyList,
          "+": NonemptyList,
          list: List,
          "*": List,
          separated_nonempty_list: SeparatedNonemptyList,
          separated_list: SeparatedList,
        }

        # Stores every argument as-is; the lookup key is `token.s_value`
        # converted to a Symbol.
        def initialize(token, rule_counter, lhs, user_code, precedence_sym, line)
          @token = token
          @rule_counter = rule_counter
          @lhs = lhs
          @user_code = user_code
          @precedence_sym = precedence_sym
          @line = line
          @key = token.s_value.to_sym
        end

        # Instantiates the registered builder with the stored arguments and
        # returns the result of its #build.
        #
        # Raises RuntimeError when no builder is registered under @key.
        def build
          builder_class = RULES.fetch(@key) do
            raise "Parameterizing rule does not exist. `#{@key}`"
          end

          builder_class.new(@token, @rule_counter, @lhs, @user_code, @precedence_sym, @line).build
        end
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module Lrama
  class Grammar
    # Read-only pair of an `id` and a `code` value, stored exactly as given.
    class PercentCode
      attr_reader :id
      attr_reader :code

      # id   - stored and exposed through #id
      # code - stored and exposed through #code
      def initialize(id, code)
        @id, @code = id, code
      end
    end
  end
end
@@ -1,8 +1,8 @@
1
1
  module Lrama
2
2
  class Grammar
3
- class Printer < Struct.new(:ident_or_tags, :code, :lineno, keyword_init: true)
4
- def translated_code(member)
5
- code.translated_printer_code(member)
3
+ class Printer < Struct.new(:ident_or_tags, :token_code, :lineno, keyword_init: true)
4
+ def translated_code(tag)
5
+ Code::PrinterCode.new(type: :printer, token_code: token_code, tag: tag).translated_code
6
6
  end
7
7
  end
8
8
  end
@@ -1,21 +1,12 @@
1
- # type: :dollar or :at
2
- # ex_tag: "$<tag>1" (Optional)
3
-
4
1
  module Lrama
5
2
  class Grammar
6
- class Reference < Struct.new(:type, :value, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true)
7
- def tag
8
- if ex_tag
9
- ex_tag
10
- else
11
- # FIXME: Remove this class check
12
- if referring_symbol.is_a?(Symbol)
13
- referring_symbol.tag
14
- else
15
- # Lrama::Lexer::Token (User_code) case
16
- nil
17
- end
18
- end
3
+ # type: :dollar or :at
4
+ # name: String (e.g. $$, $foo, $expr.right)
5
+ # index: Integer (e.g. $1)
6
+ # ex_tag: "$<tag>1" (Optional)
7
+ class Reference < Struct.new(:type, :name, :index, :ex_tag, :first_column, :last_column, keyword_init: true)
8
+ def value
9
+ name || index
19
10
  end
20
11
  end
21
12
  end
@@ -1,6 +1,20 @@
1
1
  module Lrama
2
2
  class Grammar
3
- class Rule < Struct.new(:id, :lhs, :rhs, :code, :nullable, :precedence_sym, :lineno, keyword_init: true)
3
+ # _rhs holds original RHS element. Use rhs to refer to Symbol.
4
+ class Rule < Struct.new(:id, :_lhs, :lhs, :_rhs, :rhs, :token_code, :position_in_original_rule_rhs, :nullable, :precedence_sym, :lineno, keyword_init: true)
5
+ attr_accessor :original_rule
6
+
7
+ def ==(other)
8
+ self.class == other.class &&
9
+ self.lhs == other.lhs &&
10
+ self.rhs == other.rhs &&
11
+ self.token_code == other.token_code &&
12
+ self.position_in_original_rule_rhs == other.position_in_original_rule_rhs &&
13
+ self.nullable == other.nullable &&
14
+ self.precedence_sym == other.precedence_sym &&
15
+ self.lineno == other.lineno
16
+ end
17
+
4
18
  # TODO: Change this to display_name
5
19
  def to_s
6
20
  l = lhs.id.s_value
@@ -32,7 +46,9 @@ module Lrama
32
46
  end
33
47
 
34
48
  def translated_code
35
- code&.translated_code
49
+ return nil unless token_code
50
+
51
+ Code::RuleAction.new(type: :rule_action, token_code: token_code, rule: self).translated_code
36
52
  end
37
53
  end
38
54
  end
@@ -0,0 +1,179 @@
1
+ require 'lrama/grammar/parameterizing_rules/builder'
2
+
3
module Lrama
  class Grammar
    # Accumulates the parts of one grammar rule (LHS, RHS tokens, action code,
    # precedence symbol) and converts them into Rule objects.
    #
    # Flow as used within this class: feed input via #lhs=, #add_rhs,
    # #user_code= and #precedence_sym=, call #complete_input, then
    # #setup_rules. Results are read from #rules, #midrule_action_rules and
    # #parameterizing_rules.
    class RuleBuilder
      attr_accessor :lhs, :line
      attr_reader :rhs, :user_code, :precedence_sym
      # Assigned while #setup_rules runs; nil until then.
      attr_reader :parameterizing_rules, :midrule_action_rules, :rules

      # rule_counter / midrule_action_counter - objects answering #increment,
      #   used for rule ids and for numbering mid-rule action symbols.
      # position_in_original_rule_rhs - passed through to the Rule that
      #   #setup_rules creates (set for builders derived from a mid-rule action).
      # skip_preprocess_references - when true, #setup_rules does not run
      #   reference numbering.
      def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, skip_preprocess_references: false)
        @rule_counter = rule_counter
        @midrule_action_counter = midrule_action_counter
        @position_in_original_rule_rhs = position_in_original_rule_rhs
        @skip_preprocess_references = skip_preprocess_references

        @lhs = nil
        @rhs = []
        @user_code = nil
        @precedence_sym = nil
        @line = nil
        @rule_builders_for_derived_rules = []
      end

      # Appends a token to the RHS. Any pending user code is moved into the
      # RHS first, so it ends up in front of the new token.
      # The first input seen determines #line.
      def add_rhs(rhs)
        @line ||= rhs.line
        flush_user_code
        @rhs << rhs
      end

      # Sets the pending action code. A previously pending code is moved into
      # the RHS first. The first input seen determines #line.
      def user_code=(user_code)
        @line ||= user_code.line
        flush_user_code
        @user_code = user_code
      end

      # Sets the precedence symbol, moving any pending action code into the RHS.
      def precedence_sym=(precedence_sym)
        flush_user_code
        @precedence_sym = precedence_sym
      end

      # Marks the end of input by freezing the RHS array.
      def complete_input
        freeze_rhs
      end

      # Numbers references (unless skipped), rewrites the RHS, then builds the
      # Rule objects.
      def setup_rules
        preprocess_references unless @skip_preprocess_references
        process_rhs
        build_rules
      end

      private

      def freeze_rhs
        @rhs.freeze
      end

      def preprocess_references
        numberize_references
      end

      # Fills @rules and @midrule_action_rules from the rewritten RHS.
      def build_rules
        tokens = @replaced_rhs

        # Expand Parameterizing rules
        if tokens.any? {|t| t.is_a?(Lrama::Lexer::Token::Parameterizing) }
          @rules = @parameterizing_rules
          @midrule_action_rules = []
        else
          main_rule = Rule.new(
            id: @rule_counter.increment,
            _lhs: lhs,
            _rhs: tokens,
            token_code: user_code,
            position_in_original_rule_rhs: @position_in_original_rule_rhs,
            precedence_sym: precedence_sym,
            lineno: line
          )
          @rules = [main_rule]
          @midrule_action_rules = @rule_builders_for_derived_rules.flat_map(&:rules)
          # Link every mid-rule action rule back to the rule it was cut out of.
          @midrule_action_rules.each {|derived| derived.original_rule = main_rule }
        end
      end

      # rhs is a mixture of variety type of tokens like `Ident`, `Parameterizing`, `UserCode` and so on.
      # `#process_rhs` replaces some kind of tokens to `Ident` so that all `@replaced_rhs` are `Ident` or `Char`.
      #
      # Runs at most once; later calls return immediately.
      def process_rhs
        return if @replaced_rhs

        @replaced_rhs = []
        @parameterizing_rules = []

        rhs.each_with_index do |token, position|
          case token
          when Lrama::Lexer::Token::Char, Lrama::Lexer::Token::Ident
            @replaced_rhs << token
          when Lrama::Lexer::Token::Parameterizing
            @parameterizing_rules = ParameterizingRules::Builder.new(token, @rule_counter, lhs, user_code, precedence_sym, line).build
            @replaced_rhs << token
          when Lrama::Lexer::Token::UserCode
            # A mid-rule action becomes a generated symbol ("@N" when the
            # action is referred to, "$@N" otherwise) plus a derived builder
            # whose only content is the action code.
            marker = token.referred ? "@" : "$@"
            midrule_symbol = Lrama::Lexer::Token::Ident.new(s_value: "#{marker}#{@midrule_action_counter.increment}")
            @replaced_rhs << midrule_symbol

            derived = RuleBuilder.new(@rule_counter, @midrule_action_counter, position, skip_preprocess_references: true)
            derived.lhs = midrule_symbol
            derived.user_code = token
            derived.complete_input
            derived.setup_rules

            @rule_builders_for_derived_rules << derived
          else
            raise "Unexpected token. #{token}"
          end
        end
      end

      # Resolves named references inside every UserCode token: a name matching
      # the LHS becomes '$', a name matching exactly one RHS element becomes
      # that element's 1-origin index. For non-:at references with an index,
      # the referred RHS element is flagged via `referred = true`.
      #
      # Raises RuntimeError for ambiguous or unknown names, and for an index
      # that is not strictly before the referring code's own position.
      def numberize_references
        # Bison n'th component is 1-origin
        (rhs + [user_code]).compact.each.with_index(1) do |code, position|
          next unless code.is_a?(Lrama::Lexer::Token::UserCode)

          code.references.each do |ref|
            ref_name = ref.name
            if ref_name && ref_name != '$'
              if lhs.referred_by?(ref_name)
                ref.name = '$'
              else
                matches = rhs.each_with_index.select {|sym, _idx| sym.referred_by?(ref_name) }

                raise "Referring symbol `#{ref_name}` is duplicated. #{code}" if matches.size >= 2

                match = matches.first
                raise "Referring symbol `#{ref_name}` is not found. #{code}" unless match

                ref.index = match[1] + 1
              end
            end

            # TODO: Need to check index of @ too?
            next if ref.type == :at
            next unless ref.index

            # TODO: Prohibit $0 even so Bison allows it?
            # See: https://www.gnu.org/software/bison/manual/html_node/Actions.html
            raise "Can not refer following component. #{ref.index} >= #{position}. #{code}" if ref.index >= position
            rhs[ref.index - 1].referred = true
          end
        end
      end

      # Moves pending action code (if any) to the end of the RHS and clears it.
      def flush_user_code
        return unless @user_code

        @rhs << @user_code
        @user_code = nil
      end
    end
  end
end
@@ -47,9 +47,9 @@ module Lrama
47
47
  name = "YYACCEPT"
48
48
  when eof_symbol?
49
49
  name = "YYEOF"
50
- when term? && id.type == Token::Char
50
+ when term? && id.is_a?(Lrama::Lexer::Token::Char)
51
51
  name = number.to_s + display_name
52
- when term? && id.type == Token::Ident
52
+ when term? && id.is_a?(Lrama::Lexer::Token::Ident)
53
53
  name = id.s_value
54
54
  when nterm? && (id.s_value.include?("$") || id.s_value.include?("@"))
55
55
  name = number.to_s + id.s_value