lrama 0.5.9 → 0.5.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/test.yaml +25 -0
  3. data/.gitignore +7 -4
  4. data/Gemfile +9 -5
  5. data/Rakefile +13 -0
  6. data/Steepfile +13 -11
  7. data/lib/lrama/context.rb +1 -3
  8. data/lib/lrama/counterexamples/path.rb +0 -46
  9. data/lib/lrama/counterexamples/production_path.rb +17 -0
  10. data/lib/lrama/counterexamples/start_path.rb +21 -0
  11. data/lib/lrama/counterexamples/transition_path.rb +17 -0
  12. data/lib/lrama/counterexamples.rb +3 -0
  13. data/lib/lrama/grammar/code/initial_action_code.rb +28 -0
  14. data/lib/lrama/grammar/code/no_reference_code.rb +24 -0
  15. data/lib/lrama/grammar/code/printer_code.rb +34 -0
  16. data/lib/lrama/grammar/code/rule_action.rb +62 -0
  17. data/lib/lrama/grammar/code.rb +9 -93
  18. data/lib/lrama/grammar/counter.rb +15 -0
  19. data/lib/lrama/grammar/error_token.rb +3 -3
  20. data/lib/lrama/grammar/parameterizing_rules/builder/base.rb +36 -0
  21. data/lib/lrama/grammar/parameterizing_rules/builder/list.rb +28 -0
  22. data/lib/lrama/grammar/parameterizing_rules/builder/nonempty_list.rb +28 -0
  23. data/lib/lrama/grammar/parameterizing_rules/builder/option.rb +28 -0
  24. data/lib/lrama/grammar/parameterizing_rules/builder/separated_list.rb +39 -0
  25. data/lib/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rb +34 -0
  26. data/lib/lrama/grammar/parameterizing_rules/builder.rb +60 -0
  27. data/lib/lrama/grammar/printer.rb +3 -3
  28. data/lib/lrama/grammar/reference.rb +7 -16
  29. data/lib/lrama/grammar/rule.rb +19 -2
  30. data/lib/lrama/grammar/rule_builder.rb +177 -0
  31. data/lib/lrama/grammar/symbol.rb +16 -2
  32. data/lib/lrama/grammar/type.rb +6 -0
  33. data/lib/lrama/grammar.rb +115 -325
  34. data/lib/lrama/lexer/location.rb +22 -0
  35. data/lib/lrama/lexer/token/parameterizing.rb +18 -3
  36. data/lib/lrama/lexer/token/tag.rb +4 -0
  37. data/lib/lrama/lexer/token/user_code.rb +54 -4
  38. data/lib/lrama/lexer/token.rb +35 -10
  39. data/lib/lrama/lexer.rb +32 -31
  40. data/lib/lrama/options.rb +1 -2
  41. data/lib/lrama/output.rb +2 -2
  42. data/lib/lrama/parser.rb +514 -424
  43. data/lib/lrama/report/profile.rb +1 -12
  44. data/lib/lrama/version.rb +1 -1
  45. data/lib/lrama.rb +0 -1
  46. data/parser.y +111 -52
  47. data/rbs_collection.lock.yaml +6 -8
  48. data/rbs_collection.yaml +1 -0
  49. data/sig/lrama/grammar/code/printer_code.rbs +15 -0
  50. data/sig/lrama/grammar/code.rbs +24 -0
  51. data/sig/lrama/grammar/counter.rbs +11 -0
  52. data/sig/lrama/grammar/error_token.rbs +11 -0
  53. data/sig/lrama/grammar/parameterizing_rules/builder/base.rbs +26 -0
  54. data/sig/lrama/grammar/parameterizing_rules/builder/list.rbs +10 -0
  55. data/sig/lrama/grammar/parameterizing_rules/builder/nonempty_list.rbs +10 -0
  56. data/sig/lrama/grammar/parameterizing_rules/builder/option.rbs +10 -0
  57. data/sig/lrama/grammar/parameterizing_rules/builder/separated_list.rbs +11 -0
  58. data/sig/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rbs +11 -0
  59. data/sig/lrama/grammar/parameterizing_rules/builder.rbs +23 -0
  60. data/sig/lrama/grammar/precedence.rbs +11 -0
  61. data/sig/lrama/grammar/printer.rbs +11 -0
  62. data/sig/lrama/grammar/reference.rbs +6 -6
  63. data/sig/lrama/grammar/rule.rbs +13 -0
  64. data/sig/lrama/grammar/rule_builder.rbs +42 -0
  65. data/sig/lrama/grammar/symbol.rbs +37 -0
  66. data/sig/lrama/lexer/location.rbs +14 -0
  67. data/sig/lrama/lexer/token/parameterizing.rbs +9 -0
  68. data/sig/lrama/lexer/token/tag.rbs +1 -0
  69. data/sig/lrama/lexer/token/user_code.rbs +8 -1
  70. data/sig/lrama/lexer/token.rbs +9 -4
  71. data/sig/stdlib/strscan/string_scanner.rbs +5 -0
  72. data/template/bison/yacc.c +5 -2
  73. metadata +38 -3
  74. data/lib/lrama/type.rb +0 -4
@@ -0,0 +1,28 @@
1
module Lrama
  class Grammar
    class ParameterizingRules
      class Builder
        # Builder for nonempty list of general parameterizing rules
        class NonemptyList < Base
          # Expands a `nonempty_list(X)` usage into two left-recursive rules.
          #
          # program: nonempty_list(number)
          #
          # =>
          #
          # program: nonempty_list_number
          # nonempty_list_number: number
          # nonempty_list_number: nonempty_list_number number
          #
          # Sets @build_token to the generated `nonempty_list_X` identifier and
          # returns the generated Rule objects as an Array (base case first).
          # NOTE(review): validate_argument_number! is inherited from Base, which
          # is not visible here — presumably it raises on a wrong argument count.
          def build
            validate_argument_number!

            @build_token = Lrama::Lexer::Token::Ident.new(s_value: "nonempty_list_#{@token.s_value}")

            # One RHS per generated rule; ids are taken from the counter in this order.
            [
              [@token],
              [@build_token, @token],
            ].map do |rhs|
              Rule.new(id: @rule_counter.increment, _lhs: @build_token, _rhs: rhs, lhs_tag: @lhs_tag, token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module Lrama
  class Grammar
    class ParameterizingRules
      class Builder
        # Builder for option of general parameterizing rules
        class Option < Base
          # Expands an `option(X)` usage into an empty rule and a single-X rule.
          #
          # program: option(number)
          #
          # =>
          #
          # program: option_number
          # option_number: ε
          # option_number: number
          #
          # Sets @build_token to the generated `option_X` identifier and returns
          # the generated Rule objects as an Array (empty alternative first).
          # NOTE(review): validate_argument_number! is inherited from Base, which
          # is not visible here — presumably it raises on a wrong argument count.
          def build
            validate_argument_number!

            @build_token = Lrama::Lexer::Token::Ident.new(s_value: "option_#{@token.s_value}")

            # One RHS per generated rule; ids are taken from the counter in this order.
            [
              [],
              [@token],
            ].map do |rhs|
              Rule.new(id: @rule_counter.increment, _lhs: @build_token, _rhs: rhs, lhs_tag: @lhs_tag, token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,39 @@
1
module Lrama
  class Grammar
    class ParameterizingRules
      class Builder
        # Builder for separated list of general parameterizing rules
        class SeparatedList < Base
          # Takes the same arguments as Base#initialize (forwarded unchanged by
          # the bare `super`), then re-reads the two arguments of
          # `separated_list(separator, element)`.
          # NOTE(review): assumes Base#initialize populates @args — Base is not
          # visible here; confirm.
          def initialize(token, rule_counter, lhs_tag, user_code, precedence_sym, line)
            super
            @separator = @args[0]
            @token = @args[1]
            @expected_argument_num = 2
          end

          # Expands a `separated_list(sep, X)` usage into four rules.
          #
          # program: separated_list(',', number)
          #
          # =>
          #
          # program: separated_list_number
          # separated_list_number: ε
          # separated_list_number: separated_nonempty_list_number
          # separated_nonempty_list_number: number
          # separated_nonempty_list_number: separated_nonempty_list_number ',' number
          #
          # Sets @build_token to the generated `separated_list_X` identifier and
          # returns the generated Rule objects as an Array, in the order above.
          def build
            validate_argument_number!

            @build_token = Lrama::Lexer::Token::Ident.new(s_value: "separated_list_#{@token.s_value}")
            nonempty_token = Lrama::Lexer::Token::Ident.new(s_value: "separated_nonempty_list_#{@token.s_value}")

            # [lhs, rhs] per generated rule; ids are taken from the counter in this order.
            [
              [@build_token, []],
              [@build_token, [nonempty_token]],
              [nonempty_token, [@token]],
              [nonempty_token, [nonempty_token, @separator, @token]],
            ].map do |lhs, rhs|
              Rule.new(id: @rule_counter.increment, _lhs: lhs, _rhs: rhs, lhs_tag: @lhs_tag, token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
module Lrama
  class Grammar
    class ParameterizingRules
      class Builder
        # Builder for separated nonempty list of general parameterizing rules
        class SeparatedNonemptyList < Base
          # Takes the same arguments as Base#initialize (forwarded unchanged by
          # the bare `super`), then re-reads the two arguments of
          # `separated_nonempty_list(separator, element)`.
          # NOTE(review): assumes Base#initialize populates @args — Base is not
          # visible here; confirm.
          def initialize(token, rule_counter, lhs_tag, user_code, precedence_sym, line)
            super
            @separator = @args[0]
            @token = @args[1]
            @expected_argument_num = 2
          end

          # Expands a `separated_nonempty_list(sep, X)` usage into two rules.
          #
          # program: separated_nonempty_list(',', number)
          #
          # =>
          #
          # program: separated_nonempty_list_number
          # separated_nonempty_list_number: number
          # separated_nonempty_list_number: separated_nonempty_list_number ',' number
          #
          # Sets @build_token to the generated `separated_nonempty_list_X`
          # identifier and returns the generated Rule objects as an Array
          # (base case first).
          def build
            validate_argument_number!

            @build_token = Lrama::Lexer::Token::Ident.new(s_value: "separated_nonempty_list_#{@token.s_value}")

            # One RHS per generated rule; ids are taken from the counter in this order.
            [
              [@token],
              [@build_token, @separator, @token],
            ].map do |rhs|
              Rule.new(id: @rule_counter.increment, _lhs: @build_token, _rhs: rhs, lhs_tag: @lhs_tag, token_code: @user_code, precedence_sym: @precedence_sym, lineno: @line)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
+ require 'lrama/grammar/parameterizing_rules/builder/base'
2
+ require 'lrama/grammar/parameterizing_rules/builder/list'
3
+ require 'lrama/grammar/parameterizing_rules/builder/nonempty_list'
4
+ require 'lrama/grammar/parameterizing_rules/builder/option'
5
+ require 'lrama/grammar/parameterizing_rules/builder/separated_nonempty_list'
6
+ require 'lrama/grammar/parameterizing_rules/builder/separated_list'
7
+
8
module Lrama
  class Grammar
    class ParameterizingRules
      # Builder for parameterizing rules
      #
      # Looks up the concrete builder class registered in RULES under the
      # token's name (e.g. `option`, `?`, `list`, `*`) and delegates to it.
      class Builder
        # Parameterizing rule name (or its shorthand operator) => builder class.
        # Frozen so the dispatch table cannot be mutated at runtime.
        RULES = {
          option: Lrama::Grammar::ParameterizingRules::Builder::Option,
          "?": Lrama::Grammar::ParameterizingRules::Builder::Option,
          nonempty_list: Lrama::Grammar::ParameterizingRules::Builder::NonemptyList,
          "+": Lrama::Grammar::ParameterizingRules::Builder::NonemptyList,
          list: Lrama::Grammar::ParameterizingRules::Builder::List,
          "*": Lrama::Grammar::ParameterizingRules::Builder::List,
          separated_nonempty_list: Lrama::Grammar::ParameterizingRules::Builder::SeparatedNonemptyList,
          separated_list: Lrama::Grammar::ParameterizingRules::Builder::SeparatedList,
        }.freeze

        # token          - parameterizing token; its s_value selects the builder
        # rule_counter   - counter handed to the concrete builder for rule ids
        # lhs_tag, user_code, precedence_sym, line - passed through unchanged
        #                  to the concrete builder
        def initialize(token, rule_counter, lhs_tag, user_code, precedence_sym, line)
          @token = token
          @key = token.s_value.to_sym
          @rule_counter = rule_counter
          @lhs_tag = lhs_tag
          @user_code = user_code
          @precedence_sym = precedence_sym
          @line = line
          @builder = nil
        end

        # Returns whatever the concrete builder's #build returns.
        # Raises RuntimeError when the token names an unknown parameterizing rule.
        def build
          create_builder
          @builder.build
        end

        # Returns the concrete builder's build_token.
        # Raises RuntimeError when the token names an unknown parameterizing rule.
        def build_token
          create_builder
          @builder.build_token
        end

        private

        # Instantiates the concrete builder once; later calls reuse it so that
        # #build and #build_token talk to the same object.
        def create_builder
          return if @builder

          validate_key!
          @builder = RULES[@key].new(@token, @rule_counter, @lhs_tag, @user_code, @precedence_sym, @line)
        end

        def validate_key!
          raise "Parameterizing rule does not exist. `#{@key}`" unless RULES.key?(@key)
        end
      end
    end
  end
end
@@ -1,8 +1,8 @@
1
1
  module Lrama
2
2
  class Grammar
3
- class Printer < Struct.new(:ident_or_tags, :code, :lineno, keyword_init: true)
4
- def translated_code(member)
5
- code.translated_printer_code(member)
3
+ class Printer < Struct.new(:ident_or_tags, :token_code, :lineno, keyword_init: true)
4
+ def translated_code(tag)
5
+ Code::PrinterCode.new(type: :printer, token_code: token_code, tag: tag).translated_code
6
6
  end
7
7
  end
8
8
  end
@@ -1,21 +1,12 @@
1
- # type: :dollar or :at
2
- # ex_tag: "$<tag>1" (Optional)
3
-
4
1
  module Lrama
5
2
  class Grammar
6
- class Reference < Struct.new(:type, :value, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true)
7
- def tag
8
- if ex_tag
9
- ex_tag
10
- else
11
- # FIXME: Remove this class check
12
- if referring_symbol.is_a?(Symbol)
13
- referring_symbol.tag
14
- else
15
- # Lrama::Lexer::Token (User_code) case
16
- nil
17
- end
18
- end
3
+ # type: :dollar or :at
4
+ # name: String (e.g. $$, $foo, $expr.right)
5
+ # index: Integer (e.g. $1)
6
+ # ex_tag: "$<tag>1" (Optional)
7
+ class Reference < Struct.new(:type, :name, :index, :ex_tag, :first_column, :last_column, keyword_init: true)
8
+ def value
9
+ name || index
19
10
  end
20
11
  end
21
12
  end
@@ -1,6 +1,21 @@
1
1
  module Lrama
2
2
  class Grammar
3
- class Rule < Struct.new(:id, :lhs, :rhs, :code, :nullable, :precedence_sym, :lineno, keyword_init: true)
3
+ # _rhs holds the original RHS elements. Use rhs to refer to the Symbols.
4
+ class Rule < Struct.new(:id, :_lhs, :lhs, :lhs_tag, :_rhs, :rhs, :token_code, :position_in_original_rule_rhs, :nullable, :precedence_sym, :lineno, keyword_init: true)
5
+ attr_accessor :original_rule
6
+
7
+ def ==(other)
8
+ self.class == other.class &&
9
+ self.lhs == other.lhs &&
10
+ self.lhs_tag == other.lhs_tag &&
11
+ self.rhs == other.rhs &&
12
+ self.token_code == other.token_code &&
13
+ self.position_in_original_rule_rhs == other.position_in_original_rule_rhs &&
14
+ self.nullable == other.nullable &&
15
+ self.precedence_sym == other.precedence_sym &&
16
+ self.lineno == other.lineno
17
+ end
18
+
4
19
  # TODO: Change this to display_name
5
20
  def to_s
6
21
  l = lhs.id.s_value
@@ -32,7 +47,9 @@ module Lrama
32
47
  end
33
48
 
34
49
  def translated_code
35
- code&.translated_code
50
+ return nil unless token_code
51
+
52
+ Code::RuleAction.new(type: :rule_action, token_code: token_code, rule: self).translated_code
36
53
  end
37
54
  end
38
55
  end
@@ -0,0 +1,177 @@
1
+ require 'lrama/grammar/parameterizing_rules/builder'
2
+
3
module Lrama
  class Grammar
    # Accumulates the parts of one grammar rule (LHS, RHS tokens, action code,
    # precedence symbol) as they are supplied, then turns them into Rule
    # objects via #setup_rules.
    #
    # Typical call order, as used by this class for its own derived builders:
    # set #lhs, feed #add_rhs / #user_code= / #precedence_sym=, then
    # #complete_input and #setup_rules. Results are read from #rules,
    # #midrule_action_rules and #parameterizing_rules.
    class RuleBuilder
      attr_accessor :lhs, :lhs_tag, :line
      attr_reader :rhs, :user_code, :precedence_sym

      # Outputs of #setup_rules; nil until it has run.
      attr_reader :parameterizing_rules, :midrule_action_rules, :rules

      # rule_counter                  - counter supplying ids for generated rules
      # midrule_action_counter        - counter used to name midrule action symbols
      # position_in_original_rule_rhs - stored on the generated Rule (nil by default;
      #                                 derived midrule builders pass their RHS index)
      # skip_preprocess_references    - when true, #setup_rules does not resolve references
      def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, skip_preprocess_references: false)
        @rule_counter = rule_counter
        @midrule_action_counter = midrule_action_counter
        @position_in_original_rule_rhs = position_in_original_rule_rhs
        @skip_preprocess_references = skip_preprocess_references

        @lhs = nil
        @rhs = []
        @lhs_tag = nil
        @user_code = nil
        @precedence_sym = nil
        @line = nil
        @rule_builders_for_derived_rules = []
      end

      # Appends a token to the RHS. A pending user code is moved into the RHS
      # first, because code followed by another component is a midrule action.
      def add_rhs(rhs)
        @line ||= rhs.line

        flush_user_code

        @rhs << rhs
      end

      # Sets the (so far) trailing action code; a previously pending code is
      # moved into the RHS first.
      def user_code=(user_code)
        @line ||= user_code.line

        flush_user_code

        @user_code = user_code
      end

      def precedence_sym=(precedence_sym)
        flush_user_code

        @precedence_sym = precedence_sym
      end

      # Marks the end of input for this rule by freezing the RHS array.
      def complete_input
        freeze_rhs
      end

      # Resolves references (unless skipped), replaces non-symbol RHS tokens
      # and builds the Rule objects.
      def setup_rules
        preprocess_references unless @skip_preprocess_references
        process_rhs
        build_rules
      end

      private

      def freeze_rhs
        @rhs.freeze
      end

      def preprocess_references
        numberize_references
      end

      # Builds the Rule for this builder and collects the rules of the derived
      # midrule-action builders, linking each of them back to this rule.
      def build_rules
        rule = Rule.new(
          id: @rule_counter.increment, _lhs: lhs, _rhs: @replaced_rhs, token_code: user_code,
          position_in_original_rule_rhs: @position_in_original_rule_rhs, precedence_sym: precedence_sym, lineno: line
        )
        @rules = [rule]
        @midrule_action_rules = @rule_builders_for_derived_rules.flat_map(&:rules)
        @midrule_action_rules.each { |midrule| midrule.original_rule = rule }
      end

      # rhs is a mixture of various types of tokens such as `Ident`, `Parameterizing` and `UserCode`.
      # `#process_rhs` replaces some kinds of tokens with `Ident` so that every element of
      # `@replaced_rhs` is an `Ident` or a `Char`.
      def process_rhs
        return if @replaced_rhs

        @replaced_rhs = []
        @parameterizing_rules = []

        rhs.each_with_index do |token, i|
          case token
          when Lrama::Lexer::Token::Char, Lrama::Lexer::Token::Ident
            @replaced_rhs << token
          when Lrama::Lexer::Token::Parameterizing
            parameterizing = ParameterizingRules::Builder.new(token, @rule_counter, @lhs_tag, user_code, precedence_sym, line)
            @parameterizing_rules.concat(parameterizing.build)
            @replaced_rhs << parameterizing.build_token
          when Lrama::Lexer::Token::UserCode
            replace_midrule_action(token, i)
          else
            raise "Unexpected token. #{token}"
          end
        end
      end

      # Replaces a midrule action (user code inside the RHS) with a generated
      # symbol — named `@N` when the action is referred to, `$@N` otherwise —
      # and sets up a derived builder producing the rule for that symbol.
      def replace_midrule_action(token, position)
        prefix = token.referred ? "@" : "$@"
        new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s)
        @replaced_rhs << new_token

        rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, position, skip_preprocess_references: true)
        rule_builder.lhs = new_token
        rule_builder.user_code = token
        rule_builder.complete_input
        rule_builder.setup_rules

        @rule_builders_for_derived_rules << rule_builder
      end

      # Resolves named references to `$$` or to a 1-origin index, checks that a
      # `$n` does not point at or past the referring code, and marks the
      # referred RHS component.
      def numberize_references
        # Bison n'th component is 1-origin
        (rhs + [user_code]).compact.each.with_index(1) do |token, i|
          next unless token.is_a?(Lrama::Lexer::Token::UserCode)

          token.references.each do |ref|
            resolve_named_reference(ref, token)

            # TODO: Need to check index of @ too?
            next if ref.type == :at
            next unless ref.index

            # TODO: Prohibit $0 even so Bison allows it?
            #       See: https://www.gnu.org/software/bison/manual/html_node/Actions.html
            raise "Can not refer following component. #{ref.index} >= #{i}. #{token}" if ref.index >= i

            # $0 and negative indexes do not name a component of this rule.
            # Without this guard `rhs[ref.index - 1]` wraps around and marks an
            # unrelated component (or calls `referred=` on nil for a short RHS).
            next unless ref.index >= 1

            rhs[ref.index - 1].referred = true
          end
        end
      end

      # Rewrites a named reference in place: to `$` when it names the LHS,
      # otherwise to the 1-origin index of the single matching RHS token.
      # Raises when the name matches several RHS tokens or none.
      def resolve_named_reference(ref, token)
        ref_name = ref.name
        return if !ref_name || ref_name == '$'

        if lhs.referred_by?(ref_name)
          ref.name = '$'
          return
        end

        candidates = rhs.each_with_index.select { |rhs_token, _index| rhs_token.referred_by?(ref_name) }

        raise "Referring symbol `#{ref_name}` is duplicated. #{token}" if candidates.size >= 2
        raise "Referring symbol `#{ref_name}` is not found. #{token}" if candidates.empty?

        ref.index = candidates.first[1] + 1
      end

      # Moves a pending user code into the RHS (it turned out to be a midrule action).
      def flush_user_code
        return unless @user_code

        @rhs << @user_code
        @user_code = nil
      end
    end
  end
end
@@ -6,10 +6,23 @@
6
6
 
7
7
  module Lrama
8
8
  class Grammar
9
- class Symbol < Struct.new(:id, :alias_name, :number, :tag, :term, :token_id, :nullable, :precedence, :printer, :error_token, keyword_init: true)
10
- attr_accessor :first_set, :first_set_bitmap
9
+ class Symbol
10
+ attr_accessor :id, :alias_name, :tag, :number, :token_id, :nullable, :precedence, :printer, :error_token, :first_set, :first_set_bitmap
11
+ attr_reader :term
11
12
  attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
12
13
 
14
+ def initialize(id:, alias_name: nil, number: nil, tag: nil, term:, token_id: nil, nullable: nil, precedence: nil, printer: nil)
15
+ @id = id
16
+ @alias_name = alias_name
17
+ @number = number
18
+ @tag = tag
19
+ @term = term
20
+ @token_id = token_id
21
+ @nullable = nullable
22
+ @precedence = precedence
23
+ @printer = printer
24
+ end
25
+
13
26
  def term?
14
27
  term
15
28
  end
@@ -41,6 +54,7 @@ module Lrama
41
54
  # name for yysymbol_kind_t
42
55
  #
43
56
  # See: b4_symbol_kind_base
57
+ # @type var name: String
44
58
  def enum_name
45
59
  case
46
60
  when accept_symbol?
@@ -0,0 +1,6 @@
1
module Lrama
  class Grammar
    # Keyword-initialized value object with two members, `id` and `tag`.
    class Type < Struct.new(:id, :tag, keyword_init: true)
    end
  end
end