lrama 0.6.9 → 0.6.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +24 -1
- data/.gitignore +2 -0
- data/Gemfile +6 -3
- data/NEWS.md +269 -14
- data/README.md +41 -4
- data/Rakefile +2 -0
- data/Steepfile +9 -17
- data/doc/development/compressed_state_table/main.md +635 -0
- data/doc/development/compressed_state_table/parse.output +174 -0
- data/doc/development/compressed_state_table/parse.y +22 -0
- data/doc/development/compressed_state_table/parser.rb +282 -0
- data/exe/lrama +1 -0
- data/lib/lrama/bitmap.rb +3 -1
- data/lib/lrama/command.rb +8 -14
- data/lib/lrama/context.rb +11 -9
- data/lib/lrama/counterexamples/derivation.rb +8 -5
- data/lib/lrama/counterexamples/example.rb +9 -4
- data/lib/lrama/counterexamples/path.rb +6 -0
- data/lib/lrama/counterexamples/production_path.rb +2 -0
- data/lib/lrama/counterexamples/start_path.rb +2 -0
- data/lib/lrama/counterexamples/state_item.rb +2 -0
- data/lib/lrama/counterexamples/transition_path.rb +2 -0
- data/lib/lrama/counterexamples/triple.rb +2 -0
- data/lib/lrama/counterexamples.rb +36 -24
- data/lib/lrama/diagnostics.rb +36 -0
- data/lib/lrama/digraph.rb +2 -0
- data/lib/lrama/grammar/auxiliary.rb +2 -0
- data/lib/lrama/grammar/binding.rb +12 -1
- data/lib/lrama/grammar/code/destructor_code.rb +2 -0
- data/lib/lrama/grammar/code/initial_action_code.rb +2 -0
- data/lib/lrama/grammar/code/no_reference_code.rb +2 -0
- data/lib/lrama/grammar/code/printer_code.rb +2 -0
- data/lib/lrama/grammar/code/rule_action.rb +7 -3
- data/lib/lrama/grammar/code.rb +7 -5
- data/lib/lrama/grammar/counter.rb +2 -0
- data/lib/lrama/grammar/destructor.rb +2 -0
- data/lib/lrama/grammar/error_token.rb +2 -0
- data/lib/lrama/grammar/parameterizing_rule/resolver.rb +7 -1
- data/lib/lrama/grammar/parameterizing_rule/rhs.rb +6 -3
- data/lib/lrama/grammar/parameterizing_rule/rule.rb +6 -0
- data/lib/lrama/grammar/parameterizing_rule.rb +2 -0
- data/lib/lrama/grammar/percent_code.rb +2 -0
- data/lib/lrama/grammar/precedence.rb +2 -0
- data/lib/lrama/grammar/printer.rb +2 -0
- data/lib/lrama/grammar/reference.rb +2 -0
- data/lib/lrama/grammar/rule.rb +10 -3
- data/lib/lrama/grammar/rule_builder.rb +64 -65
- data/lib/lrama/grammar/symbol.rb +2 -0
- data/lib/lrama/grammar/symbols/resolver.rb +9 -1
- data/lib/lrama/grammar/symbols.rb +2 -0
- data/lib/lrama/grammar/type.rb +2 -0
- data/lib/lrama/grammar/union.rb +2 -0
- data/lib/lrama/grammar.rb +53 -32
- data/lib/lrama/grammar_validator.rb +37 -0
- data/lib/lrama/lexer/grammar_file.rb +2 -0
- data/lib/lrama/lexer/location.rb +2 -0
- data/lib/lrama/lexer/token/char.rb +2 -0
- data/lib/lrama/lexer/token/ident.rb +2 -0
- data/lib/lrama/lexer/token/instantiate_rule.rb +2 -0
- data/lib/lrama/lexer/token/tag.rb +2 -0
- data/lib/lrama/lexer/token/user_code.rb +4 -2
- data/lib/lrama/lexer/token.rb +7 -5
- data/lib/lrama/lexer.rb +12 -8
- data/lib/lrama/{warning.rb → logger.rb} +5 -13
- data/lib/lrama/option_parser.rb +58 -33
- data/lib/lrama/options.rb +5 -2
- data/lib/lrama/output.rb +38 -69
- data/lib/lrama/parser.rb +677 -773
- data/lib/lrama/report/duration.rb +2 -0
- data/lib/lrama/report/profile.rb +2 -0
- data/lib/lrama/report.rb +4 -2
- data/lib/lrama/state/reduce.rb +4 -2
- data/lib/lrama/state/reduce_reduce_conflict.rb +2 -0
- data/lib/lrama/state/resolved_conflict.rb +3 -1
- data/lib/lrama/state/shift.rb +2 -0
- data/lib/lrama/state/shift_reduce_conflict.rb +2 -0
- data/lib/lrama/state.rb +7 -5
- data/lib/lrama/states/item.rb +5 -3
- data/lib/lrama/states.rb +18 -46
- data/lib/lrama/states_reporter.rb +60 -19
- data/lib/lrama/trace_reporter.rb +30 -0
- data/lib/lrama/version.rb +3 -1
- data/lib/lrama.rb +22 -17
- data/lrama.gemspec +3 -1
- data/parser.y +129 -237
- data/rbs_collection.lock.yaml +10 -2
- data/sig/lrama/counterexamples/derivation.rbs +33 -0
- data/sig/lrama/counterexamples/example.rbs +45 -0
- data/sig/lrama/counterexamples/path.rbs +21 -0
- data/sig/lrama/counterexamples/production_path.rbs +11 -0
- data/sig/lrama/counterexamples/start_path.rbs +13 -0
- data/sig/lrama/counterexamples/state_item.rbs +10 -0
- data/sig/lrama/counterexamples/transition_path.rbs +11 -0
- data/sig/lrama/counterexamples/triple.rbs +20 -0
- data/sig/lrama/counterexamples.rbs +29 -0
- data/sig/lrama/grammar/auxiliary.rbs +10 -0
- data/sig/lrama/grammar/binding.rbs +4 -0
- data/sig/lrama/grammar/code/destructor_code.rbs +3 -4
- data/sig/lrama/grammar/code/initial_action_code.rbs +15 -0
- data/sig/lrama/grammar/code/no_reference_code.rbs +15 -0
- data/sig/lrama/grammar/code/printer_code.rbs +3 -4
- data/sig/lrama/grammar/code/rule_action.rbs +19 -0
- data/sig/lrama/grammar/code.rbs +3 -3
- data/sig/lrama/grammar/destructor.rbs +3 -1
- data/sig/lrama/grammar/error_token.rbs +4 -2
- data/sig/lrama/grammar/parameterizing_rule/resolver.rbs +2 -1
- data/sig/lrama/grammar/parameterizing_rule/rhs.rbs +1 -1
- data/sig/lrama/grammar/precedence.rbs +3 -1
- data/sig/lrama/grammar/printer.rbs +3 -1
- data/sig/lrama/grammar/rule.rbs +35 -3
- data/sig/lrama/grammar/rule_builder.rbs +10 -9
- data/sig/lrama/grammar/symbol.rbs +6 -6
- data/sig/lrama/grammar/symbols/resolver.rbs +24 -5
- data/sig/lrama/grammar/type.rbs +2 -2
- data/sig/lrama/grammar/union.rbs +12 -0
- data/sig/lrama/grammar.rbs +104 -1
- data/sig/lrama/options.rbs +3 -2
- data/sig/lrama/state/reduce.rbs +20 -0
- data/sig/lrama/state/reduce_reduce_conflict.rbs +13 -0
- data/sig/lrama/state/resolved_conflict.rbs +14 -0
- data/sig/lrama/state/shift.rbs +14 -0
- data/sig/lrama/state/shift_reduce_conflict.rbs +13 -0
- data/sig/lrama/state.rbs +79 -0
- data/sig/lrama/states/item.rbs +30 -0
- data/sig/lrama/states.rbs +101 -0
- data/template/bison/yacc.c +24 -19
- metadata +32 -6
- data/sample/calc.output +0 -263
- data/sample/calc.y +0 -101
- data/sample/parse.y +0 -59
@@ -1,12 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Lrama
|
2
4
|
class Grammar
|
3
5
|
class RuleBuilder
|
4
6
|
attr_accessor :lhs, :line
|
5
7
|
attr_reader :lhs_tag, :rhs, :user_code, :precedence_sym
|
6
8
|
|
7
|
-
def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false)
|
9
|
+
def initialize(rule_counter, midrule_action_counter, parameterizing_rule_resolver, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false)
|
8
10
|
@rule_counter = rule_counter
|
9
11
|
@midrule_action_counter = midrule_action_counter
|
12
|
+
@parameterizing_rule_resolver = parameterizing_rule_resolver
|
10
13
|
@position_in_original_rule_rhs = position_in_original_rule_rhs
|
11
14
|
@skip_preprocess_references = skip_preprocess_references
|
12
15
|
|
@@ -19,16 +22,12 @@ module Lrama
|
|
19
22
|
@rules = []
|
20
23
|
@rule_builders_for_parameterizing_rules = []
|
21
24
|
@rule_builders_for_derived_rules = []
|
22
|
-
@rule_builders_for_inline_rules = []
|
23
25
|
@parameterizing_rules = []
|
24
|
-
@inline_rules = []
|
25
26
|
@midrule_action_rules = []
|
26
27
|
end
|
27
28
|
|
28
29
|
def add_rhs(rhs)
|
29
|
-
|
30
|
-
@line = rhs.line
|
31
|
-
end
|
30
|
+
@line ||= rhs.line
|
32
31
|
|
33
32
|
flush_user_code
|
34
33
|
|
@@ -36,9 +35,7 @@ module Lrama
|
|
36
35
|
end
|
37
36
|
|
38
37
|
def user_code=(user_code)
|
39
|
-
|
40
|
-
@line = user_code&.line
|
41
|
-
end
|
38
|
+
@line ||= user_code&.line
|
42
39
|
|
43
40
|
flush_user_code
|
44
41
|
|
@@ -55,18 +52,41 @@ module Lrama
|
|
55
52
|
freeze_rhs
|
56
53
|
end
|
57
54
|
|
58
|
-
def setup_rules
|
55
|
+
def setup_rules
|
59
56
|
preprocess_references unless @skip_preprocess_references
|
60
|
-
|
61
|
-
resolve_inline(parameterizing_rule_resolver)
|
62
|
-
else
|
63
|
-
process_rhs(parameterizing_rule_resolver)
|
64
|
-
end
|
57
|
+
process_rhs
|
65
58
|
build_rules
|
66
59
|
end
|
67
60
|
|
68
61
|
def rules
|
69
|
-
@parameterizing_rules + @
|
62
|
+
@parameterizing_rules + @midrule_action_rules + @rules
|
63
|
+
end
|
64
|
+
|
65
|
+
def has_inline_rules?
|
66
|
+
rhs.any? { |token| @parameterizing_rule_resolver.find_inline(token) }
|
67
|
+
end
|
68
|
+
|
69
|
+
def resolve_inline_rules
|
70
|
+
resolved_builders = [] #: Array[RuleBuilder]
|
71
|
+
rhs.each_with_index do |token, i|
|
72
|
+
if (inline_rule = @parameterizing_rule_resolver.find_inline(token))
|
73
|
+
inline_rule.rhs_list.each do |inline_rhs|
|
74
|
+
rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, lhs_tag: lhs_tag)
|
75
|
+
if token.is_a?(Lexer::Token::InstantiateRule)
|
76
|
+
resolve_inline_rhs(rule_builder, inline_rhs, i, Binding.new(inline_rule, token.args))
|
77
|
+
else
|
78
|
+
resolve_inline_rhs(rule_builder, inline_rhs, i)
|
79
|
+
end
|
80
|
+
rule_builder.lhs = lhs
|
81
|
+
rule_builder.line = line
|
82
|
+
rule_builder.precedence_sym = precedence_sym
|
83
|
+
rule_builder.user_code = replace_inline_user_code(inline_rhs, i)
|
84
|
+
resolved_builders << rule_builder
|
85
|
+
end
|
86
|
+
break
|
87
|
+
end
|
88
|
+
end
|
89
|
+
resolved_builders
|
70
90
|
end
|
71
91
|
|
72
92
|
private
|
@@ -82,31 +102,25 @@ module Lrama
|
|
82
102
|
def build_rules
|
83
103
|
tokens = @replaced_rhs
|
84
104
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
r.original_rule = rule
|
99
|
-
end
|
100
|
-
else
|
101
|
-
@inline_rules = @rule_builders_for_inline_rules.map do |rule_builder|
|
102
|
-
rule_builder.rules
|
103
|
-
end.flatten
|
105
|
+
rule = Rule.new(
|
106
|
+
id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, lhs_tag: lhs_tag, token_code: user_code,
|
107
|
+
position_in_original_rule_rhs: @position_in_original_rule_rhs, precedence_sym: precedence_sym, lineno: line
|
108
|
+
)
|
109
|
+
@rules = [rule]
|
110
|
+
@parameterizing_rules = @rule_builders_for_parameterizing_rules.map do |rule_builder|
|
111
|
+
rule_builder.rules
|
112
|
+
end.flatten
|
113
|
+
@midrule_action_rules = @rule_builders_for_derived_rules.map do |rule_builder|
|
114
|
+
rule_builder.rules
|
115
|
+
end.flatten
|
116
|
+
@midrule_action_rules.each do |r|
|
117
|
+
r.original_rule = rule
|
104
118
|
end
|
105
119
|
end
|
106
120
|
|
107
121
|
# rhs is a mixture of variety type of tokens like `Ident`, `InstantiateRule`, `UserCode` and so on.
|
108
122
|
# `#process_rhs` replaces some kind of tokens to `Ident` so that all `@replaced_rhs` are `Ident` or `Char`.
|
109
|
-
def process_rhs
|
123
|
+
def process_rhs
|
110
124
|
return if @replaced_rhs
|
111
125
|
|
112
126
|
@replaced_rhs = []
|
@@ -118,26 +132,26 @@ module Lrama
|
|
118
132
|
when Lrama::Lexer::Token::Ident
|
119
133
|
@replaced_rhs << token
|
120
134
|
when Lrama::Lexer::Token::InstantiateRule
|
121
|
-
parameterizing_rule = parameterizing_rule_resolver.find_rule(token)
|
135
|
+
parameterizing_rule = @parameterizing_rule_resolver.find_rule(token)
|
122
136
|
raise "Unexpected token. #{token}" unless parameterizing_rule
|
123
137
|
|
124
138
|
bindings = Binding.new(parameterizing_rule, token.args)
|
125
139
|
lhs_s_value = lhs_s_value(token, bindings)
|
126
|
-
if (created_lhs = parameterizing_rule_resolver.created_lhs(lhs_s_value))
|
140
|
+
if (created_lhs = @parameterizing_rule_resolver.created_lhs(lhs_s_value))
|
127
141
|
@replaced_rhs << created_lhs
|
128
142
|
else
|
129
143
|
lhs_token = Lrama::Lexer::Token::Ident.new(s_value: lhs_s_value, location: token.location)
|
130
144
|
@replaced_rhs << lhs_token
|
131
|
-
parameterizing_rule_resolver.created_lhs_list << lhs_token
|
145
|
+
@parameterizing_rule_resolver.created_lhs_list << lhs_token
|
132
146
|
parameterizing_rule.rhs_list.each do |r|
|
133
|
-
rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, lhs_tag: token.lhs_tag || parameterizing_rule.tag)
|
147
|
+
rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, lhs_tag: token.lhs_tag || parameterizing_rule.tag)
|
134
148
|
rule_builder.lhs = lhs_token
|
135
149
|
r.symbols.each { |sym| rule_builder.add_rhs(bindings.resolve_symbol(sym)) }
|
136
150
|
rule_builder.line = line
|
137
151
|
rule_builder.precedence_sym = r.precedence_sym
|
138
152
|
rule_builder.user_code = r.resolve_user_code(bindings)
|
139
153
|
rule_builder.complete_input
|
140
|
-
rule_builder.setup_rules
|
154
|
+
rule_builder.setup_rules
|
141
155
|
@rule_builders_for_parameterizing_rules << rule_builder
|
142
156
|
end
|
143
157
|
end
|
@@ -147,11 +161,11 @@ module Lrama
|
|
147
161
|
new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s)
|
148
162
|
@replaced_rhs << new_token
|
149
163
|
|
150
|
-
rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, lhs_tag: tag, skip_preprocess_references: true)
|
164
|
+
rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, i, lhs_tag: tag, skip_preprocess_references: true)
|
151
165
|
rule_builder.lhs = new_token
|
152
166
|
rule_builder.user_code = token
|
153
167
|
rule_builder.complete_input
|
154
|
-
rule_builder.setup_rules
|
168
|
+
rule_builder.setup_rules
|
155
169
|
|
156
170
|
@rule_builders_for_derived_rules << rule_builder
|
157
171
|
else
|
@@ -172,27 +186,10 @@ module Lrama
|
|
172
186
|
"#{token.rule_name}_#{s_values.join('_')}"
|
173
187
|
end
|
174
188
|
|
175
|
-
def
|
176
|
-
rhs.each_with_index do |token, i|
|
177
|
-
if inline_rule = parameterizing_rule_resolver.find_inline(token)
|
178
|
-
inline_rule.rhs_list.each_with_index do |inline_rhs|
|
179
|
-
rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, lhs_tag: lhs_tag, skip_preprocess_references: true)
|
180
|
-
resolve_inline_rhs(rule_builder, inline_rhs, i)
|
181
|
-
rule_builder.lhs = lhs
|
182
|
-
rule_builder.line = line
|
183
|
-
rule_builder.user_code = replace_inline_user_code(inline_rhs, i)
|
184
|
-
rule_builder.complete_input
|
185
|
-
rule_builder.setup_rules(parameterizing_rule_resolver)
|
186
|
-
@rule_builders_for_inline_rules << rule_builder
|
187
|
-
end
|
188
|
-
end
|
189
|
-
end
|
190
|
-
end
|
191
|
-
|
192
|
-
def resolve_inline_rhs(rule_builder, inline_rhs, index)
|
189
|
+
def resolve_inline_rhs(rule_builder, inline_rhs, index, bindings = nil)
|
193
190
|
rhs.each_with_index do |token, i|
|
194
191
|
if index == i
|
195
|
-
inline_rhs.symbols.each { |sym| rule_builder.add_rhs(sym) }
|
192
|
+
inline_rhs.symbols.each { |sym| rule_builder.add_rhs(bindings.nil? ? sym : bindings.resolve_symbol(sym)) }
|
196
193
|
else
|
197
194
|
rule_builder.add_rhs(token)
|
198
195
|
end
|
@@ -204,6 +201,11 @@ module Lrama
|
|
204
201
|
return user_code if user_code.nil?
|
205
202
|
|
206
203
|
code = user_code.s_value.gsub(/\$#{index + 1}/, inline_rhs.user_code.s_value)
|
204
|
+
user_code.references.each do |ref|
|
205
|
+
next if ref.index.nil? || ref.index <= index # nil is a case for `$$`
|
206
|
+
code = code.gsub(/\$#{ref.index}/, "$#{ref.index + (inline_rhs.symbols.count-1)}")
|
207
|
+
code = code.gsub(/@#{ref.index}/, "@#{ref.index + (inline_rhs.symbols.count-1)}")
|
208
|
+
end
|
207
209
|
Lrama::Lexer::Token::UserCode.new(s_value: code, location: user_code.location)
|
208
210
|
end
|
209
211
|
|
@@ -238,9 +240,6 @@ module Lrama
|
|
238
240
|
end
|
239
241
|
|
240
242
|
if ref.number
|
241
|
-
# TODO: When Inlining is implemented, for example, if `$1` is expanded to multiple RHS tokens,
|
242
|
-
# `$2` needs to access `$2 + n` to actually access it. So, after the Inlining implementation,
|
243
|
-
# it needs resolves from number to index.
|
244
243
|
ref.index = ref.number
|
245
244
|
end
|
246
245
|
|
data/lib/lrama/grammar/symbol.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Lrama
|
2
4
|
class Grammar
|
3
5
|
class Symbols
|
@@ -42,7 +44,9 @@ module Lrama
|
|
42
44
|
end
|
43
45
|
|
44
46
|
def add_nterm(id:, alias_name: nil, tag: nil)
|
45
|
-
|
47
|
+
if (sym = find_symbol_by_id(id))
|
48
|
+
return sym
|
49
|
+
end
|
46
50
|
|
47
51
|
@symbols = nil
|
48
52
|
nterm = Symbol.new(
|
@@ -53,6 +57,10 @@ module Lrama
|
|
53
57
|
nterm
|
54
58
|
end
|
55
59
|
|
60
|
+
def find_term_by_s_value(s_value)
|
61
|
+
terms.find { |s| s.id.s_value == s_value }
|
62
|
+
end
|
63
|
+
|
56
64
|
def find_symbol_by_s_value(s_value)
|
57
65
|
symbols.find { |s| s.id.s_value == s_value }
|
58
66
|
end
|
data/lib/lrama/grammar/type.rb
CHANGED
data/lib/lrama/grammar/union.rb
CHANGED
data/lib/lrama/grammar.rb
CHANGED
@@ -1,43 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "forwardable"
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
4
|
+
require_relative "grammar/auxiliary"
|
5
|
+
require_relative "grammar/binding"
|
6
|
+
require_relative "grammar/code"
|
7
|
+
require_relative "grammar/counter"
|
8
|
+
require_relative "grammar/destructor"
|
9
|
+
require_relative "grammar/error_token"
|
10
|
+
require_relative "grammar/parameterizing_rule"
|
11
|
+
require_relative "grammar/percent_code"
|
12
|
+
require_relative "grammar/precedence"
|
13
|
+
require_relative "grammar/printer"
|
14
|
+
require_relative "grammar/reference"
|
15
|
+
require_relative "grammar/rule"
|
16
|
+
require_relative "grammar/rule_builder"
|
17
|
+
require_relative "grammar/symbol"
|
18
|
+
require_relative "grammar/symbols"
|
19
|
+
require_relative "grammar/type"
|
20
|
+
require_relative "grammar/union"
|
21
|
+
require_relative "lexer"
|
20
22
|
|
21
23
|
module Lrama
|
22
24
|
# Grammar is the result of parsing an input grammar file
|
23
25
|
class Grammar
|
24
26
|
extend Forwardable
|
25
27
|
|
26
|
-
attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
27
|
-
attr_accessor :union, :expect,
|
28
|
-
:printers, :error_tokens,
|
29
|
-
:lex_param, :parse_param, :initial_action,
|
28
|
+
attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux, :parameterizing_rule_resolver
|
29
|
+
attr_accessor :union, :expect, :printers, :error_tokens, :lex_param, :parse_param, :initial_action,
|
30
30
|
:after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack,
|
31
|
-
:symbols_resolver, :types,
|
32
|
-
:rules, :rule_builders,
|
33
|
-
:sym_to_rules, :no_stdlib
|
31
|
+
:symbols_resolver, :types, :rules, :rule_builders, :sym_to_rules, :no_stdlib, :locations
|
34
32
|
|
35
|
-
def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term,
|
33
|
+
def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term, :find_term_by_s_value,
|
36
34
|
:find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
|
37
35
|
:find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type,
|
38
36
|
:fill_printer, :fill_destructor, :fill_error_token, :sort_by_number!
|
39
37
|
|
40
|
-
|
41
38
|
def initialize(rule_counter)
|
42
39
|
@rule_counter = rule_counter
|
43
40
|
|
@@ -59,10 +56,15 @@ module Lrama
|
|
59
56
|
@accept_symbol = nil
|
60
57
|
@aux = Auxiliary.new
|
61
58
|
@no_stdlib = false
|
59
|
+
@locations = false
|
62
60
|
|
63
61
|
append_special_symbols
|
64
62
|
end
|
65
63
|
|
64
|
+
def create_rule_builder(rule_counter, midrule_action_counter)
|
65
|
+
RuleBuilder.new(rule_counter, midrule_action_counter, @parameterizing_rule_resolver)
|
66
|
+
end
|
67
|
+
|
66
68
|
def add_percent_code(id:, code:)
|
67
69
|
@percent_codes << PercentCode.new(id.s_value, code.s_value)
|
68
70
|
end
|
@@ -141,6 +143,7 @@ module Lrama
|
|
141
143
|
end
|
142
144
|
|
143
145
|
def prepare
|
146
|
+
resolve_inline_rules
|
144
147
|
normalize_rules
|
145
148
|
collect_symbols
|
146
149
|
set_lhs_and_rhs
|
@@ -149,6 +152,7 @@ module Lrama
|
|
149
152
|
fill_sym_to_rules
|
150
153
|
compute_nullable
|
151
154
|
compute_first_set
|
155
|
+
set_locations
|
152
156
|
end
|
153
157
|
|
154
158
|
# TODO: More validation methods
|
@@ -255,7 +259,7 @@ module Lrama
|
|
255
259
|
|
256
260
|
def setup_rules
|
257
261
|
@rule_builders.each do |builder|
|
258
|
-
builder.setup_rules
|
262
|
+
builder.setup_rules
|
259
263
|
end
|
260
264
|
end
|
261
265
|
|
@@ -289,10 +293,23 @@ module Lrama
|
|
289
293
|
@accept_symbol = term
|
290
294
|
end
|
291
295
|
|
296
|
+
def resolve_inline_rules
|
297
|
+
while @rule_builders.any? {|r| r.has_inline_rules? } do
|
298
|
+
@rule_builders = @rule_builders.flat_map do |builder|
|
299
|
+
if builder.has_inline_rules?
|
300
|
+
builder.resolve_inline_rules
|
301
|
+
else
|
302
|
+
builder
|
303
|
+
end
|
304
|
+
end
|
305
|
+
end
|
306
|
+
end
|
307
|
+
|
292
308
|
def normalize_rules
|
293
309
|
# Add $accept rule to the top of rules
|
294
|
-
|
295
|
-
|
310
|
+
rule_builder = @rule_builders.first # : RuleBuilder
|
311
|
+
lineno = rule_builder ? rule_builder.line : 0
|
312
|
+
@rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [rule_builder.lhs, @eof_symbol.id], token_code: nil, lineno: lineno)
|
296
313
|
|
297
314
|
setup_rules
|
298
315
|
|
@@ -365,17 +382,21 @@ module Lrama
|
|
365
382
|
end
|
366
383
|
|
367
384
|
def validate_rule_lhs_is_nterm!
|
368
|
-
errors = []
|
385
|
+
errors = [] #: Array[String]
|
369
386
|
|
370
387
|
rules.each do |rule|
|
371
388
|
next if rule.lhs.nterm?
|
372
389
|
|
373
|
-
errors << "[BUG] LHS of #{rule} (line: #{rule.lineno}) is term. It should be nterm."
|
390
|
+
errors << "[BUG] LHS of #{rule.display_name} (line: #{rule.lineno}) is term. It should be nterm."
|
374
391
|
end
|
375
392
|
|
376
393
|
return if errors.empty?
|
377
394
|
|
378
395
|
raise errors.join("\n")
|
379
396
|
end
|
397
|
+
|
398
|
+
def set_locations
|
399
|
+
@locations = @locations || @rules.any? {|rule| rule.contains_at_reference? }
|
400
|
+
end
|
380
401
|
end
|
381
402
|
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Lrama
|
4
|
+
class GrammarValidator
|
5
|
+
def initialize(grammar, states, logger)
|
6
|
+
@grammar = grammar
|
7
|
+
@states = states
|
8
|
+
@logger = logger
|
9
|
+
end
|
10
|
+
|
11
|
+
def valid?
|
12
|
+
conflicts_within_threshold?
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
|
17
|
+
def conflicts_within_threshold?
|
18
|
+
return true unless @grammar.expect
|
19
|
+
|
20
|
+
[sr_conflicts_within_threshold(@grammar.expect), rr_conflicts_within_threshold(0)].all?
|
21
|
+
end
|
22
|
+
|
23
|
+
def sr_conflicts_within_threshold(expected)
|
24
|
+
return true if expected == @states.sr_conflicts_count
|
25
|
+
|
26
|
+
@logger.error("shift/reduce conflicts: #{@states.sr_conflicts_count} found, #{expected} expected")
|
27
|
+
false
|
28
|
+
end
|
29
|
+
|
30
|
+
def rr_conflicts_within_threshold(expected)
|
31
|
+
return true if expected == @states.rr_conflicts_count
|
32
|
+
|
33
|
+
@logger.error("reduce/reduce conflicts: #{@states.rr_conflicts_count} found, #{expected} expected")
|
34
|
+
false
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/lrama/lexer/location.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "strscan"
|
2
4
|
|
3
5
|
module Lrama
|
@@ -14,9 +16,9 @@ module Lrama
|
|
14
16
|
|
15
17
|
def _references
|
16
18
|
scanner = StringScanner.new(s_value)
|
17
|
-
references = []
|
19
|
+
references = [] #: Array[Grammar::Reference]
|
18
20
|
|
19
|
-
|
21
|
+
until scanner.eos? do
|
20
22
|
case
|
21
23
|
when reference = scan_reference(scanner)
|
22
24
|
references << reference
|
data/lib/lrama/lexer/token.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'token/char'
|
4
|
+
require_relative 'token/ident'
|
5
|
+
require_relative 'token/instantiate_rule'
|
6
|
+
require_relative 'token/tag'
|
7
|
+
require_relative 'token/user_code'
|
6
8
|
|
7
9
|
module Lrama
|
8
10
|
class Lexer
|
data/lib/lrama/lexer.rb
CHANGED
@@ -1,19 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "strscan"
|
2
4
|
|
3
|
-
|
4
|
-
|
5
|
-
|
5
|
+
require_relative "lexer/grammar_file"
|
6
|
+
require_relative "lexer/location"
|
7
|
+
require_relative "lexer/token"
|
6
8
|
|
7
9
|
module Lrama
|
8
10
|
class Lexer
|
9
11
|
attr_reader :head_line, :head_column, :line
|
10
12
|
attr_accessor :status, :end_symbol
|
11
13
|
|
12
|
-
SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';']
|
14
|
+
SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';'].freeze
|
13
15
|
PERCENT_TOKENS = %w(
|
14
16
|
%union
|
15
17
|
%token
|
16
18
|
%type
|
19
|
+
%nterm
|
17
20
|
%left
|
18
21
|
%right
|
19
22
|
%nonassoc
|
@@ -38,7 +41,8 @@ module Lrama
|
|
38
41
|
%rule
|
39
42
|
%no-stdlib
|
40
43
|
%inline
|
41
|
-
|
44
|
+
%locations
|
45
|
+
).freeze
|
42
46
|
|
43
47
|
def initialize(grammar_file)
|
44
48
|
@grammar_file = grammar_file
|
@@ -71,7 +75,7 @@ module Lrama
|
|
71
75
|
end
|
72
76
|
|
73
77
|
def lex_token
|
74
|
-
|
78
|
+
until @scanner.eos? do
|
75
79
|
case
|
76
80
|
when @scanner.scan(/\n/)
|
77
81
|
newline
|
@@ -126,7 +130,7 @@ module Lrama
|
|
126
130
|
code = ''
|
127
131
|
reset_first_position
|
128
132
|
|
129
|
-
|
133
|
+
until @scanner.eos? do
|
130
134
|
case
|
131
135
|
when @scanner.scan(/{/)
|
132
136
|
code += @scanner.matched
|
@@ -163,7 +167,7 @@ module Lrama
|
|
163
167
|
private
|
164
168
|
|
165
169
|
def lex_comment
|
166
|
-
|
170
|
+
until @scanner.eos? do
|
167
171
|
case
|
168
172
|
when @scanner.scan(/\n/)
|
169
173
|
newline
|
@@ -1,25 +1,17 @@
|
|
1
|
-
|
2
|
-
class Warning
|
3
|
-
attr_reader :errors, :warns
|
1
|
+
# frozen_string_literal: true
|
4
2
|
|
3
|
+
module Lrama
|
4
|
+
class Logger
|
5
5
|
def initialize(out = STDERR)
|
6
6
|
@out = out
|
7
|
-
@errors = []
|
8
|
-
@warns = []
|
9
|
-
end
|
10
|
-
|
11
|
-
def error(message)
|
12
|
-
@out << message << "\n"
|
13
|
-
@errors << message
|
14
7
|
end
|
15
8
|
|
16
9
|
def warn(message)
|
17
10
|
@out << message << "\n"
|
18
|
-
@warns << message
|
19
11
|
end
|
20
12
|
|
21
|
-
def
|
22
|
-
|
13
|
+
def error(message)
|
14
|
+
@out << message << "\n"
|
23
15
|
end
|
24
16
|
end
|
25
17
|
end
|