lrama 0.5.12 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +20 -5
- data/Gemfile +1 -1
- data/NEWS.md +66 -0
- data/README.md +14 -3
- data/Steepfile +2 -0
- data/lib/lrama/command.rb +17 -3
- data/lib/lrama/context.rb +2 -22
- data/lib/lrama/grammar/binding.rb +24 -0
- data/lib/lrama/grammar/code/printer_code.rb +1 -1
- data/lib/lrama/grammar/code/rule_action.rb +2 -2
- data/lib/lrama/grammar/code.rb +19 -7
- data/lib/lrama/grammar/parameterizing_rule/resolver.rb +39 -0
- data/lib/lrama/grammar/parameterizing_rule/rhs.rb +15 -0
- data/lib/lrama/grammar/parameterizing_rule/rule.rb +16 -0
- data/lib/lrama/grammar/parameterizing_rule.rb +3 -0
- data/lib/lrama/grammar/percent_code.rb +3 -3
- data/lib/lrama/grammar/rule_builder.rb +69 -31
- data/lib/lrama/grammar/type.rb +13 -1
- data/lib/lrama/grammar.rb +15 -43
- data/lib/lrama/lexer/grammar_file.rb +21 -0
- data/lib/lrama/lexer/location.rb +77 -2
- data/lib/lrama/lexer/token/instantiate_rule.rb +23 -0
- data/lib/lrama/lexer/token/user_code.rb +10 -10
- data/lib/lrama/lexer/token.rb +6 -1
- data/lib/lrama/lexer.rb +23 -18
- data/lib/lrama/output.rb +2 -2
- data/lib/lrama/parser.rb +641 -458
- data/lib/lrama/states_reporter.rb +1 -1
- data/lib/lrama/version.rb +1 -1
- data/parser.y +97 -32
- data/sig/lrama/grammar/binding.rbs +16 -0
- data/sig/lrama/grammar/code/printer_code.rbs +1 -1
- data/sig/lrama/grammar/code.rbs +5 -5
- data/sig/lrama/grammar/parameterizing_rule/resolver.rbs +21 -0
- data/sig/lrama/grammar/parameterizing_rule/rhs.rbs +13 -0
- data/sig/lrama/grammar/parameterizing_rule/rule.rbs +14 -0
- data/sig/lrama/grammar/parameterizing_rule.rbs +6 -0
- data/sig/lrama/grammar/parameterizing_rules/builder/base.rbs +2 -2
- data/sig/lrama/grammar/parameterizing_rules/builder.rbs +1 -1
- data/sig/lrama/grammar/percent_code.rbs +3 -3
- data/sig/lrama/grammar/rule_builder.rbs +9 -8
- data/sig/lrama/lexer/grammar_file.rbs +15 -0
- data/sig/lrama/lexer/location.rbs +13 -1
- data/sig/lrama/lexer/token/instantiate_rule.rbs +14 -0
- data/sig/lrama/lexer/token.rbs +1 -0
- metadata +17 -5
- data/lib/lrama/lexer/token/parameterizing.rb +0 -34
- data/sig/lrama/lexer/token/parameterizing.rbs +0 -17
@@ -3,21 +3,22 @@ require 'lrama/grammar/parameterizing_rules/builder'
|
|
3
3
|
module Lrama
|
4
4
|
class Grammar
|
5
5
|
class RuleBuilder
|
6
|
-
attr_accessor :lhs, :
|
7
|
-
attr_reader :rhs, :user_code, :precedence_sym
|
6
|
+
attr_accessor :lhs, :line
|
7
|
+
attr_reader :lhs_tag, :rhs, :user_code, :precedence_sym
|
8
8
|
|
9
|
-
def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, skip_preprocess_references: false)
|
9
|
+
def initialize(rule_counter, midrule_action_counter, position_in_original_rule_rhs = nil, lhs_tag: nil, skip_preprocess_references: false)
|
10
10
|
@rule_counter = rule_counter
|
11
11
|
@midrule_action_counter = midrule_action_counter
|
12
12
|
@position_in_original_rule_rhs = position_in_original_rule_rhs
|
13
13
|
@skip_preprocess_references = skip_preprocess_references
|
14
14
|
|
15
15
|
@lhs = nil
|
16
|
+
@lhs_tag = lhs_tag
|
16
17
|
@rhs = []
|
17
|
-
@lhs_tag = nil
|
18
18
|
@user_code = nil
|
19
19
|
@precedence_sym = nil
|
20
20
|
@line = nil
|
21
|
+
@rule_builders_for_parameterizing_rules = []
|
21
22
|
@rule_builders_for_derived_rules = []
|
22
23
|
end
|
23
24
|
|
@@ -33,7 +34,7 @@ module Lrama
|
|
33
34
|
|
34
35
|
def user_code=(user_code)
|
35
36
|
if !@line
|
36
|
-
@line = user_code
|
37
|
+
@line = user_code&.line
|
37
38
|
end
|
38
39
|
|
39
40
|
flush_user_code
|
@@ -51,22 +52,14 @@ module Lrama
|
|
51
52
|
freeze_rhs
|
52
53
|
end
|
53
54
|
|
54
|
-
def setup_rules
|
55
|
+
def setup_rules(parameterizing_rule_resolver)
|
55
56
|
preprocess_references unless @skip_preprocess_references
|
56
|
-
process_rhs
|
57
|
+
process_rhs(parameterizing_rule_resolver)
|
57
58
|
build_rules
|
58
59
|
end
|
59
60
|
|
60
|
-
def parameterizing_rules
|
61
|
-
@parameterizing_rules
|
62
|
-
end
|
63
|
-
|
64
|
-
def midrule_action_rules
|
65
|
-
@midrule_action_rules
|
66
|
-
end
|
67
|
-
|
68
61
|
def rules
|
69
|
-
@rules
|
62
|
+
@parameterizing_rules + @old_parameterizing_rules + @midrule_action_rules + @rules
|
70
63
|
end
|
71
64
|
|
72
65
|
private
|
@@ -83,10 +76,13 @@ module Lrama
|
|
83
76
|
tokens = @replaced_rhs
|
84
77
|
|
85
78
|
rule = Rule.new(
|
86
|
-
id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, token_code: user_code,
|
79
|
+
id: @rule_counter.increment, _lhs: lhs, _rhs: tokens, lhs_tag: lhs_tag, token_code: user_code,
|
87
80
|
position_in_original_rule_rhs: @position_in_original_rule_rhs, precedence_sym: precedence_sym, lineno: line
|
88
81
|
)
|
89
82
|
@rules = [rule]
|
83
|
+
@parameterizing_rules = @rule_builders_for_parameterizing_rules.map do |rule_builder|
|
84
|
+
rule_builder.rules
|
85
|
+
end.flatten
|
90
86
|
@midrule_action_rules = @rule_builders_for_derived_rules.map do |rule_builder|
|
91
87
|
rule_builder.rules
|
92
88
|
end.flatten
|
@@ -95,13 +91,13 @@ module Lrama
|
|
95
91
|
end
|
96
92
|
end
|
97
93
|
|
98
|
-
# rhs is a mixture of various types of tokens such as `Ident`, `Parameterizing`, `UserCode` and so on.
|
94
|
+
# rhs is a mixture of various types of tokens such as `Ident`, `InstantiateRule`, `UserCode` and so on.
|
99
95
|
# `#process_rhs` replaces some kinds of tokens with `Ident` so that every element of `@replaced_rhs` is an `Ident` or a `Char`.
|
100
|
-
def process_rhs
|
96
|
+
def process_rhs(parameterizing_rule_resolver)
|
101
97
|
return if @replaced_rhs
|
102
98
|
|
103
99
|
@replaced_rhs = []
|
104
|
-
@
|
100
|
+
@old_parameterizing_rules = []
|
105
101
|
|
106
102
|
rhs.each_with_index do |token, i|
|
107
103
|
case token
|
@@ -109,22 +105,47 @@ module Lrama
|
|
109
105
|
@replaced_rhs << token
|
110
106
|
when Lrama::Lexer::Token::Ident
|
111
107
|
@replaced_rhs << token
|
112
|
-
when Lrama::Lexer::Token::
|
113
|
-
|
114
|
-
|
115
|
-
|
108
|
+
when Lrama::Lexer::Token::InstantiateRule
|
109
|
+
if parameterizing_rule_resolver.defined?(token)
|
110
|
+
parameterizing_rule = parameterizing_rule_resolver.find(token)
|
111
|
+
raise "Unexpected token. #{token}" unless parameterizing_rule
|
112
|
+
|
113
|
+
bindings = Binding.new(parameterizing_rule, token.args)
|
114
|
+
lhs_s_value = lhs_s_value(token, bindings)
|
115
|
+
if (created_lhs = parameterizing_rule_resolver.created_lhs(lhs_s_value))
|
116
|
+
@replaced_rhs << created_lhs
|
117
|
+
else
|
118
|
+
lhs_token = Lrama::Lexer::Token::Ident.new(s_value: lhs_s_value, location: token.location)
|
119
|
+
@replaced_rhs << lhs_token
|
120
|
+
parameterizing_rule_resolver.created_lhs_list << lhs_token
|
121
|
+
parameterizing_rule.rhs_list.each do |r|
|
122
|
+
rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, lhs_tag: token.lhs_tag, skip_preprocess_references: true)
|
123
|
+
rule_builder.lhs = lhs_token
|
124
|
+
r.symbols.each { |sym| rule_builder.add_rhs(bindings.resolve_symbol(sym)) }
|
125
|
+
rule_builder.line = line
|
126
|
+
rule_builder.user_code = r.user_code
|
127
|
+
rule_builder.precedence_sym = r.precedence_sym
|
128
|
+
rule_builder.complete_input
|
129
|
+
rule_builder.setup_rules(parameterizing_rule_resolver)
|
130
|
+
@rule_builders_for_parameterizing_rules << rule_builder
|
131
|
+
end
|
132
|
+
end
|
133
|
+
else
|
134
|
+
# TODO: Delete when the standard library is defined as a grammar file.
|
135
|
+
parameterizing_rule = ParameterizingRules::Builder.new(token, @rule_counter, token.lhs_tag, user_code, precedence_sym, line)
|
136
|
+
@old_parameterizing_rules = @old_parameterizing_rules + parameterizing_rule.build
|
137
|
+
@replaced_rhs << parameterizing_rule.build_token
|
116
138
|
end
|
117
|
-
@replaced_rhs << parameterizing.build_token
|
118
139
|
when Lrama::Lexer::Token::UserCode
|
119
140
|
prefix = token.referred ? "@" : "$@"
|
120
141
|
new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s)
|
121
142
|
@replaced_rhs << new_token
|
122
143
|
|
123
|
-
rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, skip_preprocess_references: true)
|
144
|
+
rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, i, lhs_tag: lhs_tag, skip_preprocess_references: true)
|
124
145
|
rule_builder.lhs = new_token
|
125
146
|
rule_builder.user_code = token
|
126
147
|
rule_builder.complete_input
|
127
|
-
rule_builder.setup_rules
|
148
|
+
rule_builder.setup_rules(parameterizing_rule_resolver)
|
128
149
|
|
129
150
|
@rule_builders_for_derived_rules << rule_builder
|
130
151
|
else
|
@@ -133,6 +154,18 @@ module Lrama
|
|
133
154
|
end
|
134
155
|
end
|
135
156
|
|
157
|
+
def lhs_s_value(token, bindings)
|
158
|
+
s_values = token.args.map do |arg|
|
159
|
+
resolved = bindings.resolve_symbol(arg)
|
160
|
+
if resolved.is_a?(Lexer::Token::InstantiateRule)
|
161
|
+
[resolved.s_value, resolved.args.map(&:s_value)]
|
162
|
+
else
|
163
|
+
resolved.s_value
|
164
|
+
end
|
165
|
+
end
|
166
|
+
"#{token.rule_name}_#{s_values.join('_')}"
|
167
|
+
end
|
168
|
+
|
136
169
|
def numberize_references
|
137
170
|
# Bison n'th component is 1-origin
|
138
171
|
(rhs + [user_code]).compact.each.with_index(1) do |token, i|
|
@@ -146,8 +179,13 @@ module Lrama
|
|
146
179
|
else
|
147
180
|
candidates = rhs.each_with_index.select {|token, i| token.referred_by?(ref_name) }
|
148
181
|
|
149
|
-
|
150
|
-
|
182
|
+
if candidates.size >= 2
|
183
|
+
token.invalid_ref(ref, "Referring symbol `#{ref_name}` is duplicated.")
|
184
|
+
end
|
185
|
+
|
186
|
+
unless (referring_symbol = candidates.first)
|
187
|
+
token.invalid_ref(ref, "Referring symbol `#{ref_name}` is not found.")
|
188
|
+
end
|
151
189
|
|
152
190
|
ref.index = referring_symbol[1] + 1
|
153
191
|
end
|
@@ -159,7 +197,7 @@ module Lrama
|
|
159
197
|
if ref.index
|
160
198
|
# TODO: Prohibit $0 even though Bison allows it?
|
161
199
|
# See: https://www.gnu.org/software/bison/manual/html_node/Actions.html
|
162
|
-
|
200
|
+
token.invalid_ref(ref, "Can not refer following component. #{ref.index} >= #{i}.") if ref.index >= i
|
163
201
|
rhs[ref.index - 1].referred = true
|
164
202
|
end
|
165
203
|
end
|
@@ -167,7 +205,7 @@ module Lrama
|
|
167
205
|
end
|
168
206
|
|
169
207
|
def flush_user_code
|
170
|
-
if c = @user_code
|
208
|
+
if (c = @user_code)
|
171
209
|
@rhs << c
|
172
210
|
@user_code = nil
|
173
211
|
end
|
data/lib/lrama/grammar/type.rb
CHANGED
@@ -1,6 +1,18 @@
|
|
1
1
|
module Lrama
|
2
2
|
class Grammar
|
3
|
-
class Type
|
3
|
+
class Type
|
4
|
+
attr_reader :id, :tag
|
5
|
+
|
6
|
+
def initialize(id:, tag:)
|
7
|
+
@id = id
|
8
|
+
@tag = tag
|
9
|
+
end
|
10
|
+
|
11
|
+
def ==(other)
|
12
|
+
self.class == other.class &&
|
13
|
+
self.id == other.id &&
|
14
|
+
self.tag == other.tag
|
15
|
+
end
|
4
16
|
end
|
5
17
|
end
|
6
18
|
end
|
data/lib/lrama/grammar.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require "lrama/grammar/auxiliary"
|
2
|
+
require "lrama/grammar/binding"
|
2
3
|
require "lrama/grammar/code"
|
3
4
|
require "lrama/grammar/counter"
|
4
5
|
require "lrama/grammar/error_token"
|
@@ -8,6 +9,7 @@ require "lrama/grammar/printer"
|
|
8
9
|
require "lrama/grammar/reference"
|
9
10
|
require "lrama/grammar/rule"
|
10
11
|
require "lrama/grammar/rule_builder"
|
12
|
+
require "lrama/grammar/parameterizing_rule"
|
11
13
|
require "lrama/grammar/symbol"
|
12
14
|
require "lrama/grammar/type"
|
13
15
|
require "lrama/grammar/union"
|
@@ -36,6 +38,7 @@ module Lrama
|
|
36
38
|
@rule_builders = []
|
37
39
|
@rules = []
|
38
40
|
@sym_to_rules = {}
|
41
|
+
@parameterizing_rule_resolver = ParameterizingRule::Resolver.new
|
39
42
|
@empty_symbol = nil
|
40
43
|
@eof_symbol = nil
|
41
44
|
@error_symbol = nil
|
@@ -47,7 +50,7 @@ module Lrama
|
|
47
50
|
end
|
48
51
|
|
49
52
|
def add_percent_code(id:, code:)
|
50
|
-
@percent_codes << PercentCode.new(id, code)
|
53
|
+
@percent_codes << PercentCode.new(id.s_value, code.s_value)
|
51
54
|
end
|
52
55
|
|
53
56
|
def add_printer(ident_or_tags:, token_code:, lineno:)
|
@@ -69,7 +72,7 @@ module Lrama
|
|
69
72
|
return sym
|
70
73
|
end
|
71
74
|
|
72
|
-
if sym = @symbols.find {|s| s.id == id }
|
75
|
+
if (sym = @symbols.find {|s| s.id == id })
|
73
76
|
return sym
|
74
77
|
end
|
75
78
|
|
@@ -129,6 +132,10 @@ module Lrama
|
|
129
132
|
@rule_builders << builder
|
130
133
|
end
|
131
134
|
|
135
|
+
def add_parameterizing_rule(rule)
|
136
|
+
@parameterizing_rule_resolver.add_parameterizing_rule(rule)
|
137
|
+
end
|
138
|
+
|
132
139
|
def prologue_first_lineno=(prologue_first_lineno)
|
133
140
|
@aux.prologue_first_lineno = prologue_first_lineno
|
134
141
|
end
|
@@ -162,7 +169,7 @@ module Lrama
|
|
162
169
|
|
163
170
|
# TODO: More validation methods
|
164
171
|
#
|
165
|
-
# *
|
172
|
+
# * Validation for no_declared_type_reference
|
166
173
|
def validate!
|
167
174
|
validate_symbol_number_uniqueness!
|
168
175
|
validate_symbol_alias_name_uniqueness!
|
@@ -310,7 +317,7 @@ module Lrama
|
|
310
317
|
|
311
318
|
def setup_rules
|
312
319
|
@rule_builders.each do |builder|
|
313
|
-
builder.setup_rules
|
320
|
+
builder.setup_rules(@parameterizing_rule_resolver)
|
314
321
|
end
|
315
322
|
end
|
316
323
|
|
@@ -350,56 +357,21 @@ module Lrama
|
|
350
357
|
@accept_symbol = term
|
351
358
|
end
|
352
359
|
|
353
|
-
# 1. Add $accept rule to the top of rules
|
354
|
-
# 2. Extract action in the middle of RHS into new Empty rule
|
355
|
-
# 3. Append id and extract action then create Rule
|
356
|
-
#
|
357
|
-
# Bison 3.8.2 uses different orders for symbol number and rule number
|
358
|
-
# when a rule has actions in the middle of a rule.
|
359
|
-
#
|
360
|
-
# For example,
|
361
|
-
#
|
362
|
-
# `program: $@1 top_compstmt`
|
363
|
-
#
|
364
|
-
# Rules are ordered like below,
|
365
|
-
#
|
366
|
-
# 1 $@1: ε
|
367
|
-
# 2 program: $@1 top_compstmt
|
368
|
-
#
|
369
|
-
# Symbols are ordered like below,
|
370
|
-
#
|
371
|
-
# 164 program
|
372
|
-
# 165 $@1
|
373
|
-
#
|
374
360
|
def normalize_rules
|
375
|
-
#
|
376
|
-
accept = @accept_symbol
|
377
|
-
eof = @eof_symbol
|
361
|
+
# Add $accept rule to the top of rules
|
378
362
|
lineno = @rule_builders.first ? @rule_builders.first.line : 0
|
379
|
-
@rules << Rule.new(id: @rule_counter.increment, _lhs:
|
363
|
+
@rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [@rule_builders.first.lhs, @eof_symbol.id], token_code: nil, lineno: lineno)
|
380
364
|
|
381
365
|
setup_rules
|
382
366
|
|
383
367
|
@rule_builders.each do |builder|
|
384
|
-
# Extract actions in the middle of RHS into new rules.
|
385
|
-
builder.midrule_action_rules.each do |rule|
|
386
|
-
@rules << rule
|
387
|
-
end
|
388
|
-
|
389
368
|
builder.rules.each do |rule|
|
390
|
-
add_nterm(id: rule._lhs)
|
391
|
-
@rules << rule
|
392
|
-
end
|
393
|
-
|
394
|
-
builder.parameterizing_rules.each do |rule|
|
395
369
|
add_nterm(id: rule._lhs, tag: rule.lhs_tag)
|
396
370
|
@rules << rule
|
397
371
|
end
|
398
|
-
|
399
|
-
builder.midrule_action_rules.each do |rule|
|
400
|
-
add_nterm(id: rule._lhs)
|
401
|
-
end
|
402
372
|
end
|
373
|
+
|
374
|
+
@rules.sort_by!(&:id)
|
403
375
|
end
|
404
376
|
|
405
377
|
# Collect symbols from rules
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Lexer
|
3
|
+
class GrammarFile
|
4
|
+
attr_reader :path, :text
|
5
|
+
|
6
|
+
def initialize(path, text)
|
7
|
+
@path = path
|
8
|
+
@text = text.freeze
|
9
|
+
end
|
10
|
+
|
11
|
+
def ==(other)
|
12
|
+
self.class == other.class &&
|
13
|
+
self.path == other.path
|
14
|
+
end
|
15
|
+
|
16
|
+
def lines
|
17
|
+
@lines ||= text.split("\n")
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/lib/lrama/lexer/location.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
module Lrama
|
2
2
|
class Lexer
|
3
3
|
class Location
|
4
|
-
attr_reader :first_line, :first_column, :last_line, :last_column
|
4
|
+
attr_reader :grammar_file, :first_line, :first_column, :last_line, :last_column
|
5
5
|
|
6
|
-
def initialize(first_line:, first_column:, last_line:, last_column:)
|
6
|
+
def initialize(grammar_file:, first_line:, first_column:, last_line:, last_column:)
|
7
|
+
@grammar_file = grammar_file
|
7
8
|
@first_line = first_line
|
8
9
|
@first_column = first_column
|
9
10
|
@last_line = last_line
|
@@ -12,11 +13,85 @@ module Lrama
|
|
12
13
|
|
13
14
|
def ==(other)
|
14
15
|
self.class == other.class &&
|
16
|
+
self.grammar_file == other.grammar_file &&
|
15
17
|
self.first_line == other.first_line &&
|
16
18
|
self.first_column == other.first_column &&
|
17
19
|
self.last_line == other.last_line &&
|
18
20
|
self.last_column == other.last_column
|
19
21
|
end
|
22
|
+
|
23
|
+
def partial_location(left, right)
|
24
|
+
offset = -first_column
|
25
|
+
new_first_line = -1
|
26
|
+
new_first_column = -1
|
27
|
+
new_last_line = -1
|
28
|
+
new_last_column = -1
|
29
|
+
|
30
|
+
_text.each.with_index do |line, index|
|
31
|
+
new_offset = offset + line.length + 1
|
32
|
+
|
33
|
+
if offset <= left && left <= new_offset
|
34
|
+
new_first_line = first_line + index
|
35
|
+
new_first_column = left - offset
|
36
|
+
end
|
37
|
+
|
38
|
+
if offset <= right && right <= new_offset
|
39
|
+
new_last_line = first_line + index
|
40
|
+
new_last_column = right - offset
|
41
|
+
end
|
42
|
+
|
43
|
+
offset = new_offset
|
44
|
+
end
|
45
|
+
|
46
|
+
Location.new(
|
47
|
+
grammar_file: grammar_file,
|
48
|
+
first_line: new_first_line, first_column: new_first_column,
|
49
|
+
last_line: new_last_line, last_column: new_last_column
|
50
|
+
)
|
51
|
+
end
|
52
|
+
|
53
|
+
def to_s
|
54
|
+
"#{path} (#{first_line},#{first_column})-(#{last_line},#{last_column})"
|
55
|
+
end
|
56
|
+
|
57
|
+
def generate_error_message(error_message)
|
58
|
+
<<~ERROR.chomp
|
59
|
+
#{path}:#{first_line}:#{first_column}: #{error_message}
|
60
|
+
#{line_with_carets}
|
61
|
+
ERROR
|
62
|
+
end
|
63
|
+
|
64
|
+
def line_with_carets
|
65
|
+
<<~TEXT
|
66
|
+
#{text}
|
67
|
+
#{carets}
|
68
|
+
TEXT
|
69
|
+
end
|
70
|
+
|
71
|
+
private
|
72
|
+
|
73
|
+
def path
|
74
|
+
grammar_file.path
|
75
|
+
end
|
76
|
+
|
77
|
+
def blanks
|
78
|
+
(text[0...first_column] or raise "#{first_column} is invalid").gsub(/[^\t]/, ' ')
|
79
|
+
end
|
80
|
+
|
81
|
+
def carets
|
82
|
+
blanks + '^' * (last_column - first_column)
|
83
|
+
end
|
84
|
+
|
85
|
+
def text
|
86
|
+
@text ||= _text.join("\n")
|
87
|
+
end
|
88
|
+
|
89
|
+
def _text
|
90
|
+
@_text ||=begin
|
91
|
+
range = (first_line - 1)...last_line
|
92
|
+
grammar_file.lines[range] or raise "#{range} is invalid"
|
93
|
+
end
|
94
|
+
end
|
20
95
|
end
|
21
96
|
end
|
22
97
|
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Lexer
|
3
|
+
class Token
|
4
|
+
class InstantiateRule < Token
|
5
|
+
attr_reader :args, :lhs_tag
|
6
|
+
|
7
|
+
def initialize(s_value:, alias_name: nil, location: nil, args: [], lhs_tag: nil)
|
8
|
+
super s_value: s_value, alias_name: alias_name, location: location
|
9
|
+
@args = args
|
10
|
+
@lhs_tag = lhs_tag
|
11
|
+
end
|
12
|
+
|
13
|
+
def rule_name
|
14
|
+
s_value
|
15
|
+
end
|
16
|
+
|
17
|
+
def args_count
|
18
|
+
args.count
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -35,27 +35,27 @@ module Lrama
|
|
35
35
|
# An identifier needs to be wrapped in brackets to use ".-" in identifiers
|
36
36
|
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
|
37
37
|
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
38
|
-
return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos
|
38
|
+
return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
|
39
39
|
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
|
40
40
|
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
41
|
-
return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos
|
41
|
+
return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
|
42
42
|
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
|
43
43
|
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
44
|
-
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos
|
45
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>
|
44
|
+
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
|
45
|
+
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets)
|
46
46
|
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
47
|
-
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos
|
47
|
+
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
|
48
48
|
|
49
49
|
# @ references
|
50
50
|
# An identifier needs to be wrapped in brackets to use ".-" in identifiers
|
51
51
|
when scanner.scan(/@\$/) # @$
|
52
|
-
return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos
|
52
|
+
return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
|
53
53
|
when scanner.scan(/@(\d+)/) # @1
|
54
|
-
return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos
|
54
|
+
return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
|
55
55
|
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
|
56
|
-
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos
|
57
|
-
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
|
58
|
-
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos
|
56
|
+
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
|
57
|
+
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
|
58
|
+
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
|
59
59
|
end
|
60
60
|
end
|
61
61
|
end
|
data/lib/lrama/lexer/token.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'lrama/lexer/token/char'
|
2
2
|
require 'lrama/lexer/token/ident'
|
3
|
-
require 'lrama/lexer/token/
|
3
|
+
require 'lrama/lexer/token/instantiate_rule'
|
4
4
|
require 'lrama/lexer/token/tag'
|
5
5
|
require 'lrama/lexer/token/user_code'
|
6
6
|
|
@@ -46,6 +46,11 @@ module Lrama
|
|
46
46
|
def last_column
|
47
47
|
location.last_column
|
48
48
|
end
|
49
|
+
|
50
|
+
def invalid_ref(ref, message)
|
51
|
+
location = self.location.partial_location(ref.first_column, ref.last_column)
|
52
|
+
raise location.generate_error_message(message)
|
53
|
+
end
|
49
54
|
end
|
50
55
|
end
|
51
56
|
end
|
data/lib/lrama/lexer.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
require "strscan"
|
2
|
+
require "lrama/lexer/grammar_file"
|
2
3
|
require "lrama/lexer/location"
|
3
4
|
require "lrama/lexer/token"
|
4
5
|
|
5
6
|
module Lrama
|
6
7
|
class Lexer
|
7
|
-
attr_reader :head_line, :head_column
|
8
|
-
attr_accessor :status
|
9
|
-
attr_accessor :end_symbol
|
8
|
+
attr_reader :head_line, :head_column, :line
|
9
|
+
attr_accessor :status, :end_symbol
|
10
10
|
|
11
11
|
SYMBOLS = ['%{', '%}', '%%', '{', '}', '\[', '\]', '\(', '\)', '\,', ':', '\|', ';']
|
12
12
|
PERCENT_TOKENS = %w(
|
@@ -28,10 +28,12 @@ module Lrama
|
|
28
28
|
%error-token
|
29
29
|
%empty
|
30
30
|
%code
|
31
|
+
%rule
|
31
32
|
)
|
32
33
|
|
33
|
-
def initialize(
|
34
|
-
@
|
34
|
+
def initialize(grammar_file)
|
35
|
+
@grammar_file = grammar_file
|
36
|
+
@scanner = StringScanner.new(grammar_file.text)
|
35
37
|
@head_column = @head = @scanner.pos
|
36
38
|
@head_line = @line = 1
|
37
39
|
@status = :initial
|
@@ -47,18 +49,15 @@ module Lrama
|
|
47
49
|
end
|
48
50
|
end
|
49
51
|
|
50
|
-
def line
|
51
|
-
@line
|
52
|
-
end
|
53
|
-
|
54
52
|
def column
|
55
53
|
@scanner.pos - @head
|
56
54
|
end
|
57
55
|
|
58
56
|
def location
|
59
57
|
Location.new(
|
58
|
+
grammar_file: @grammar_file,
|
60
59
|
first_line: @head_line, first_column: @head_column,
|
61
|
-
last_line:
|
60
|
+
last_line: line, last_column: column
|
62
61
|
)
|
63
62
|
end
|
64
63
|
|
@@ -78,8 +77,7 @@ module Lrama
|
|
78
77
|
end
|
79
78
|
end
|
80
79
|
|
81
|
-
|
82
|
-
@head_column = column
|
80
|
+
reset_first_position
|
83
81
|
|
84
82
|
case
|
85
83
|
when @scanner.eos?
|
@@ -117,6 +115,8 @@ module Lrama
|
|
117
115
|
def lex_c_code
|
118
116
|
nested = 0
|
119
117
|
code = ''
|
118
|
+
reset_first_position
|
119
|
+
|
120
120
|
while !@scanner.eos? do
|
121
121
|
case
|
122
122
|
when @scanner.scan(/{/)
|
@@ -140,12 +140,12 @@ module Lrama
|
|
140
140
|
@line += @scanner.matched.count("\n")
|
141
141
|
when @scanner.scan(/'.*?'/)
|
142
142
|
code += %Q(#{@scanner.matched})
|
143
|
+
when @scanner.scan(/[^\"'\{\}\n]+/)
|
144
|
+
code += @scanner.matched
|
145
|
+
when @scanner.scan(/#{Regexp.escape(@end_symbol)}/)
|
146
|
+
code += @scanner.matched
|
143
147
|
else
|
144
|
-
|
145
|
-
code += @scanner.matched
|
146
|
-
else
|
147
|
-
code += @scanner.getch
|
148
|
-
end
|
148
|
+
code += @scanner.getch
|
149
149
|
end
|
150
150
|
end
|
151
151
|
raise ParseError, "Unexpected code: #{code}."
|
@@ -166,9 +166,14 @@ module Lrama
|
|
166
166
|
end
|
167
167
|
end
|
168
168
|
|
169
|
+
def reset_first_position
|
170
|
+
@head_line = line
|
171
|
+
@head_column = column
|
172
|
+
end
|
173
|
+
|
169
174
|
def newline
|
170
175
|
@line += 1
|
171
|
-
@head = @scanner.pos
|
176
|
+
@head = @scanner.pos
|
172
177
|
end
|
173
178
|
end
|
174
179
|
end
|
data/lib/lrama/output.rb
CHANGED
@@ -352,9 +352,9 @@ module Lrama
|
|
352
352
|
# b4_percent_code_get
|
353
353
|
def percent_code(name)
|
354
354
|
@grammar.percent_codes.select do |percent_code|
|
355
|
-
percent_code.
|
355
|
+
percent_code.name == name
|
356
356
|
end.map do |percent_code|
|
357
|
-
percent_code.code
|
357
|
+
percent_code.code
|
358
358
|
end.join
|
359
359
|
end
|
360
360
|
|