lrama 0.5.11 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +2 -2
- data/Gemfile +1 -1
- data/LEGAL.md +1 -0
- data/NEWS.md +187 -0
- data/README.md +15 -4
- data/Steepfile +3 -0
- data/lib/lrama/grammar/code/printer_code.rb +1 -1
- data/lib/lrama/grammar/code/rule_action.rb +19 -3
- data/lib/lrama/grammar/code.rb +19 -7
- data/lib/lrama/grammar/parameterizing_rule.rb +6 -0
- data/lib/lrama/grammar/parameterizing_rule_builder.rb +34 -0
- data/lib/lrama/grammar/parameterizing_rule_resolver.rb +30 -0
- data/lib/lrama/grammar/parameterizing_rule_rhs_builder.rb +53 -0
- data/lib/lrama/grammar/rule_builder.rb +26 -22
- data/lib/lrama/grammar.rb +15 -41
- data/lib/lrama/lexer/grammar_file.rb +21 -0
- data/lib/lrama/lexer/location.rb +77 -2
- data/lib/lrama/lexer/token/instantiate_rule.rb +18 -0
- data/lib/lrama/lexer/token/user_code.rb +10 -10
- data/lib/lrama/lexer/token.rb +1 -1
- data/lib/lrama/lexer.rb +21 -11
- data/lib/lrama/parser.rb +619 -454
- data/lib/lrama/states_reporter.rb +1 -1
- data/lib/lrama/version.rb +1 -1
- data/parser.y +95 -30
- data/sig/lrama/grammar/code/printer_code.rbs +1 -1
- data/sig/lrama/grammar/code.rbs +5 -5
- data/sig/lrama/grammar/parameterizing_rule.rbs +10 -0
- data/sig/lrama/grammar/parameterizing_rule_builder.rbs +19 -0
- data/sig/lrama/grammar/parameterizing_rule_resolver.rbs +16 -0
- data/sig/lrama/grammar/parameterizing_rule_rhs_builder.rbs +18 -0
- data/sig/lrama/grammar/parameterizing_rules/builder/base.rbs +5 -3
- data/sig/lrama/grammar/parameterizing_rules/builder/separated_list.rbs +2 -0
- data/sig/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rbs +2 -0
- data/sig/lrama/grammar/parameterizing_rules/builder.rbs +4 -3
- data/sig/lrama/grammar/rule_builder.rbs +2 -4
- data/sig/lrama/lexer/grammar_file.rbs +15 -0
- data/sig/lrama/lexer/location.rbs +13 -1
- data/sig/lrama/lexer/token/instantiate_rule.rbs +12 -0
- metadata +16 -6
- data/doc/TODO.md +0 -59
- data/lib/lrama/lexer/token/parameterizing.rb +0 -34
- data/sig/lrama/lexer/token/parameterizing.rbs +0 -17
data/lib/lrama/grammar.rb
CHANGED
@@ -8,6 +8,10 @@ require "lrama/grammar/printer"
|
|
8
8
|
require "lrama/grammar/reference"
|
9
9
|
require "lrama/grammar/rule"
|
10
10
|
require "lrama/grammar/rule_builder"
|
11
|
+
require "lrama/grammar/parameterizing_rule_builder"
|
12
|
+
require "lrama/grammar/parameterizing_rule_resolver"
|
13
|
+
require "lrama/grammar/parameterizing_rule_rhs_builder"
|
14
|
+
require "lrama/grammar/parameterizing_rule"
|
11
15
|
require "lrama/grammar/symbol"
|
12
16
|
require "lrama/grammar/type"
|
13
17
|
require "lrama/grammar/union"
|
@@ -36,6 +40,7 @@ module Lrama
|
|
36
40
|
@rule_builders = []
|
37
41
|
@rules = []
|
38
42
|
@sym_to_rules = {}
|
43
|
+
@parameterizing_resolver = ParameterizingRuleResolver.new
|
39
44
|
@empty_symbol = nil
|
40
45
|
@eof_symbol = nil
|
41
46
|
@error_symbol = nil
|
@@ -69,7 +74,7 @@ module Lrama
|
|
69
74
|
return sym
|
70
75
|
end
|
71
76
|
|
72
|
-
if sym = @symbols.find {|s| s.id == id }
|
77
|
+
if (sym = @symbols.find {|s| s.id == id })
|
73
78
|
return sym
|
74
79
|
end
|
75
80
|
|
@@ -129,6 +134,10 @@ module Lrama
|
|
129
134
|
@rule_builders << builder
|
130
135
|
end
|
131
136
|
|
137
|
+
def add_parameterizing_rule_builder(builder)
|
138
|
+
@parameterizing_resolver.add_parameterizing_rule_builder(builder)
|
139
|
+
end
|
140
|
+
|
132
141
|
def prologue_first_lineno=(prologue_first_lineno)
|
133
142
|
@aux.prologue_first_lineno = prologue_first_lineno
|
134
143
|
end
|
@@ -310,7 +319,7 @@ module Lrama
|
|
310
319
|
|
311
320
|
def setup_rules
|
312
321
|
@rule_builders.each do |builder|
|
313
|
-
builder.setup_rules
|
322
|
+
builder.setup_rules(@parameterizing_resolver)
|
314
323
|
end
|
315
324
|
end
|
316
325
|
|
@@ -350,56 +359,21 @@ module Lrama
|
|
350
359
|
@accept_symbol = term
|
351
360
|
end
|
352
361
|
|
353
|
-
# 1. Add $accept rule to the top of rules
|
354
|
-
# 2. Extract action in the middle of RHS into new Empty rule
|
355
|
-
# 3. Append id and extract action then create Rule
|
356
|
-
#
|
357
|
-
# Bison 3.8.2 uses different orders for symbol number and rule number
|
358
|
-
# when a rule has actions in the middle of a rule.
|
359
|
-
#
|
360
|
-
# For example,
|
361
|
-
#
|
362
|
-
# `program: $@1 top_compstmt`
|
363
|
-
#
|
364
|
-
# Rules are ordered like below,
|
365
|
-
#
|
366
|
-
# 1 $@1: ε
|
367
|
-
# 2 program: $@1 top_compstmt
|
368
|
-
#
|
369
|
-
# Symbols are ordered like below,
|
370
|
-
#
|
371
|
-
# 164 program
|
372
|
-
# 165 $@1
|
373
|
-
#
|
374
362
|
def normalize_rules
|
375
|
-
#
|
376
|
-
accept = @accept_symbol
|
377
|
-
eof = @eof_symbol
|
363
|
+
# Add $accept rule to the top of rules
|
378
364
|
lineno = @rule_builders.first ? @rule_builders.first.line : 0
|
379
|
-
@rules << Rule.new(id: @rule_counter.increment, _lhs:
|
365
|
+
@rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [@rule_builders.first.lhs, @eof_symbol.id], token_code: nil, lineno: lineno)
|
380
366
|
|
381
367
|
setup_rules
|
382
368
|
|
383
369
|
@rule_builders.each do |builder|
|
384
|
-
# Extract actions in the middle of RHS into new rules.
|
385
|
-
builder.midrule_action_rules.each do |rule|
|
386
|
-
@rules << rule
|
387
|
-
end
|
388
|
-
|
389
370
|
builder.rules.each do |rule|
|
390
|
-
add_nterm(id: rule._lhs)
|
391
|
-
@rules << rule
|
392
|
-
end
|
393
|
-
|
394
|
-
builder.parameterizing_rules.each do |rule|
|
395
371
|
add_nterm(id: rule._lhs, tag: rule.lhs_tag)
|
396
372
|
@rules << rule
|
397
373
|
end
|
398
|
-
|
399
|
-
builder.midrule_action_rules.each do |rule|
|
400
|
-
add_nterm(id: rule._lhs)
|
401
|
-
end
|
402
374
|
end
|
375
|
+
|
376
|
+
@rules.sort_by!(&:id)
|
403
377
|
end
|
404
378
|
|
405
379
|
# Collect symbols from rules
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Lexer
|
3
|
+
class GrammarFile
|
4
|
+
attr_reader :path, :text
|
5
|
+
|
6
|
+
def initialize(path, text)
|
7
|
+
@path = path
|
8
|
+
@text = text
|
9
|
+
end
|
10
|
+
|
11
|
+
def ==(other)
|
12
|
+
self.class == other.class &&
|
13
|
+
self.path == other.path
|
14
|
+
end
|
15
|
+
|
16
|
+
def lines
|
17
|
+
@lines ||= text.split("\n")
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/lib/lrama/lexer/location.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
module Lrama
|
2
2
|
class Lexer
|
3
3
|
class Location
|
4
|
-
attr_reader :first_line, :first_column, :last_line, :last_column
|
4
|
+
attr_reader :grammar_file, :first_line, :first_column, :last_line, :last_column
|
5
5
|
|
6
|
-
def initialize(first_line:, first_column:, last_line:, last_column:)
|
6
|
+
def initialize(grammar_file:, first_line:, first_column:, last_line:, last_column:)
|
7
|
+
@grammar_file = grammar_file
|
7
8
|
@first_line = first_line
|
8
9
|
@first_column = first_column
|
9
10
|
@last_line = last_line
|
@@ -12,11 +13,85 @@ module Lrama
|
|
12
13
|
|
13
14
|
def ==(other)
|
14
15
|
self.class == other.class &&
|
16
|
+
self.grammar_file == other.grammar_file &&
|
15
17
|
self.first_line == other.first_line &&
|
16
18
|
self.first_column == other.first_column &&
|
17
19
|
self.last_line == other.last_line &&
|
18
20
|
self.last_column == other.last_column
|
19
21
|
end
|
22
|
+
|
23
|
+
def partial_location(left, right)
|
24
|
+
offset = -first_column
|
25
|
+
new_first_line = -1
|
26
|
+
new_first_column = -1
|
27
|
+
new_last_line = -1
|
28
|
+
new_last_column = -1
|
29
|
+
|
30
|
+
_text.each.with_index do |line, index|
|
31
|
+
new_offset = offset + line.length + 1
|
32
|
+
|
33
|
+
if offset <= left && left <= new_offset
|
34
|
+
new_first_line = first_line + index
|
35
|
+
new_first_column = left - offset
|
36
|
+
end
|
37
|
+
|
38
|
+
if offset <= right && right <= new_offset
|
39
|
+
new_last_line = first_line + index
|
40
|
+
new_last_column = right - offset
|
41
|
+
end
|
42
|
+
|
43
|
+
offset = new_offset
|
44
|
+
end
|
45
|
+
|
46
|
+
Location.new(
|
47
|
+
grammar_file: grammar_file,
|
48
|
+
first_line: new_first_line, first_column: new_first_column,
|
49
|
+
last_line: new_last_line, last_column: new_last_column
|
50
|
+
)
|
51
|
+
end
|
52
|
+
|
53
|
+
def to_s
|
54
|
+
"#{path} (#{first_line},#{first_column})-(#{last_line},#{last_column})"
|
55
|
+
end
|
56
|
+
|
57
|
+
def generate_error_message(error_message)
|
58
|
+
<<~ERROR.chomp
|
59
|
+
#{path}:#{first_line}:#{first_column}: #{error_message}
|
60
|
+
#{line_with_carets}
|
61
|
+
ERROR
|
62
|
+
end
|
63
|
+
|
64
|
+
def line_with_carets
|
65
|
+
<<~TEXT
|
66
|
+
#{text}
|
67
|
+
#{carets}
|
68
|
+
TEXT
|
69
|
+
end
|
70
|
+
|
71
|
+
private
|
72
|
+
|
73
|
+
def path
|
74
|
+
grammar_file.path
|
75
|
+
end
|
76
|
+
|
77
|
+
def blanks
|
78
|
+
(text[0...first_column] or raise "#{first_column} is invalid").gsub(/[^\t]/, ' ')
|
79
|
+
end
|
80
|
+
|
81
|
+
def carets
|
82
|
+
blanks + '^' * (last_column - first_column)
|
83
|
+
end
|
84
|
+
|
85
|
+
def text
|
86
|
+
@text ||= _text.join("\n")
|
87
|
+
end
|
88
|
+
|
89
|
+
def _text
|
90
|
+
@_text ||=begin
|
91
|
+
range = (first_line - 1)...last_line
|
92
|
+
grammar_file.lines[range] or raise "#{range} is invalid"
|
93
|
+
end
|
94
|
+
end
|
20
95
|
end
|
21
96
|
end
|
22
97
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Lexer
|
3
|
+
class Token
|
4
|
+
class InstantiateRule < Token
|
5
|
+
attr_accessor :args
|
6
|
+
|
7
|
+
def initialize(s_value:, alias_name: nil, location: nil, args: [])
|
8
|
+
super s_value: s_value, alias_name: alias_name, location: location
|
9
|
+
@args = args
|
10
|
+
end
|
11
|
+
|
12
|
+
def rule_name
|
13
|
+
s_value
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -35,27 +35,27 @@ module Lrama
|
|
35
35
|
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
36
36
|
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
|
37
37
|
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
38
|
-
return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos
|
38
|
+
return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
|
39
39
|
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
|
40
40
|
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
41
|
-
return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos
|
41
|
+
return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
|
42
42
|
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
|
43
43
|
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
44
|
-
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos
|
45
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>
|
44
|
+
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
|
45
|
+
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets)
|
46
46
|
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
47
|
-
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos
|
47
|
+
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
|
48
48
|
|
49
49
|
# @ references
|
50
50
|
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
51
51
|
when scanner.scan(/@\$/) # @$
|
52
|
-
return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos
|
52
|
+
return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
|
53
53
|
when scanner.scan(/@(\d+)/) # @1
|
54
|
-
return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos
|
54
|
+
return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
|
55
55
|
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
|
56
|
-
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos
|
57
|
-
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
|
58
|
-
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos
|
56
|
+
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
|
57
|
+
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
|
58
|
+
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
|
59
59
|
end
|
60
60
|
end
|
61
61
|
end
|
data/lib/lrama/lexer/token.rb
CHANGED
data/lib/lrama/lexer.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require "strscan"
|
2
|
+
require "lrama/lexer/grammar_file"
|
2
3
|
require "lrama/lexer/location"
|
3
4
|
require "lrama/lexer/token"
|
4
5
|
|
@@ -28,10 +29,12 @@ module Lrama
|
|
28
29
|
%error-token
|
29
30
|
%empty
|
30
31
|
%code
|
32
|
+
%rule
|
31
33
|
)
|
32
34
|
|
33
|
-
def initialize(
|
34
|
-
@
|
35
|
+
def initialize(grammar_file)
|
36
|
+
@grammar_file = grammar_file
|
37
|
+
@scanner = StringScanner.new(grammar_file.text)
|
35
38
|
@head_column = @head = @scanner.pos
|
36
39
|
@head_line = @line = 1
|
37
40
|
@status = :initial
|
@@ -57,8 +60,9 @@ module Lrama
|
|
57
60
|
|
58
61
|
def location
|
59
62
|
Location.new(
|
63
|
+
grammar_file: @grammar_file,
|
60
64
|
first_line: @head_line, first_column: @head_column,
|
61
|
-
last_line:
|
65
|
+
last_line: line, last_column: column
|
62
66
|
)
|
63
67
|
end
|
64
68
|
|
@@ -78,8 +82,7 @@ module Lrama
|
|
78
82
|
end
|
79
83
|
end
|
80
84
|
|
81
|
-
|
82
|
-
@head_column = column
|
85
|
+
reset_first_position
|
83
86
|
|
84
87
|
case
|
85
88
|
when @scanner.eos?
|
@@ -117,6 +120,8 @@ module Lrama
|
|
117
120
|
def lex_c_code
|
118
121
|
nested = 0
|
119
122
|
code = ''
|
123
|
+
reset_first_position
|
124
|
+
|
120
125
|
while !@scanner.eos? do
|
121
126
|
case
|
122
127
|
when @scanner.scan(/{/)
|
@@ -140,12 +145,12 @@ module Lrama
|
|
140
145
|
@line += @scanner.matched.count("\n")
|
141
146
|
when @scanner.scan(/'.*?'/)
|
142
147
|
code += %Q(#{@scanner.matched})
|
148
|
+
when @scanner.scan(/[^\"'\{\}\n]+/)
|
149
|
+
code += @scanner.matched
|
150
|
+
when @scanner.scan(/#{Regexp.escape(@end_symbol)}/)
|
151
|
+
code += @scanner.matched
|
143
152
|
else
|
144
|
-
|
145
|
-
code += @scanner.matched
|
146
|
-
else
|
147
|
-
code += @scanner.getch
|
148
|
-
end
|
153
|
+
code += @scanner.getch
|
149
154
|
end
|
150
155
|
end
|
151
156
|
raise ParseError, "Unexpected code: #{code}."
|
@@ -166,9 +171,14 @@ module Lrama
|
|
166
171
|
end
|
167
172
|
end
|
168
173
|
|
174
|
+
def reset_first_position
|
175
|
+
@head_line = line
|
176
|
+
@head_column = column
|
177
|
+
end
|
178
|
+
|
169
179
|
def newline
|
170
180
|
@line += 1
|
171
|
-
@head = @scanner.pos
|
181
|
+
@head = @scanner.pos
|
172
182
|
end
|
173
183
|
end
|
174
184
|
end
|