lrama 0.5.11 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +2 -2
- data/Gemfile +1 -1
- data/LEGAL.md +1 -0
- data/NEWS.md +187 -0
- data/README.md +15 -4
- data/Steepfile +3 -0
- data/lib/lrama/grammar/code/printer_code.rb +1 -1
- data/lib/lrama/grammar/code/rule_action.rb +19 -3
- data/lib/lrama/grammar/code.rb +19 -7
- data/lib/lrama/grammar/parameterizing_rule.rb +6 -0
- data/lib/lrama/grammar/parameterizing_rule_builder.rb +34 -0
- data/lib/lrama/grammar/parameterizing_rule_resolver.rb +30 -0
- data/lib/lrama/grammar/parameterizing_rule_rhs_builder.rb +53 -0
- data/lib/lrama/grammar/rule_builder.rb +26 -22
- data/lib/lrama/grammar.rb +15 -41
- data/lib/lrama/lexer/grammar_file.rb +21 -0
- data/lib/lrama/lexer/location.rb +77 -2
- data/lib/lrama/lexer/token/instantiate_rule.rb +18 -0
- data/lib/lrama/lexer/token/user_code.rb +10 -10
- data/lib/lrama/lexer/token.rb +1 -1
- data/lib/lrama/lexer.rb +21 -11
- data/lib/lrama/parser.rb +619 -454
- data/lib/lrama/states_reporter.rb +1 -1
- data/lib/lrama/version.rb +1 -1
- data/parser.y +95 -30
- data/sig/lrama/grammar/code/printer_code.rbs +1 -1
- data/sig/lrama/grammar/code.rbs +5 -5
- data/sig/lrama/grammar/parameterizing_rule.rbs +10 -0
- data/sig/lrama/grammar/parameterizing_rule_builder.rbs +19 -0
- data/sig/lrama/grammar/parameterizing_rule_resolver.rbs +16 -0
- data/sig/lrama/grammar/parameterizing_rule_rhs_builder.rbs +18 -0
- data/sig/lrama/grammar/parameterizing_rules/builder/base.rbs +5 -3
- data/sig/lrama/grammar/parameterizing_rules/builder/separated_list.rbs +2 -0
- data/sig/lrama/grammar/parameterizing_rules/builder/separated_nonempty_list.rbs +2 -0
- data/sig/lrama/grammar/parameterizing_rules/builder.rbs +4 -3
- data/sig/lrama/grammar/rule_builder.rbs +2 -4
- data/sig/lrama/lexer/grammar_file.rbs +15 -0
- data/sig/lrama/lexer/location.rbs +13 -1
- data/sig/lrama/lexer/token/instantiate_rule.rbs +12 -0
- metadata +16 -6
- data/doc/TODO.md +0 -59
- data/lib/lrama/lexer/token/parameterizing.rb +0 -34
- data/sig/lrama/lexer/token/parameterizing.rbs +0 -17
data/lib/lrama/grammar.rb
CHANGED
@@ -8,6 +8,10 @@ require "lrama/grammar/printer"
|
|
8
8
|
require "lrama/grammar/reference"
|
9
9
|
require "lrama/grammar/rule"
|
10
10
|
require "lrama/grammar/rule_builder"
|
11
|
+
require "lrama/grammar/parameterizing_rule_builder"
|
12
|
+
require "lrama/grammar/parameterizing_rule_resolver"
|
13
|
+
require "lrama/grammar/parameterizing_rule_rhs_builder"
|
14
|
+
require "lrama/grammar/parameterizing_rule"
|
11
15
|
require "lrama/grammar/symbol"
|
12
16
|
require "lrama/grammar/type"
|
13
17
|
require "lrama/grammar/union"
|
@@ -36,6 +40,7 @@ module Lrama
|
|
36
40
|
@rule_builders = []
|
37
41
|
@rules = []
|
38
42
|
@sym_to_rules = {}
|
43
|
+
@parameterizing_resolver = ParameterizingRuleResolver.new
|
39
44
|
@empty_symbol = nil
|
40
45
|
@eof_symbol = nil
|
41
46
|
@error_symbol = nil
|
@@ -69,7 +74,7 @@ module Lrama
|
|
69
74
|
return sym
|
70
75
|
end
|
71
76
|
|
72
|
-
if sym = @symbols.find {|s| s.id == id }
|
77
|
+
if (sym = @symbols.find {|s| s.id == id })
|
73
78
|
return sym
|
74
79
|
end
|
75
80
|
|
@@ -129,6 +134,10 @@ module Lrama
|
|
129
134
|
@rule_builders << builder
|
130
135
|
end
|
131
136
|
|
137
|
+
def add_parameterizing_rule_builder(builder)
|
138
|
+
@parameterizing_resolver.add_parameterizing_rule_builder(builder)
|
139
|
+
end
|
140
|
+
|
132
141
|
def prologue_first_lineno=(prologue_first_lineno)
|
133
142
|
@aux.prologue_first_lineno = prologue_first_lineno
|
134
143
|
end
|
@@ -310,7 +319,7 @@ module Lrama
|
|
310
319
|
|
311
320
|
def setup_rules
|
312
321
|
@rule_builders.each do |builder|
|
313
|
-
builder.setup_rules
|
322
|
+
builder.setup_rules(@parameterizing_resolver)
|
314
323
|
end
|
315
324
|
end
|
316
325
|
|
@@ -350,56 +359,21 @@ module Lrama
|
|
350
359
|
@accept_symbol = term
|
351
360
|
end
|
352
361
|
|
353
|
-
# 1. Add $accept rule to the top of rules
|
354
|
-
# 2. Extract action in the middle of RHS into new Empty rule
|
355
|
-
# 3. Append id and extract action then create Rule
|
356
|
-
#
|
357
|
-
# Bison 3.8.2 uses different orders for symbol number and rule number
|
358
|
-
# when a rule has actions in the middle of a rule.
|
359
|
-
#
|
360
|
-
# For example,
|
361
|
-
#
|
362
|
-
# `program: $@1 top_compstmt`
|
363
|
-
#
|
364
|
-
# Rules are ordered like below,
|
365
|
-
#
|
366
|
-
# 1 $@1: ε
|
367
|
-
# 2 program: $@1 top_compstmt
|
368
|
-
#
|
369
|
-
# Symbols are ordered like below,
|
370
|
-
#
|
371
|
-
# 164 program
|
372
|
-
# 165 $@1
|
373
|
-
#
|
374
362
|
def normalize_rules
|
375
|
-
#
|
376
|
-
accept = @accept_symbol
|
377
|
-
eof = @eof_symbol
|
363
|
+
# Add $accept rule to the top of rules
|
378
364
|
lineno = @rule_builders.first ? @rule_builders.first.line : 0
|
379
|
-
@rules << Rule.new(id: @rule_counter.increment, _lhs:
|
365
|
+
@rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [@rule_builders.first.lhs, @eof_symbol.id], token_code: nil, lineno: lineno)
|
380
366
|
|
381
367
|
setup_rules
|
382
368
|
|
383
369
|
@rule_builders.each do |builder|
|
384
|
-
# Extract actions in the middle of RHS into new rules.
|
385
|
-
builder.midrule_action_rules.each do |rule|
|
386
|
-
@rules << rule
|
387
|
-
end
|
388
|
-
|
389
370
|
builder.rules.each do |rule|
|
390
|
-
add_nterm(id: rule._lhs)
|
391
|
-
@rules << rule
|
392
|
-
end
|
393
|
-
|
394
|
-
builder.parameterizing_rules.each do |rule|
|
395
371
|
add_nterm(id: rule._lhs, tag: rule.lhs_tag)
|
396
372
|
@rules << rule
|
397
373
|
end
|
398
|
-
|
399
|
-
builder.midrule_action_rules.each do |rule|
|
400
|
-
add_nterm(id: rule._lhs)
|
401
|
-
end
|
402
374
|
end
|
375
|
+
|
376
|
+
@rules.sort_by!(&:id)
|
403
377
|
end
|
404
378
|
|
405
379
|
# Collect symbols from rules
|
data/lib/lrama/lexer/grammar_file.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Lexer
|
3
|
+
class GrammarFile
|
4
|
+
attr_reader :path, :text
|
5
|
+
|
6
|
+
def initialize(path, text)
|
7
|
+
@path = path
|
8
|
+
@text = text
|
9
|
+
end
|
10
|
+
|
11
|
+
def ==(other)
|
12
|
+
self.class == other.class &&
|
13
|
+
self.path == other.path
|
14
|
+
end
|
15
|
+
|
16
|
+
def lines
|
17
|
+
@lines ||= text.split("\n")
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/lib/lrama/lexer/location.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
module Lrama
|
2
2
|
class Lexer
|
3
3
|
class Location
|
4
|
-
attr_reader :first_line, :first_column, :last_line, :last_column
|
4
|
+
attr_reader :grammar_file, :first_line, :first_column, :last_line, :last_column
|
5
5
|
|
6
|
-
def initialize(first_line:, first_column:, last_line:, last_column:)
|
6
|
+
def initialize(grammar_file:, first_line:, first_column:, last_line:, last_column:)
|
7
|
+
@grammar_file = grammar_file
|
7
8
|
@first_line = first_line
|
8
9
|
@first_column = first_column
|
9
10
|
@last_line = last_line
|
@@ -12,11 +13,85 @@ module Lrama
|
|
12
13
|
|
13
14
|
def ==(other)
|
14
15
|
self.class == other.class &&
|
16
|
+
self.grammar_file == other.grammar_file &&
|
15
17
|
self.first_line == other.first_line &&
|
16
18
|
self.first_column == other.first_column &&
|
17
19
|
self.last_line == other.last_line &&
|
18
20
|
self.last_column == other.last_column
|
19
21
|
end
|
22
|
+
|
23
|
+
def partial_location(left, right)
|
24
|
+
offset = -first_column
|
25
|
+
new_first_line = -1
|
26
|
+
new_first_column = -1
|
27
|
+
new_last_line = -1
|
28
|
+
new_last_column = -1
|
29
|
+
|
30
|
+
_text.each.with_index do |line, index|
|
31
|
+
new_offset = offset + line.length + 1
|
32
|
+
|
33
|
+
if offset <= left && left <= new_offset
|
34
|
+
new_first_line = first_line + index
|
35
|
+
new_first_column = left - offset
|
36
|
+
end
|
37
|
+
|
38
|
+
if offset <= right && right <= new_offset
|
39
|
+
new_last_line = first_line + index
|
40
|
+
new_last_column = right - offset
|
41
|
+
end
|
42
|
+
|
43
|
+
offset = new_offset
|
44
|
+
end
|
45
|
+
|
46
|
+
Location.new(
|
47
|
+
grammar_file: grammar_file,
|
48
|
+
first_line: new_first_line, first_column: new_first_column,
|
49
|
+
last_line: new_last_line, last_column: new_last_column
|
50
|
+
)
|
51
|
+
end
|
52
|
+
|
53
|
+
def to_s
|
54
|
+
"#{path} (#{first_line},#{first_column})-(#{last_line},#{last_column})"
|
55
|
+
end
|
56
|
+
|
57
|
+
def generate_error_message(error_message)
|
58
|
+
<<~ERROR.chomp
|
59
|
+
#{path}:#{first_line}:#{first_column}: #{error_message}
|
60
|
+
#{line_with_carets}
|
61
|
+
ERROR
|
62
|
+
end
|
63
|
+
|
64
|
+
def line_with_carets
|
65
|
+
<<~TEXT
|
66
|
+
#{text}
|
67
|
+
#{carets}
|
68
|
+
TEXT
|
69
|
+
end
|
70
|
+
|
71
|
+
private
|
72
|
+
|
73
|
+
def path
|
74
|
+
grammar_file.path
|
75
|
+
end
|
76
|
+
|
77
|
+
def blanks
|
78
|
+
(text[0...first_column] or raise "#{first_column} is invalid").gsub(/[^\t]/, ' ')
|
79
|
+
end
|
80
|
+
|
81
|
+
def carets
|
82
|
+
blanks + '^' * (last_column - first_column)
|
83
|
+
end
|
84
|
+
|
85
|
+
def text
|
86
|
+
@text ||= _text.join("\n")
|
87
|
+
end
|
88
|
+
|
89
|
+
def _text
|
90
|
+
@_text ||=begin
|
91
|
+
range = (first_line - 1)...last_line
|
92
|
+
grammar_file.lines[range] or raise "#{range} is invalid"
|
93
|
+
end
|
94
|
+
end
|
20
95
|
end
|
21
96
|
end
|
22
97
|
end
|
data/lib/lrama/lexer/token/instantiate_rule.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Lexer
|
3
|
+
class Token
|
4
|
+
class InstantiateRule < Token
|
5
|
+
attr_accessor :args
|
6
|
+
|
7
|
+
def initialize(s_value:, alias_name: nil, location: nil, args: [])
|
8
|
+
super s_value: s_value, alias_name: alias_name, location: location
|
9
|
+
@args = args
|
10
|
+
end
|
11
|
+
|
12
|
+
def rule_name
|
13
|
+
s_value
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/lrama/lexer/token/user_code.rb
CHANGED
@@ -35,27 +35,27 @@ module Lrama
|
|
35
35
|
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
36
36
|
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
|
37
37
|
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
38
|
-
return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos
|
38
|
+
return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
|
39
39
|
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
|
40
40
|
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
41
|
-
return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos
|
41
|
+
return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
|
42
42
|
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
|
43
43
|
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
44
|
-
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos
|
45
|
-
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>
|
44
|
+
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
|
45
|
+
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets)
|
46
46
|
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
47
|
-
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos
|
47
|
+
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
|
48
48
|
|
49
49
|
# @ references
|
50
50
|
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
51
51
|
when scanner.scan(/@\$/) # @$
|
52
|
-
return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos
|
52
|
+
return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
|
53
53
|
when scanner.scan(/@(\d+)/) # @1
|
54
|
-
return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos
|
54
|
+
return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
|
55
55
|
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
|
56
|
-
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos
|
57
|
-
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
|
58
|
-
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos
|
56
|
+
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
|
57
|
+
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
|
58
|
+
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
|
59
59
|
end
|
60
60
|
end
|
61
61
|
end
|
data/lib/lrama/lexer/token.rb
CHANGED
data/lib/lrama/lexer.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require "strscan"
|
2
|
+
require "lrama/lexer/grammar_file"
|
2
3
|
require "lrama/lexer/location"
|
3
4
|
require "lrama/lexer/token"
|
4
5
|
|
@@ -28,10 +29,12 @@ module Lrama
|
|
28
29
|
%error-token
|
29
30
|
%empty
|
30
31
|
%code
|
32
|
+
%rule
|
31
33
|
)
|
32
34
|
|
33
|
-
def initialize(
|
34
|
-
@
|
35
|
+
def initialize(grammar_file)
|
36
|
+
@grammar_file = grammar_file
|
37
|
+
@scanner = StringScanner.new(grammar_file.text)
|
35
38
|
@head_column = @head = @scanner.pos
|
36
39
|
@head_line = @line = 1
|
37
40
|
@status = :initial
|
@@ -57,8 +60,9 @@ module Lrama
|
|
57
60
|
|
58
61
|
def location
|
59
62
|
Location.new(
|
63
|
+
grammar_file: @grammar_file,
|
60
64
|
first_line: @head_line, first_column: @head_column,
|
61
|
-
last_line:
|
65
|
+
last_line: line, last_column: column
|
62
66
|
)
|
63
67
|
end
|
64
68
|
|
@@ -78,8 +82,7 @@ module Lrama
|
|
78
82
|
end
|
79
83
|
end
|
80
84
|
|
81
|
-
|
82
|
-
@head_column = column
|
85
|
+
reset_first_position
|
83
86
|
|
84
87
|
case
|
85
88
|
when @scanner.eos?
|
@@ -117,6 +120,8 @@ module Lrama
|
|
117
120
|
def lex_c_code
|
118
121
|
nested = 0
|
119
122
|
code = ''
|
123
|
+
reset_first_position
|
124
|
+
|
120
125
|
while !@scanner.eos? do
|
121
126
|
case
|
122
127
|
when @scanner.scan(/{/)
|
@@ -140,12 +145,12 @@ module Lrama
|
|
140
145
|
@line += @scanner.matched.count("\n")
|
141
146
|
when @scanner.scan(/'.*?'/)
|
142
147
|
code += %Q(#{@scanner.matched})
|
148
|
+
when @scanner.scan(/[^\"'\{\}\n]+/)
|
149
|
+
code += @scanner.matched
|
150
|
+
when @scanner.scan(/#{Regexp.escape(@end_symbol)}/)
|
151
|
+
code += @scanner.matched
|
143
152
|
else
|
144
|
-
|
145
|
-
code += @scanner.matched
|
146
|
-
else
|
147
|
-
code += @scanner.getch
|
148
|
-
end
|
153
|
+
code += @scanner.getch
|
149
154
|
end
|
150
155
|
end
|
151
156
|
raise ParseError, "Unexpected code: #{code}."
|
@@ -166,9 +171,14 @@ module Lrama
|
|
166
171
|
end
|
167
172
|
end
|
168
173
|
|
174
|
+
def reset_first_position
|
175
|
+
@head_line = line
|
176
|
+
@head_column = column
|
177
|
+
end
|
178
|
+
|
169
179
|
def newline
|
170
180
|
@line += 1
|
171
|
-
@head = @scanner.pos
|
181
|
+
@head = @scanner.pos
|
172
182
|
end
|
173
183
|
end
|
174
184
|
end
|