lrama 0.5.8 → 0.5.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +1 -1
- data/Gemfile +1 -1
- data/README.md +3 -3
- data/Rakefile +2 -7
- data/Steepfile +9 -1
- data/lib/lrama/command.rb +6 -1
- data/lib/lrama/grammar/percent_code.rb +12 -0
- data/lib/lrama/grammar/symbol.rb +2 -2
- data/lib/lrama/grammar.rb +100 -55
- data/lib/lrama/lexer/token/char.rb +8 -0
- data/lib/lrama/lexer/token/ident.rb +8 -0
- data/lib/lrama/lexer/token/parameterizing.rb +19 -0
- data/lib/lrama/lexer/token/tag.rb +8 -0
- data/lib/lrama/lexer/token/user_code.rb +14 -0
- data/lib/lrama/lexer/token.rb +9 -67
- data/lib/lrama/lexer.rb +14 -15
- data/lib/lrama/option_parser.rb +2 -1
- data/lib/lrama/options.rb +2 -1
- data/lib/lrama/output.rb +9 -0
- data/lib/lrama/parser.rb +500 -458
- data/lib/lrama/version.rb +1 -1
- data/parser.y +97 -73
- data/rbs_collection.lock.yaml +13 -1
- data/sample/calc.y +3 -1
- data/sample/parse.y +5 -1
- data/sig/lrama/grammar/percent_code.rbs +10 -0
- data/sig/lrama/grammar/reference.rbs +22 -0
- data/sig/lrama/grammar.rbs +5 -0
- data/sig/lrama/lexer/token/char.rbs +8 -0
- data/sig/lrama/lexer/token/ident.rbs +8 -0
- data/sig/lrama/lexer/token/parameterizing.rbs +8 -0
- data/sig/lrama/lexer/token/tag.rbs +8 -0
- data/sig/lrama/lexer/token/user_code.rbs +9 -0
- data/sig/lrama/lexer/token.rbs +17 -0
- data/template/bison/_yacc.h +2 -2
- data/template/bison/yacc.c +0 -2
- metadata +17 -4
- data/lib/lrama/lexer/token/type.rb +0 -8
- data/sig/lrama/lexer/token/type.rbs +0 -17
data/lib/lrama/version.rb
CHANGED
data/parser.y
CHANGED
@@ -1,6 +1,10 @@
|
|
1
1
|
class Lrama::Parser
|
2
|
+
expect 7
|
3
|
+
|
2
4
|
token C_DECLARATION CHARACTER IDENT_COLON IDENTIFIER INTEGER STRING TAG
|
5
|
+
|
3
6
|
rule
|
7
|
+
|
4
8
|
input: prologue_declarations bison_declarations "%%" grammar epilogue_opt
|
5
9
|
|
6
10
|
prologue_declarations: # empty
|
@@ -8,14 +12,12 @@ rule
|
|
8
12
|
|
9
13
|
prologue_declaration: "%{"
|
10
14
|
{
|
11
|
-
|
12
|
-
@lexer.end_symbol = '%}'
|
15
|
+
begin_c_declaration("%}")
|
13
16
|
@grammar.prologue_first_lineno = @lexer.line
|
14
17
|
}
|
15
18
|
C_DECLARATION
|
16
19
|
{
|
17
|
-
|
18
|
-
@lexer.end_symbol = nil
|
20
|
+
end_c_declaration
|
19
21
|
}
|
20
22
|
"%}"
|
21
23
|
{
|
@@ -45,15 +47,25 @@ rule
|
|
45
47
|
@grammar.parse_param = @grammar.build_code(:parse_param, token).token_code.s_value
|
46
48
|
}
|
47
49
|
}
|
50
|
+
| "%code" IDENTIFIER "{"
|
51
|
+
{
|
52
|
+
begin_c_declaration("}")
|
53
|
+
}
|
54
|
+
C_DECLARATION
|
55
|
+
{
|
56
|
+
end_c_declaration
|
57
|
+
}
|
58
|
+
"}"
|
59
|
+
{
|
60
|
+
@grammar.add_percent_code(id: val[1], code: val[4])
|
61
|
+
}
|
48
62
|
| "%initial-action" "{"
|
49
63
|
{
|
50
|
-
|
51
|
-
@lexer.end_symbol = '}'
|
64
|
+
begin_c_declaration("}")
|
52
65
|
}
|
53
66
|
C_DECLARATION
|
54
67
|
{
|
55
|
-
|
56
|
-
@lexer.end_symbol = nil
|
68
|
+
end_c_declaration
|
57
69
|
}
|
58
70
|
"}"
|
59
71
|
{
|
@@ -63,13 +75,11 @@ rule
|
|
63
75
|
|
64
76
|
grammar_declaration: "%union" "{"
|
65
77
|
{
|
66
|
-
|
67
|
-
@lexer.end_symbol = '}'
|
78
|
+
begin_c_declaration("}")
|
68
79
|
}
|
69
80
|
C_DECLARATION
|
70
81
|
{
|
71
|
-
|
72
|
-
@lexer.end_symbol = nil
|
82
|
+
end_c_declaration
|
73
83
|
}
|
74
84
|
"}"
|
75
85
|
{
|
@@ -78,24 +88,20 @@ rule
|
|
78
88
|
| symbol_declaration
|
79
89
|
| "%destructor" "{"
|
80
90
|
{
|
81
|
-
|
82
|
-
@lexer.end_symbol = '}'
|
91
|
+
begin_c_declaration("}")
|
83
92
|
}
|
84
93
|
C_DECLARATION
|
85
94
|
{
|
86
|
-
|
87
|
-
@lexer.end_symbol = nil
|
95
|
+
end_c_declaration
|
88
96
|
}
|
89
|
-
|
97
|
+
"}" generic_symlist
|
90
98
|
| "%printer" "{"
|
91
99
|
{
|
92
|
-
|
93
|
-
@lexer.end_symbol = '}'
|
100
|
+
begin_c_declaration("}")
|
94
101
|
}
|
95
102
|
C_DECLARATION
|
96
103
|
{
|
97
|
-
|
98
|
-
@lexer.end_symbol = nil
|
104
|
+
end_c_declaration
|
99
105
|
}
|
100
106
|
"}" generic_symlist
|
101
107
|
{
|
@@ -103,13 +109,11 @@ rule
|
|
103
109
|
}
|
104
110
|
| "%error-token" "{"
|
105
111
|
{
|
106
|
-
|
107
|
-
@lexer.end_symbol = '}'
|
112
|
+
begin_c_declaration("}")
|
108
113
|
}
|
109
114
|
C_DECLARATION
|
110
115
|
{
|
111
|
-
|
112
|
-
@lexer.end_symbol = nil
|
116
|
+
end_c_declaration
|
113
117
|
}
|
114
118
|
"}" generic_symlist
|
115
119
|
{
|
@@ -217,13 +221,11 @@ rule
|
|
217
221
|
|
218
222
|
params: params "{"
|
219
223
|
{
|
220
|
-
|
221
|
-
@lexer.end_symbol = '}'
|
224
|
+
begin_c_declaration("}")
|
222
225
|
}
|
223
226
|
C_DECLARATION
|
224
227
|
{
|
225
|
-
|
226
|
-
@lexer.end_symbol = nil
|
228
|
+
end_c_declaration
|
227
229
|
}
|
228
230
|
"}"
|
229
231
|
{
|
@@ -231,13 +233,11 @@ rule
|
|
231
233
|
}
|
232
234
|
| "{"
|
233
235
|
{
|
234
|
-
|
235
|
-
@lexer.end_symbol = '}'
|
236
|
+
begin_c_declaration("}")
|
236
237
|
}
|
237
238
|
C_DECLARATION
|
238
239
|
{
|
239
|
-
|
240
|
-
@lexer.end_symbol = nil
|
240
|
+
end_c_declaration
|
241
241
|
}
|
242
242
|
"}"
|
243
243
|
{
|
@@ -274,7 +274,7 @@ rule
|
|
274
274
|
rules: id_colon named_ref_opt ":" rhs_list
|
275
275
|
{
|
276
276
|
lhs = val[0]
|
277
|
-
lhs.alias = val[1]
|
277
|
+
lhs.alias_name = val[1]
|
278
278
|
val[3].each {|hash|
|
279
279
|
@grammar.add_rule(lhs: lhs, rhs: hash[:rhs], lineno: hash[:lineno])
|
280
280
|
}
|
@@ -292,56 +292,53 @@ rule
|
|
292
292
|
|
293
293
|
rhs: /* empty */
|
294
294
|
{
|
295
|
+
reset_precs
|
296
|
+
result = []
|
297
|
+
}
|
298
|
+
| "%empty"
|
299
|
+
{
|
300
|
+
reset_precs
|
295
301
|
result = []
|
296
|
-
@prec_seen = false
|
297
|
-
@code_after_prec = false
|
298
302
|
}
|
299
303
|
| rhs symbol named_ref_opt
|
300
304
|
{
|
301
305
|
token = val[1]
|
302
|
-
token.alias = val[2]
|
306
|
+
token.alias_name = val[2]
|
303
307
|
result = val[0].append(token)
|
304
308
|
}
|
309
|
+
| rhs "?"
|
310
|
+
{
|
311
|
+
token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
|
312
|
+
result = val[0].append(token)
|
313
|
+
}
|
314
|
+
| rhs "+"
|
315
|
+
{
|
316
|
+
token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
|
317
|
+
result = val[0].append(token)
|
318
|
+
}
|
319
|
+
| rhs "*"
|
320
|
+
{
|
321
|
+
token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
|
322
|
+
result = val[0].append(token)
|
323
|
+
}
|
305
324
|
| rhs "{"
|
306
325
|
{
|
307
326
|
if @prec_seen
|
308
327
|
raise "Multiple User_code after %prec" if @code_after_prec
|
309
328
|
@code_after_prec = true
|
310
329
|
end
|
311
|
-
|
312
|
-
@lexer.end_symbol = '}'
|
330
|
+
begin_c_declaration("}")
|
313
331
|
}
|
314
332
|
C_DECLARATION
|
315
333
|
{
|
316
|
-
|
317
|
-
@lexer.end_symbol = nil
|
334
|
+
end_c_declaration
|
318
335
|
}
|
319
336
|
"}" named_ref_opt
|
320
337
|
{
|
321
338
|
token = val[3]
|
322
|
-
token.alias = val[6]
|
339
|
+
token.alias_name = val[6]
|
323
340
|
result = val[0].append(token)
|
324
341
|
}
|
325
|
-
| "{"
|
326
|
-
{
|
327
|
-
if @prec_seen
|
328
|
-
raise "Multiple User_code after %prec" if @code_after_prec
|
329
|
-
@code_after_prec = true
|
330
|
-
end
|
331
|
-
@lexer.status = :c_declaration
|
332
|
-
@lexer.end_symbol = '}'
|
333
|
-
}
|
334
|
-
C_DECLARATION
|
335
|
-
{
|
336
|
-
@lexer.status = :initial
|
337
|
-
@lexer.end_symbol = nil
|
338
|
-
}
|
339
|
-
"}" named_ref_opt
|
340
|
-
{
|
341
|
-
token = val[2]
|
342
|
-
token.alias = val[5]
|
343
|
-
result = [token]
|
344
|
-
}
|
345
342
|
| rhs "%prec" symbol
|
346
343
|
{
|
347
344
|
sym = @grammar.find_symbol_by_id!(val[2])
|
@@ -357,14 +354,12 @@ rule
|
|
357
354
|
epilogue_opt: # empty
|
358
355
|
| "%%"
|
359
356
|
{
|
360
|
-
|
361
|
-
@lexer.end_symbol = '\Z'
|
357
|
+
begin_c_declaration('\Z')
|
362
358
|
@grammar.epilogue_first_lineno = @lexer.line + 1
|
363
359
|
}
|
364
360
|
C_DECLARATION
|
365
361
|
{
|
366
|
-
|
367
|
-
@lexer.end_symbol = nil
|
362
|
+
end_c_declaration
|
368
363
|
@grammar.epilogue = val[2].s_value
|
369
364
|
}
|
370
365
|
|
@@ -381,16 +376,17 @@ rule
|
|
381
376
|
generic_symlist_item: symbol
|
382
377
|
| TAG
|
383
378
|
|
384
|
-
string_as_id: STRING { result = Lrama::Lexer::Token.new(
|
379
|
+
string_as_id: STRING { result = Lrama::Lexer::Token::Ident.new(s_value: val[0]) }
|
385
380
|
end
|
386
381
|
|
387
382
|
---- inner
|
388
383
|
|
389
384
|
include Lrama::Report::Duration
|
390
385
|
|
391
|
-
def initialize(text, path)
|
386
|
+
def initialize(text, path, debug = false)
|
392
387
|
@text = text
|
393
388
|
@path = path
|
389
|
+
@yydebug = debug
|
394
390
|
end
|
395
391
|
|
396
392
|
def parse
|
@@ -398,8 +394,8 @@ def parse
|
|
398
394
|
@lexer = Lrama::Lexer.new(@text)
|
399
395
|
@grammar = Lrama::Grammar.new
|
400
396
|
@precedence_number = 0
|
397
|
+
reset_precs
|
401
398
|
do_parse
|
402
|
-
@grammar.extract_references
|
403
399
|
@grammar.prepare
|
404
400
|
@grammar.compute_nullable
|
405
401
|
@grammar.compute_first_set
|
@@ -413,10 +409,38 @@ def next_token
|
|
413
409
|
end
|
414
410
|
|
415
411
|
def on_error(error_token_id, error_value, value_stack)
|
416
|
-
|
412
|
+
if error_value.respond_to?(:line) && error_value.respond_to?(:column)
|
413
|
+
line = error_value.line
|
414
|
+
first_column = error_value.column
|
415
|
+
else
|
416
|
+
line = @lexer.line
|
417
|
+
first_column = @lexer.head_column
|
418
|
+
end
|
419
|
+
|
417
420
|
raise ParseError, <<~ERROR
|
418
|
-
#{@path}:#{
|
419
|
-
#{
|
420
|
-
#{
|
421
|
+
#{@path}:#{line}:#{first_column}: parse error on value #{error_value.inspect} (#{token_to_str(error_token_id) || '?'})
|
422
|
+
#{@text.split("\n")[line - 1]}
|
423
|
+
#{carrets(first_column)}
|
421
424
|
ERROR
|
422
425
|
end
|
426
|
+
|
427
|
+
private
|
428
|
+
|
429
|
+
def reset_precs
|
430
|
+
@prec_seen = false
|
431
|
+
@code_after_prec = false
|
432
|
+
end
|
433
|
+
|
434
|
+
def begin_c_declaration(end_symbol)
|
435
|
+
@lexer.status = :c_declaration
|
436
|
+
@lexer.end_symbol = end_symbol
|
437
|
+
end
|
438
|
+
|
439
|
+
def end_c_declaration
|
440
|
+
@lexer.status = :initial
|
441
|
+
@lexer.end_symbol = nil
|
442
|
+
end
|
443
|
+
|
444
|
+
def carrets(first_column)
|
445
|
+
' ' * (first_column + 1) + '^' * (@lexer.column - first_column)
|
446
|
+
end
|
data/rbs_collection.lock.yaml
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
sources:
|
3
3
|
- type: git
|
4
4
|
name: ruby/gem_rbs_collection
|
5
|
-
revision:
|
5
|
+
revision: 2de2d4535caba275f3b8533684aab110d921f553
|
6
6
|
remote: https://github.com/ruby/gem_rbs_collection.git
|
7
7
|
repo_dir: gems
|
8
8
|
path: ".gem_rbs_collection"
|
@@ -11,6 +11,18 @@ gems:
|
|
11
11
|
version: '0'
|
12
12
|
source:
|
13
13
|
type: stdlib
|
14
|
+
- name: fileutils
|
15
|
+
version: '0'
|
16
|
+
source:
|
17
|
+
type: stdlib
|
18
|
+
- name: rake
|
19
|
+
version: '13.0'
|
20
|
+
source:
|
21
|
+
type: git
|
22
|
+
name: ruby/gem_rbs_collection
|
23
|
+
revision: 2de2d4535caba275f3b8533684aab110d921f553
|
24
|
+
remote: https://github.com/ruby/gem_rbs_collection.git
|
25
|
+
repo_dir: gems
|
14
26
|
- name: stackprof
|
15
27
|
version: '0.2'
|
16
28
|
source:
|
data/sample/calc.y
CHANGED
data/sample/parse.y
CHANGED
@@ -0,0 +1,22 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Grammar
|
3
|
+
class Reference
|
4
|
+
# TODO: Replace untyped referring_symbol with (Grammar::Symbol|Lexer::Token)
|
5
|
+
attr_accessor type: Symbol
|
6
|
+
attr_accessor value: (String|Integer)
|
7
|
+
attr_accessor ex_tag: Lexer::Token?
|
8
|
+
attr_accessor first_column: Integer
|
9
|
+
attr_accessor last_column: Integer
|
10
|
+
attr_accessor referring_symbol: untyped
|
11
|
+
attr_accessor position_in_rhs: Integer?
|
12
|
+
|
13
|
+
def initialize: (
|
14
|
+
type: Symbol, value: (String|Integer), ex_tag: Lexer::Token?,
|
15
|
+
first_column: Integer, last_column: Integer,
|
16
|
+
referring_symbol: untyped, position_in_rhs: Integer?
|
17
|
+
) -> void
|
18
|
+
|
19
|
+
def tag: () -> untyped
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Lexer
|
3
|
+
class Token
|
4
|
+
attr_accessor s_value: String
|
5
|
+
attr_accessor alias_name: String
|
6
|
+
attr_accessor line: Integer
|
7
|
+
attr_accessor column: Integer
|
8
|
+
attr_accessor referred: bool
|
9
|
+
|
10
|
+
def initialize: (?s_value: String, ?alias_name: String) -> void
|
11
|
+
|
12
|
+
def to_s: () -> String
|
13
|
+
def referred_by?: (String string) -> bool
|
14
|
+
def ==: (Token other) -> bool
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/template/bison/_yacc.h
CHANGED
@@ -12,7 +12,7 @@
|
|
12
12
|
#if YYDEBUG && !defined(yydebug)
|
13
13
|
extern int yydebug;
|
14
14
|
#endif
|
15
|
-
|
15
|
+
<%= output.percent_code("requires") %>
|
16
16
|
|
17
17
|
<%-# b4_token_enums_defines -%>
|
18
18
|
/* Token kinds. */
|
@@ -64,7 +64,7 @@ struct YYLTYPE
|
|
64
64
|
int yyparse (<%= output.parse_param %>);
|
65
65
|
|
66
66
|
|
67
|
-
|
67
|
+
<%= output.percent_code("provides") %>
|
68
68
|
<%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
|
69
69
|
<%- if output.spec_mapped_header_file -%>
|
70
70
|
#endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %> */
|
data/template/bison/yacc.c
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lrama
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.8
|
4
|
+
version: 0.5.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yuichiro Kaneko
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: LALR (1) parser generator written by Ruby
|
14
14
|
email:
|
@@ -47,6 +47,7 @@ files:
|
|
47
47
|
- lib/lrama/grammar/auxiliary.rb
|
48
48
|
- lib/lrama/grammar/code.rb
|
49
49
|
- lib/lrama/grammar/error_token.rb
|
50
|
+
- lib/lrama/grammar/percent_code.rb
|
50
51
|
- lib/lrama/grammar/precedence.rb
|
51
52
|
- lib/lrama/grammar/printer.rb
|
52
53
|
- lib/lrama/grammar/reference.rb
|
@@ -55,7 +56,11 @@ files:
|
|
55
56
|
- lib/lrama/grammar/union.rb
|
56
57
|
- lib/lrama/lexer.rb
|
57
58
|
- lib/lrama/lexer/token.rb
|
58
|
-
- lib/lrama/lexer/token/type.rb
|
59
|
+
- lib/lrama/lexer/token/char.rb
|
60
|
+
- lib/lrama/lexer/token/ident.rb
|
61
|
+
- lib/lrama/lexer/token/parameterizing.rb
|
62
|
+
- lib/lrama/lexer/token/tag.rb
|
63
|
+
- lib/lrama/lexer/token/user_code.rb
|
59
64
|
- lib/lrama/option_parser.rb
|
60
65
|
- lib/lrama/options.rb
|
61
66
|
- lib/lrama/output.rb
|
@@ -84,7 +89,15 @@ files:
|
|
84
89
|
- sample/parse.y
|
85
90
|
- sig/lrama/bitmap.rbs
|
86
91
|
- sig/lrama/digraph.rbs
|
87
|
-
- sig/lrama/lexer/token/type.rbs
|
92
|
+
- sig/lrama/grammar.rbs
|
93
|
+
- sig/lrama/grammar/percent_code.rbs
|
94
|
+
- sig/lrama/grammar/reference.rbs
|
95
|
+
- sig/lrama/lexer/token.rbs
|
96
|
+
- sig/lrama/lexer/token/char.rbs
|
97
|
+
- sig/lrama/lexer/token/ident.rbs
|
98
|
+
- sig/lrama/lexer/token/parameterizing.rbs
|
99
|
+
- sig/lrama/lexer/token/tag.rbs
|
100
|
+
- sig/lrama/lexer/token/user_code.rbs
|
88
101
|
- sig/lrama/report/duration.rbs
|
89
102
|
- sig/lrama/report/profile.rbs
|
90
103
|
- sig/lrama/warning.rbs
|
@@ -1,17 +0,0 @@
|
|
1
|
-
module Lrama
|
2
|
-
class Lexer
|
3
|
-
class Token
|
4
|
-
attr_accessor type: Type
|
5
|
-
attr_accessor s_value: String
|
6
|
-
attr_accessor alias: String
|
7
|
-
|
8
|
-
def initialize: (?type: Type, ?s_value: String, ?alias: String) -> void
|
9
|
-
class Type
|
10
|
-
attr_accessor id: Integer
|
11
|
-
attr_accessor name: String
|
12
|
-
|
13
|
-
def initialize: (?id: Integer, ?name: String) -> void
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|