lrama 0.5.7 → 0.5.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yaml +1 -1
- data/Gemfile +1 -1
- data/README.md +3 -3
- data/Rakefile +3 -8
- data/Steepfile +9 -1
- data/lib/lrama/command.rb +6 -1
- data/lib/lrama/grammar/percent_code.rb +12 -0
- data/lib/lrama/grammar/symbol.rb +2 -2
- data/lib/lrama/grammar.rb +127 -159
- data/lib/lrama/lexer/token/char.rb +8 -0
- data/lib/lrama/lexer/token/ident.rb +8 -0
- data/lib/lrama/lexer/token/parameterizing.rb +19 -0
- data/lib/lrama/lexer/token/tag.rb +8 -0
- data/lib/lrama/lexer/token/user_code.rb +14 -0
- data/lib/lrama/lexer/token.rb +9 -67
- data/lib/lrama/lexer.rb +14 -15
- data/lib/lrama/option_parser.rb +3 -3
- data/lib/lrama/options.rb +2 -1
- data/lib/lrama/output.rb +9 -0
- data/lib/lrama/parser.rb +540 -493
- data/lib/lrama/version.rb +1 -1
- data/parser.y +101 -71
- data/rbs_collection.lock.yaml +13 -1
- data/sample/calc.y +3 -1
- data/sample/parse.y +5 -1
- data/sig/lrama/grammar/percent_code.rbs +10 -0
- data/sig/lrama/grammar/reference.rbs +22 -0
- data/sig/lrama/grammar.rbs +5 -0
- data/sig/lrama/lexer/token/char.rbs +8 -0
- data/sig/lrama/lexer/token/ident.rbs +8 -0
- data/sig/lrama/lexer/token/parameterizing.rbs +8 -0
- data/sig/lrama/lexer/token/tag.rbs +8 -0
- data/sig/lrama/lexer/token/user_code.rbs +9 -0
- data/sig/lrama/lexer/token.rbs +17 -0
- data/template/bison/_yacc.h +2 -2
- data/template/bison/yacc.c +0 -2
- metadata +17 -4
- data/lib/lrama/lexer/token/type.rb +0 -8
- data/sig/lrama/lexer/token/type.rbs +0 -17
data/lib/lrama/version.rb
CHANGED
data/parser.y
CHANGED
@@ -1,5 +1,10 @@
|
|
1
1
|
class Lrama::Parser
|
2
|
+
expect 7
|
3
|
+
|
4
|
+
token C_DECLARATION CHARACTER IDENT_COLON IDENTIFIER INTEGER STRING TAG
|
5
|
+
|
2
6
|
rule
|
7
|
+
|
3
8
|
input: prologue_declarations bison_declarations "%%" grammar epilogue_opt
|
4
9
|
|
5
10
|
prologue_declarations: # empty
|
@@ -7,14 +12,12 @@ rule
|
|
7
12
|
|
8
13
|
prologue_declaration: "%{"
|
9
14
|
{
|
10
|
-
|
11
|
-
@lexer.end_symbol = '%}'
|
15
|
+
begin_c_declaration("%}")
|
12
16
|
@grammar.prologue_first_lineno = @lexer.line
|
13
17
|
}
|
14
18
|
C_DECLARATION
|
15
19
|
{
|
16
|
-
|
17
|
-
@lexer.end_symbol = nil
|
20
|
+
end_c_declaration
|
18
21
|
}
|
19
22
|
"%}"
|
20
23
|
{
|
@@ -44,15 +47,25 @@ rule
|
|
44
47
|
@grammar.parse_param = @grammar.build_code(:parse_param, token).token_code.s_value
|
45
48
|
}
|
46
49
|
}
|
50
|
+
| "%code" IDENTIFIER "{"
|
51
|
+
{
|
52
|
+
begin_c_declaration("}")
|
53
|
+
}
|
54
|
+
C_DECLARATION
|
55
|
+
{
|
56
|
+
end_c_declaration
|
57
|
+
}
|
58
|
+
"}"
|
59
|
+
{
|
60
|
+
@grammar.add_percent_code(id: val[1], code: val[4])
|
61
|
+
}
|
47
62
|
| "%initial-action" "{"
|
48
63
|
{
|
49
|
-
|
50
|
-
@lexer.end_symbol = '}'
|
64
|
+
begin_c_declaration("}")
|
51
65
|
}
|
52
66
|
C_DECLARATION
|
53
67
|
{
|
54
|
-
|
55
|
-
@lexer.end_symbol = nil
|
68
|
+
end_c_declaration
|
56
69
|
}
|
57
70
|
"}"
|
58
71
|
{
|
@@ -62,13 +75,11 @@ rule
|
|
62
75
|
|
63
76
|
grammar_declaration: "%union" "{"
|
64
77
|
{
|
65
|
-
|
66
|
-
@lexer.end_symbol = '}'
|
78
|
+
begin_c_declaration("}")
|
67
79
|
}
|
68
80
|
C_DECLARATION
|
69
81
|
{
|
70
|
-
|
71
|
-
@lexer.end_symbol = nil
|
82
|
+
end_c_declaration
|
72
83
|
}
|
73
84
|
"}"
|
74
85
|
{
|
@@ -77,24 +88,20 @@ rule
|
|
77
88
|
| symbol_declaration
|
78
89
|
| "%destructor" "{"
|
79
90
|
{
|
80
|
-
|
81
|
-
@lexer.end_symbol = '}'
|
91
|
+
begin_c_declaration("}")
|
82
92
|
}
|
83
93
|
C_DECLARATION
|
84
94
|
{
|
85
|
-
|
86
|
-
@lexer.end_symbol = nil
|
95
|
+
end_c_declaration
|
87
96
|
}
|
88
|
-
|
97
|
+
"}" generic_symlist
|
89
98
|
| "%printer" "{"
|
90
99
|
{
|
91
|
-
|
92
|
-
@lexer.end_symbol = '}'
|
100
|
+
begin_c_declaration("}")
|
93
101
|
}
|
94
102
|
C_DECLARATION
|
95
103
|
{
|
96
|
-
|
97
|
-
@lexer.end_symbol = nil
|
104
|
+
end_c_declaration
|
98
105
|
}
|
99
106
|
"}" generic_symlist
|
100
107
|
{
|
@@ -102,13 +109,11 @@ rule
|
|
102
109
|
}
|
103
110
|
| "%error-token" "{"
|
104
111
|
{
|
105
|
-
|
106
|
-
@lexer.end_symbol = '}'
|
112
|
+
begin_c_declaration("}")
|
107
113
|
}
|
108
114
|
C_DECLARATION
|
109
115
|
{
|
110
|
-
|
111
|
-
@lexer.end_symbol = nil
|
116
|
+
end_c_declaration
|
112
117
|
}
|
113
118
|
"}" generic_symlist
|
114
119
|
{
|
@@ -216,13 +221,11 @@ rule
|
|
216
221
|
|
217
222
|
params: params "{"
|
218
223
|
{
|
219
|
-
|
220
|
-
@lexer.end_symbol = '}'
|
224
|
+
begin_c_declaration("}")
|
221
225
|
}
|
222
226
|
C_DECLARATION
|
223
227
|
{
|
224
|
-
|
225
|
-
@lexer.end_symbol = nil
|
228
|
+
end_c_declaration
|
226
229
|
}
|
227
230
|
"}"
|
228
231
|
{
|
@@ -230,13 +233,11 @@ rule
|
|
230
233
|
}
|
231
234
|
| "{"
|
232
235
|
{
|
233
|
-
|
234
|
-
@lexer.end_symbol = '}'
|
236
|
+
begin_c_declaration("}")
|
235
237
|
}
|
236
238
|
C_DECLARATION
|
237
239
|
{
|
238
|
-
|
239
|
-
@lexer.end_symbol = nil
|
240
|
+
end_c_declaration
|
240
241
|
}
|
241
242
|
"}"
|
242
243
|
{
|
@@ -273,7 +274,7 @@ rule
|
|
273
274
|
rules: id_colon named_ref_opt ":" rhs_list
|
274
275
|
{
|
275
276
|
lhs = val[0]
|
276
|
-
lhs.alias = val[1]
|
277
|
+
lhs.alias_name = val[1]
|
277
278
|
val[3].each {|hash|
|
278
279
|
@grammar.add_rule(lhs: lhs, rhs: hash[:rhs], lineno: hash[:lineno])
|
279
280
|
}
|
@@ -291,56 +292,53 @@ rule
|
|
291
292
|
|
292
293
|
rhs: /* empty */
|
293
294
|
{
|
295
|
+
reset_precs
|
296
|
+
result = []
|
297
|
+
}
|
298
|
+
| "%empty"
|
299
|
+
{
|
300
|
+
reset_precs
|
294
301
|
result = []
|
295
|
-
@prec_seen = false
|
296
|
-
@code_after_prec = false
|
297
302
|
}
|
298
303
|
| rhs symbol named_ref_opt
|
299
304
|
{
|
300
305
|
token = val[1]
|
301
|
-
|
306
|
+
token.alias_name = val[2]
|
302
307
|
result = val[0].append(token)
|
303
308
|
}
|
309
|
+
| rhs "?"
|
310
|
+
{
|
311
|
+
token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
|
312
|
+
result = val[0].append(token)
|
313
|
+
}
|
314
|
+
| rhs "+"
|
315
|
+
{
|
316
|
+
token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
|
317
|
+
result = val[0].append(token)
|
318
|
+
}
|
319
|
+
| rhs "*"
|
320
|
+
{
|
321
|
+
token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
|
322
|
+
result = val[0].append(token)
|
323
|
+
}
|
304
324
|
| rhs "{"
|
305
325
|
{
|
306
326
|
if @prec_seen
|
307
327
|
raise "Multiple User_code after %prec" if @code_after_prec
|
308
328
|
@code_after_prec = true
|
309
329
|
end
|
310
|
-
|
311
|
-
@lexer.end_symbol = '}'
|
330
|
+
begin_c_declaration("}")
|
312
331
|
}
|
313
332
|
C_DECLARATION
|
314
333
|
{
|
315
|
-
|
316
|
-
@lexer.end_symbol = nil
|
334
|
+
end_c_declaration
|
317
335
|
}
|
318
336
|
"}" named_ref_opt
|
319
337
|
{
|
320
338
|
token = val[3]
|
321
|
-
token.alias = val[6]
|
339
|
+
token.alias_name = val[6]
|
322
340
|
result = val[0].append(token)
|
323
341
|
}
|
324
|
-
| "{"
|
325
|
-
{
|
326
|
-
if @prec_seen
|
327
|
-
raise "Multiple User_code after %prec" if @code_after_prec
|
328
|
-
@code_after_prec = true
|
329
|
-
end
|
330
|
-
@lexer.status = :c_declaration
|
331
|
-
@lexer.end_symbol = '}'
|
332
|
-
}
|
333
|
-
C_DECLARATION
|
334
|
-
{
|
335
|
-
@lexer.status = :initial
|
336
|
-
@lexer.end_symbol = nil
|
337
|
-
}
|
338
|
-
"}" named_ref_opt
|
339
|
-
{
|
340
|
-
token = val[2]
|
341
|
-
token.alias = val[5]
|
342
|
-
result = [token]
|
343
|
-
}
|
344
342
|
| rhs "%prec" symbol
|
345
343
|
{
|
346
344
|
sym = @grammar.find_symbol_by_id!(val[2])
|
@@ -356,14 +354,12 @@ rule
|
|
356
354
|
epilogue_opt: # empty
|
357
355
|
| "%%"
|
358
356
|
{
|
359
|
-
|
360
|
-
@lexer.end_symbol = '\Z'
|
357
|
+
begin_c_declaration('\Z')
|
361
358
|
@grammar.epilogue_first_lineno = @lexer.line + 1
|
362
359
|
}
|
363
360
|
C_DECLARATION
|
364
361
|
{
|
365
|
-
|
366
|
-
@lexer.end_symbol = nil
|
362
|
+
end_c_declaration
|
367
363
|
@grammar.epilogue = val[2].s_value
|
368
364
|
}
|
369
365
|
|
@@ -380,15 +376,17 @@ rule
|
|
380
376
|
generic_symlist_item: symbol
|
381
377
|
| TAG
|
382
378
|
|
383
|
-
string_as_id: STRING { result = Lrama::Lexer::Token.new(
|
379
|
+
string_as_id: STRING { result = Lrama::Lexer::Token::Ident.new(s_value: val[0]) }
|
384
380
|
end
|
385
381
|
|
386
382
|
---- inner
|
387
383
|
|
388
384
|
include Lrama::Report::Duration
|
389
385
|
|
390
|
-
def initialize(text)
|
386
|
+
def initialize(text, path, debug = false)
|
391
387
|
@text = text
|
388
|
+
@path = path
|
389
|
+
@yydebug = debug
|
392
390
|
end
|
393
391
|
|
394
392
|
def parse
|
@@ -396,8 +394,8 @@ def parse
|
|
396
394
|
@lexer = Lrama::Lexer.new(@text)
|
397
395
|
@grammar = Lrama::Grammar.new
|
398
396
|
@precedence_number = 0
|
397
|
+
reset_precs
|
399
398
|
do_parse
|
400
|
-
@grammar.extract_references
|
401
399
|
@grammar.prepare
|
402
400
|
@grammar.compute_nullable
|
403
401
|
@grammar.compute_first_set
|
@@ -411,6 +409,38 @@ def next_token
|
|
411
409
|
end
|
412
410
|
|
413
411
|
def on_error(error_token_id, error_value, value_stack)
|
414
|
-
|
415
|
-
|
412
|
+
if error_value.respond_to?(:line) && error_value.respond_to?(:column)
|
413
|
+
line = error_value.line
|
414
|
+
first_column = error_value.column
|
415
|
+
else
|
416
|
+
line = @lexer.line
|
417
|
+
first_column = @lexer.head_column
|
418
|
+
end
|
419
|
+
|
420
|
+
raise ParseError, <<~ERROR
|
421
|
+
#{@path}:#{line}:#{first_column}: parse error on value #{error_value.inspect} (#{token_to_str(error_token_id) || '?'})
|
422
|
+
#{@text.split("\n")[line - 1]}
|
423
|
+
#{carrets(first_column)}
|
424
|
+
ERROR
|
425
|
+
end
|
426
|
+
|
427
|
+
private
|
428
|
+
|
429
|
+
def reset_precs
|
430
|
+
@prec_seen = false
|
431
|
+
@code_after_prec = false
|
432
|
+
end
|
433
|
+
|
434
|
+
def begin_c_declaration(end_symbol)
|
435
|
+
@lexer.status = :c_declaration
|
436
|
+
@lexer.end_symbol = end_symbol
|
437
|
+
end
|
438
|
+
|
439
|
+
def end_c_declaration
|
440
|
+
@lexer.status = :initial
|
441
|
+
@lexer.end_symbol = nil
|
442
|
+
end
|
443
|
+
|
444
|
+
def carrets(first_column)
|
445
|
+
' ' * (first_column + 1) + '^' * (@lexer.column - first_column)
|
416
446
|
end
|
data/rbs_collection.lock.yaml
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
sources:
|
3
3
|
- type: git
|
4
4
|
name: ruby/gem_rbs_collection
|
5
|
-
revision:
|
5
|
+
revision: 2de2d4535caba275f3b8533684aab110d921f553
|
6
6
|
remote: https://github.com/ruby/gem_rbs_collection.git
|
7
7
|
repo_dir: gems
|
8
8
|
path: ".gem_rbs_collection"
|
@@ -11,6 +11,18 @@ gems:
|
|
11
11
|
version: '0'
|
12
12
|
source:
|
13
13
|
type: stdlib
|
14
|
+
- name: fileutils
|
15
|
+
version: '0'
|
16
|
+
source:
|
17
|
+
type: stdlib
|
18
|
+
- name: rake
|
19
|
+
version: '13.0'
|
20
|
+
source:
|
21
|
+
type: git
|
22
|
+
name: ruby/gem_rbs_collection
|
23
|
+
revision: 2de2d4535caba275f3b8533684aab110d921f553
|
24
|
+
remote: https://github.com/ruby/gem_rbs_collection.git
|
25
|
+
repo_dir: gems
|
14
26
|
- name: stackprof
|
15
27
|
version: '0.2'
|
16
28
|
source:
|
data/sample/calc.y
CHANGED
data/sample/parse.y
CHANGED
@@ -0,0 +1,22 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Grammar
|
3
|
+
class Reference
|
4
|
+
# TODO: Replace untyped referring_symbol with (Grammar::Symbol|Lexer::Token)
|
5
|
+
attr_accessor type: Symbol
|
6
|
+
attr_accessor value: (String|Integer)
|
7
|
+
attr_accessor ex_tag: Lexer::Token?
|
8
|
+
attr_accessor first_column: Integer
|
9
|
+
attr_accessor last_column: Integer
|
10
|
+
attr_accessor referring_symbol: untyped
|
11
|
+
attr_accessor position_in_rhs: Integer?
|
12
|
+
|
13
|
+
def initialize: (
|
14
|
+
type: Symbol, value: (String|Integer), ex_tag: Lexer::Token?,
|
15
|
+
first_column: Integer, last_column: Integer,
|
16
|
+
referring_symbol: untyped, position_in_rhs: Integer?
|
17
|
+
) -> void
|
18
|
+
|
19
|
+
def tag: () -> untyped
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Lrama
|
2
|
+
class Lexer
|
3
|
+
class Token
|
4
|
+
attr_accessor s_value: String
|
5
|
+
attr_accessor alias_name: String
|
6
|
+
attr_accessor line: Integer
|
7
|
+
attr_accessor column: Integer
|
8
|
+
attr_accessor referred: bool
|
9
|
+
|
10
|
+
def initialize: (?s_value: String, ?alias_name: String) -> void
|
11
|
+
|
12
|
+
def to_s: () -> String
|
13
|
+
def referred_by?: (String string) -> bool
|
14
|
+
def ==: (Token other) -> bool
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/template/bison/_yacc.h
CHANGED
@@ -12,7 +12,7 @@
|
|
12
12
|
#if YYDEBUG && !defined(yydebug)
|
13
13
|
extern int yydebug;
|
14
14
|
#endif
|
15
|
-
|
15
|
+
<%= output.percent_code("requires") %>
|
16
16
|
|
17
17
|
<%-# b4_token_enums_defines -%>
|
18
18
|
/* Token kinds. */
|
@@ -64,7 +64,7 @@ struct YYLTYPE
|
|
64
64
|
int yyparse (<%= output.parse_param %>);
|
65
65
|
|
66
66
|
|
67
|
-
|
67
|
+
<%= output.percent_code("provides") %>
|
68
68
|
<%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
|
69
69
|
<%- if output.spec_mapped_header_file -%>
|
70
70
|
#endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %> */
|
data/template/bison/yacc.c
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lrama
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.7
|
4
|
+
version: 0.5.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yuichiro Kaneko
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: LALR (1) parser generator written by Ruby
|
14
14
|
email:
|
@@ -47,6 +47,7 @@ files:
|
|
47
47
|
- lib/lrama/grammar/auxiliary.rb
|
48
48
|
- lib/lrama/grammar/code.rb
|
49
49
|
- lib/lrama/grammar/error_token.rb
|
50
|
+
- lib/lrama/grammar/percent_code.rb
|
50
51
|
- lib/lrama/grammar/precedence.rb
|
51
52
|
- lib/lrama/grammar/printer.rb
|
52
53
|
- lib/lrama/grammar/reference.rb
|
@@ -55,7 +56,11 @@ files:
|
|
55
56
|
- lib/lrama/grammar/union.rb
|
56
57
|
- lib/lrama/lexer.rb
|
57
58
|
- lib/lrama/lexer/token.rb
|
58
|
-
- lib/lrama/lexer/token/type.rb
|
59
|
+
- lib/lrama/lexer/token/char.rb
|
60
|
+
- lib/lrama/lexer/token/ident.rb
|
61
|
+
- lib/lrama/lexer/token/parameterizing.rb
|
62
|
+
- lib/lrama/lexer/token/tag.rb
|
63
|
+
- lib/lrama/lexer/token/user_code.rb
|
59
64
|
- lib/lrama/option_parser.rb
|
60
65
|
- lib/lrama/options.rb
|
61
66
|
- lib/lrama/output.rb
|
@@ -84,7 +89,15 @@ files:
|
|
84
89
|
- sample/parse.y
|
85
90
|
- sig/lrama/bitmap.rbs
|
86
91
|
- sig/lrama/digraph.rbs
|
87
|
-
- sig/lrama/lexer/token/type.rbs
|
92
|
+
- sig/lrama/grammar.rbs
|
93
|
+
- sig/lrama/grammar/percent_code.rbs
|
94
|
+
- sig/lrama/grammar/reference.rbs
|
95
|
+
- sig/lrama/lexer/token.rbs
|
96
|
+
- sig/lrama/lexer/token/char.rbs
|
97
|
+
- sig/lrama/lexer/token/ident.rbs
|
98
|
+
- sig/lrama/lexer/token/parameterizing.rbs
|
99
|
+
- sig/lrama/lexer/token/tag.rbs
|
100
|
+
- sig/lrama/lexer/token/user_code.rbs
|
88
101
|
- sig/lrama/report/duration.rbs
|
89
102
|
- sig/lrama/report/profile.rbs
|
90
103
|
- sig/lrama/warning.rbs
|
@@ -1,17 +0,0 @@
|
|
1
|
-
module Lrama
|
2
|
-
class Lexer
|
3
|
-
class Token
|
4
|
-
attr_accessor type: Type
|
5
|
-
attr_accessor s_value: String
|
6
|
-
attr_accessor alias: String
|
7
|
-
|
8
|
-
def initialize: (?type: Type, ?s_value: String, ?alias: String) -> void
|
9
|
-
class Type
|
10
|
-
attr_accessor id: Integer
|
11
|
-
attr_accessor name: String
|
12
|
-
|
13
|
-
def initialize: (?id: Integer, ?name: String) -> void
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|