lrama 0.5.8 → 0.5.9

Sign up to get free protection for your applications and to get access to all the features.
data/lib/lrama/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Lrama
2
- VERSION = "0.5.8".freeze
2
+ VERSION = "0.5.9".freeze
3
3
  end
data/parser.y CHANGED
@@ -1,6 +1,10 @@
1
1
  class Lrama::Parser
2
+ expect 7
3
+
2
4
  token C_DECLARATION CHARACTER IDENT_COLON IDENTIFIER INTEGER STRING TAG
5
+
3
6
  rule
7
+
4
8
  input: prologue_declarations bison_declarations "%%" grammar epilogue_opt
5
9
 
6
10
  prologue_declarations: # empty
@@ -8,14 +12,12 @@ rule
8
12
 
9
13
  prologue_declaration: "%{"
10
14
  {
11
- @lexer.status = :c_declaration
12
- @lexer.end_symbol = '%}'
15
+ begin_c_declaration("%}")
13
16
  @grammar.prologue_first_lineno = @lexer.line
14
17
  }
15
18
  C_DECLARATION
16
19
  {
17
- @lexer.status = :initial
18
- @lexer.end_symbol = nil
20
+ end_c_declaration
19
21
  }
20
22
  "%}"
21
23
  {
@@ -45,15 +47,25 @@ rule
45
47
  @grammar.parse_param = @grammar.build_code(:parse_param, token).token_code.s_value
46
48
  }
47
49
  }
50
+ | "%code" IDENTIFIER "{"
51
+ {
52
+ begin_c_declaration("}")
53
+ }
54
+ C_DECLARATION
55
+ {
56
+ end_c_declaration
57
+ }
58
+ "}"
59
+ {
60
+ @grammar.add_percent_code(id: val[1], code: val[4])
61
+ }
48
62
  | "%initial-action" "{"
49
63
  {
50
- @lexer.status = :c_declaration
51
- @lexer.end_symbol = '}'
64
+ begin_c_declaration("}")
52
65
  }
53
66
  C_DECLARATION
54
67
  {
55
- @lexer.status = :initial
56
- @lexer.end_symbol = nil
68
+ end_c_declaration
57
69
  }
58
70
  "}"
59
71
  {
@@ -63,13 +75,11 @@ rule
63
75
 
64
76
  grammar_declaration: "%union" "{"
65
77
  {
66
- @lexer.status = :c_declaration
67
- @lexer.end_symbol = '}'
78
+ begin_c_declaration("}")
68
79
  }
69
80
  C_DECLARATION
70
81
  {
71
- @lexer.status = :initial
72
- @lexer.end_symbol = nil
82
+ end_c_declaration
73
83
  }
74
84
  "}"
75
85
  {
@@ -78,24 +88,20 @@ rule
78
88
  | symbol_declaration
79
89
  | "%destructor" "{"
80
90
  {
81
- @lexer.status = :c_declaration
82
- @lexer.end_symbol = '}'
91
+ begin_c_declaration("}")
83
92
  }
84
93
  C_DECLARATION
85
94
  {
86
- @lexer.status = :initial
87
- @lexer.end_symbol = nil
95
+ end_c_declaration
88
96
  }
89
- "}" generic_symlist
97
+ "}" generic_symlist
90
98
  | "%printer" "{"
91
99
  {
92
- @lexer.status = :c_declaration
93
- @lexer.end_symbol = '}'
100
+ begin_c_declaration("}")
94
101
  }
95
102
  C_DECLARATION
96
103
  {
97
- @lexer.status = :initial
98
- @lexer.end_symbol = nil
104
+ end_c_declaration
99
105
  }
100
106
  "}" generic_symlist
101
107
  {
@@ -103,13 +109,11 @@ rule
103
109
  }
104
110
  | "%error-token" "{"
105
111
  {
106
- @lexer.status = :c_declaration
107
- @lexer.end_symbol = '}'
112
+ begin_c_declaration("}")
108
113
  }
109
114
  C_DECLARATION
110
115
  {
111
- @lexer.status = :initial
112
- @lexer.end_symbol = nil
116
+ end_c_declaration
113
117
  }
114
118
  "}" generic_symlist
115
119
  {
@@ -217,13 +221,11 @@ rule
217
221
 
218
222
  params: params "{"
219
223
  {
220
- @lexer.status = :c_declaration
221
- @lexer.end_symbol = '}'
224
+ begin_c_declaration("}")
222
225
  }
223
226
  C_DECLARATION
224
227
  {
225
- @lexer.status = :initial
226
- @lexer.end_symbol = nil
228
+ end_c_declaration
227
229
  }
228
230
  "}"
229
231
  {
@@ -231,13 +233,11 @@ rule
231
233
  }
232
234
  | "{"
233
235
  {
234
- @lexer.status = :c_declaration
235
- @lexer.end_symbol = '}'
236
+ begin_c_declaration("}")
236
237
  }
237
238
  C_DECLARATION
238
239
  {
239
- @lexer.status = :initial
240
- @lexer.end_symbol = nil
240
+ end_c_declaration
241
241
  }
242
242
  "}"
243
243
  {
@@ -274,7 +274,7 @@ rule
274
274
  rules: id_colon named_ref_opt ":" rhs_list
275
275
  {
276
276
  lhs = val[0]
277
- lhs.alias = val[1]
277
+ lhs.alias_name = val[1]
278
278
  val[3].each {|hash|
279
279
  @grammar.add_rule(lhs: lhs, rhs: hash[:rhs], lineno: hash[:lineno])
280
280
  }
@@ -292,56 +292,53 @@ rule
292
292
 
293
293
  rhs: /* empty */
294
294
  {
295
+ reset_precs
296
+ result = []
297
+ }
298
+ | "%empty"
299
+ {
300
+ reset_precs
295
301
  result = []
296
- @prec_seen = false
297
- @code_after_prec = false
298
302
  }
299
303
  | rhs symbol named_ref_opt
300
304
  {
301
305
  token = val[1]
302
- token.alias = val[2]
306
+ token.alias_name = val[2]
303
307
  result = val[0].append(token)
304
308
  }
309
+ | rhs "?"
310
+ {
311
+ token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
312
+ result = val[0].append(token)
313
+ }
314
+ | rhs "+"
315
+ {
316
+ token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
317
+ result = val[0].append(token)
318
+ }
319
+ | rhs "*"
320
+ {
321
+ token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
322
+ result = val[0].append(token)
323
+ }
305
324
  | rhs "{"
306
325
  {
307
326
  if @prec_seen
308
327
  raise "Multiple User_code after %prec" if @code_after_prec
309
328
  @code_after_prec = true
310
329
  end
311
- @lexer.status = :c_declaration
312
- @lexer.end_symbol = '}'
330
+ begin_c_declaration("}")
313
331
  }
314
332
  C_DECLARATION
315
333
  {
316
- @lexer.status = :initial
317
- @lexer.end_symbol = nil
334
+ end_c_declaration
318
335
  }
319
336
  "}" named_ref_opt
320
337
  {
321
338
  token = val[3]
322
- token.alias = val[6]
339
+ token.alias_name = val[6]
323
340
  result = val[0].append(token)
324
341
  }
325
- | "{"
326
- {
327
- if @prec_seen
328
- raise "Multiple User_code after %prec" if @code_after_prec
329
- @code_after_prec = true
330
- end
331
- @lexer.status = :c_declaration
332
- @lexer.end_symbol = '}'
333
- }
334
- C_DECLARATION
335
- {
336
- @lexer.status = :initial
337
- @lexer.end_symbol = nil
338
- }
339
- "}" named_ref_opt
340
- {
341
- token = val[2]
342
- token.alias = val[5]
343
- result = [token]
344
- }
345
342
  | rhs "%prec" symbol
346
343
  {
347
344
  sym = @grammar.find_symbol_by_id!(val[2])
@@ -357,14 +354,12 @@ rule
357
354
  epilogue_opt: # empty
358
355
  | "%%"
359
356
  {
360
- @lexer.status = :c_declaration
361
- @lexer.end_symbol = '\Z'
357
+ begin_c_declaration('\Z')
362
358
  @grammar.epilogue_first_lineno = @lexer.line + 1
363
359
  }
364
360
  C_DECLARATION
365
361
  {
366
- @lexer.status = :initial
367
- @lexer.end_symbol = nil
362
+ end_c_declaration
368
363
  @grammar.epilogue = val[2].s_value
369
364
  }
370
365
 
@@ -381,16 +376,17 @@ rule
381
376
  generic_symlist_item: symbol
382
377
  | TAG
383
378
 
384
- string_as_id: STRING { result = Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Ident, s_value: val[0]) }
379
+ string_as_id: STRING { result = Lrama::Lexer::Token::Ident.new(s_value: val[0]) }
385
380
  end
386
381
 
387
382
  ---- inner
388
383
 
389
384
  include Lrama::Report::Duration
390
385
 
391
- def initialize(text, path)
386
+ def initialize(text, path, debug = false)
392
387
  @text = text
393
388
  @path = path
389
+ @yydebug = debug
394
390
  end
395
391
 
396
392
  def parse
@@ -398,8 +394,8 @@ def parse
398
394
  @lexer = Lrama::Lexer.new(@text)
399
395
  @grammar = Lrama::Grammar.new
400
396
  @precedence_number = 0
397
+ reset_precs
401
398
  do_parse
402
- @grammar.extract_references
403
399
  @grammar.prepare
404
400
  @grammar.compute_nullable
405
401
  @grammar.compute_first_set
@@ -413,10 +409,38 @@ def next_token
413
409
  end
414
410
 
415
411
  def on_error(error_token_id, error_value, value_stack)
416
- source = @text.split("\n")[error_value.line - 1]
412
+ if error_value.respond_to?(:line) && error_value.respond_to?(:column)
413
+ line = error_value.line
414
+ first_column = error_value.column
415
+ else
416
+ line = @lexer.line
417
+ first_column = @lexer.head_column
418
+ end
419
+
417
420
  raise ParseError, <<~ERROR
418
- #{@path}:#{@lexer.line}:#{@lexer.column}: parse error on value #{error_value.inspect} (#{token_to_str(error_token_id) || '?'})
419
- #{source}
420
- #{' ' * @lexer.column}^
421
+ #{@path}:#{line}:#{first_column}: parse error on value #{error_value.inspect} (#{token_to_str(error_token_id) || '?'})
422
+ #{@text.split("\n")[line - 1]}
423
+ #{carrets(first_column)}
421
424
  ERROR
422
425
  end
426
+
427
+ private
428
+
429
+ def reset_precs
430
+ @prec_seen = false
431
+ @code_after_prec = false
432
+ end
433
+
434
+ def begin_c_declaration(end_symbol)
435
+ @lexer.status = :c_declaration
436
+ @lexer.end_symbol = end_symbol
437
+ end
438
+
439
+ def end_c_declaration
440
+ @lexer.status = :initial
441
+ @lexer.end_symbol = nil
442
+ end
443
+
444
+ def carrets(first_column)
445
+ ' ' * (first_column + 1) + '^' * (@lexer.column - first_column)
446
+ end
@@ -2,7 +2,7 @@
2
2
  sources:
3
3
  - type: git
4
4
  name: ruby/gem_rbs_collection
5
- revision: 95ad664324500c9eec78569b45da98c65a27a511
5
+ revision: 2de2d4535caba275f3b8533684aab110d921f553
6
6
  remote: https://github.com/ruby/gem_rbs_collection.git
7
7
  repo_dir: gems
8
8
  path: ".gem_rbs_collection"
@@ -11,6 +11,18 @@ gems:
11
11
  version: '0'
12
12
  source:
13
13
  type: stdlib
14
+ - name: fileutils
15
+ version: '0'
16
+ source:
17
+ type: stdlib
18
+ - name: rake
19
+ version: '13.0'
20
+ source:
21
+ type: git
22
+ name: ruby/gem_rbs_collection
23
+ revision: 2de2d4535caba275f3b8533684aab110d921f553
24
+ remote: https://github.com/ruby/gem_rbs_collection.git
25
+ repo_dir: gems
14
26
  - name: stackprof
15
27
  version: '0.2'
16
28
  source:
data/sample/calc.y CHANGED
@@ -15,10 +15,12 @@
15
15
  #include <stdio.h>
16
16
  #include <stdlib.h>
17
17
  #include <ctype.h>
18
+ %}
18
19
 
20
+ %code provides {
19
21
  static int yylex(YYSTYPE *val, YYLTYPE *loc);
20
22
  static int yyerror(YYLTYPE *loc, const char *str);
21
- %}
23
+ }
22
24
 
23
25
  %union {
24
26
  int val;
data/sample/parse.y CHANGED
@@ -4,10 +4,14 @@
4
4
 
5
5
  %{
6
6
  // Prologue
7
+ %}
8
+
9
+ %code provides {
10
+
7
11
  static enum yytokentype yylex(YYSTYPE *lval, YYLTYPE *yylloc);
8
12
  static void yyerror(YYLTYPE *yylloc, const char *msg);
9
13
 
10
- %}
14
+ }
11
15
 
12
16
  %expect 0
13
17
  %define api.pure
@@ -0,0 +1,10 @@
1
+ module Lrama
2
+ class Grammar
3
+ class PercentCode
4
+ attr_reader id: Lexer::Token::Ident
5
+ attr_reader code: Lexer::Token::UserCode
6
+
7
+ def initialize: (Lexer::Token::Ident id, Lexer::Token::UserCode code) -> void
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,22 @@
1
+ module Lrama
2
+ class Grammar
3
+ class Reference
4
+ # TODO: Replace untyped referring_symbol with (Grammar::Symbol|Lexer::Token)
5
+ attr_accessor type: Symbol
6
+ attr_accessor value: (String|Integer)
7
+ attr_accessor ex_tag: Lexer::Token?
8
+ attr_accessor first_column: Integer
9
+ attr_accessor last_column: Integer
10
+ attr_accessor referring_symbol: untyped
11
+ attr_accessor position_in_rhs: Integer?
12
+
13
+ def initialize: (
14
+ type: Symbol, value: (String|Integer), ex_tag: Lexer::Token?,
15
+ first_column: Integer, last_column: Integer,
16
+ referring_symbol: untyped, position_in_rhs: Integer?
17
+ ) -> void
18
+
19
+ def tag: () -> untyped
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,5 @@
1
+ module Lrama
2
+ class Grammar
3
+ def numberize_references: (Lexer::Token lhs, Array[Lexer::Token] rhs, Array[Reference]) -> void
4
+ end
5
+ end
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Char < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Ident < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Parameterizing < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Tag < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,9 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class UserCode < Token
5
+ attr_accessor references: Array[[Symbol, (String|Integer), Token?, Integer, Integer]]
6
+ end
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,17 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ attr_accessor s_value: String
5
+ attr_accessor alias_name: String
6
+ attr_accessor line: Integer
7
+ attr_accessor column: Integer
8
+ attr_accessor referred: bool
9
+
10
+ def initialize: (?s_value: String, ?alias_name: String) -> void
11
+
12
+ def to_s: () -> String
13
+ def referred_by?: (String string) -> bool
14
+ def ==: (Token other) -> bool
15
+ end
16
+ end
17
+ end
@@ -12,7 +12,7 @@
12
12
  #if YYDEBUG && !defined(yydebug)
13
13
  extern int yydebug;
14
14
  #endif
15
- <%-# b4_percent_code_get([[requires]]). %code is not supported -%>
15
+ <%= output.percent_code("requires") %>
16
16
 
17
17
  <%-# b4_token_enums_defines -%>
18
18
  /* Token kinds. */
@@ -64,7 +64,7 @@ struct YYLTYPE
64
64
  int yyparse (<%= output.parse_param %>);
65
65
 
66
66
 
67
- <%-# b4_percent_code_get([[provides]]). %code is not supported -%>
67
+ <%= output.percent_code("provides") %>
68
68
  <%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
69
69
  <%- if output.spec_mapped_header_file -%>
70
70
  #endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %> */
@@ -68,8 +68,6 @@
68
68
  #define YYPULL 1
69
69
 
70
70
 
71
-
72
-
73
71
  <%# b4_user_pre_prologue -%>
74
72
  /* First part of user prologue. */
75
73
  #line <%= output.aux.prologue_first_lineno %> "<%= output.grammar_file_path %>"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lrama
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.8
4
+ version: 0.5.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuichiro Kaneko
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-26 00:00:00.000000000 Z
11
+ date: 2023-11-05 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: LALR (1) parser generator written by Ruby
14
14
  email:
@@ -47,6 +47,7 @@ files:
47
47
  - lib/lrama/grammar/auxiliary.rb
48
48
  - lib/lrama/grammar/code.rb
49
49
  - lib/lrama/grammar/error_token.rb
50
+ - lib/lrama/grammar/percent_code.rb
50
51
  - lib/lrama/grammar/precedence.rb
51
52
  - lib/lrama/grammar/printer.rb
52
53
  - lib/lrama/grammar/reference.rb
@@ -55,7 +56,11 @@ files:
55
56
  - lib/lrama/grammar/union.rb
56
57
  - lib/lrama/lexer.rb
57
58
  - lib/lrama/lexer/token.rb
58
- - lib/lrama/lexer/token/type.rb
59
+ - lib/lrama/lexer/token/char.rb
60
+ - lib/lrama/lexer/token/ident.rb
61
+ - lib/lrama/lexer/token/parameterizing.rb
62
+ - lib/lrama/lexer/token/tag.rb
63
+ - lib/lrama/lexer/token/user_code.rb
59
64
  - lib/lrama/option_parser.rb
60
65
  - lib/lrama/options.rb
61
66
  - lib/lrama/output.rb
@@ -84,7 +89,15 @@ files:
84
89
  - sample/parse.y
85
90
  - sig/lrama/bitmap.rbs
86
91
  - sig/lrama/digraph.rbs
87
- - sig/lrama/lexer/token/type.rbs
92
+ - sig/lrama/grammar.rbs
93
+ - sig/lrama/grammar/percent_code.rbs
94
+ - sig/lrama/grammar/reference.rbs
95
+ - sig/lrama/lexer/token.rbs
96
+ - sig/lrama/lexer/token/char.rbs
97
+ - sig/lrama/lexer/token/ident.rbs
98
+ - sig/lrama/lexer/token/parameterizing.rbs
99
+ - sig/lrama/lexer/token/tag.rbs
100
+ - sig/lrama/lexer/token/user_code.rbs
88
101
  - sig/lrama/report/duration.rbs
89
102
  - sig/lrama/report/profile.rbs
90
103
  - sig/lrama/warning.rbs
@@ -1,8 +0,0 @@
1
- module Lrama
2
- class Lexer
3
- class Token < Struct.new(:type, :s_value, :alias, keyword_init: true)
4
- class Type < Struct.new(:id, :name, keyword_init: true)
5
- end
6
- end
7
- end
8
- end
@@ -1,17 +0,0 @@
1
- module Lrama
2
- class Lexer
3
- class Token
4
- attr_accessor type: Type
5
- attr_accessor s_value: String
6
- attr_accessor alias: String
7
-
8
- def initialize: (?type: Type, ?s_value: String, ?alias: String) -> void
9
- class Type
10
- attr_accessor id: Integer
11
- attr_accessor name: String
12
-
13
- def initialize: (?id: Integer, ?name: String) -> void
14
- end
15
- end
16
- end
17
- end