lrama 0.5.8 → 0.5.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/lrama/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Lrama
2
- VERSION = "0.5.8".freeze
2
+ VERSION = "0.5.9".freeze
3
3
  end
data/parser.y CHANGED
@@ -1,6 +1,10 @@
1
1
  class Lrama::Parser
2
+ expect 7
3
+
2
4
  token C_DECLARATION CHARACTER IDENT_COLON IDENTIFIER INTEGER STRING TAG
5
+
3
6
  rule
7
+
4
8
  input: prologue_declarations bison_declarations "%%" grammar epilogue_opt
5
9
 
6
10
  prologue_declarations: # empty
@@ -8,14 +12,12 @@ rule
8
12
 
9
13
  prologue_declaration: "%{"
10
14
  {
11
- @lexer.status = :c_declaration
12
- @lexer.end_symbol = '%}'
15
+ begin_c_declaration("%}")
13
16
  @grammar.prologue_first_lineno = @lexer.line
14
17
  }
15
18
  C_DECLARATION
16
19
  {
17
- @lexer.status = :initial
18
- @lexer.end_symbol = nil
20
+ end_c_declaration
19
21
  }
20
22
  "%}"
21
23
  {
@@ -45,15 +47,25 @@ rule
45
47
  @grammar.parse_param = @grammar.build_code(:parse_param, token).token_code.s_value
46
48
  }
47
49
  }
50
+ | "%code" IDENTIFIER "{"
51
+ {
52
+ begin_c_declaration("}")
53
+ }
54
+ C_DECLARATION
55
+ {
56
+ end_c_declaration
57
+ }
58
+ "}"
59
+ {
60
+ @grammar.add_percent_code(id: val[1], code: val[4])
61
+ }
48
62
  | "%initial-action" "{"
49
63
  {
50
- @lexer.status = :c_declaration
51
- @lexer.end_symbol = '}'
64
+ begin_c_declaration("}")
52
65
  }
53
66
  C_DECLARATION
54
67
  {
55
- @lexer.status = :initial
56
- @lexer.end_symbol = nil
68
+ end_c_declaration
57
69
  }
58
70
  "}"
59
71
  {
@@ -63,13 +75,11 @@ rule
63
75
 
64
76
  grammar_declaration: "%union" "{"
65
77
  {
66
- @lexer.status = :c_declaration
67
- @lexer.end_symbol = '}'
78
+ begin_c_declaration("}")
68
79
  }
69
80
  C_DECLARATION
70
81
  {
71
- @lexer.status = :initial
72
- @lexer.end_symbol = nil
82
+ end_c_declaration
73
83
  }
74
84
  "}"
75
85
  {
@@ -78,24 +88,20 @@ rule
78
88
  | symbol_declaration
79
89
  | "%destructor" "{"
80
90
  {
81
- @lexer.status = :c_declaration
82
- @lexer.end_symbol = '}'
91
+ begin_c_declaration("}")
83
92
  }
84
93
  C_DECLARATION
85
94
  {
86
- @lexer.status = :initial
87
- @lexer.end_symbol = nil
95
+ end_c_declaration
88
96
  }
89
- "}" generic_symlist
97
+ "}" generic_symlist
90
98
  | "%printer" "{"
91
99
  {
92
- @lexer.status = :c_declaration
93
- @lexer.end_symbol = '}'
100
+ begin_c_declaration("}")
94
101
  }
95
102
  C_DECLARATION
96
103
  {
97
- @lexer.status = :initial
98
- @lexer.end_symbol = nil
104
+ end_c_declaration
99
105
  }
100
106
  "}" generic_symlist
101
107
  {
@@ -103,13 +109,11 @@ rule
103
109
  }
104
110
  | "%error-token" "{"
105
111
  {
106
- @lexer.status = :c_declaration
107
- @lexer.end_symbol = '}'
112
+ begin_c_declaration("}")
108
113
  }
109
114
  C_DECLARATION
110
115
  {
111
- @lexer.status = :initial
112
- @lexer.end_symbol = nil
116
+ end_c_declaration
113
117
  }
114
118
  "}" generic_symlist
115
119
  {
@@ -217,13 +221,11 @@ rule
217
221
 
218
222
  params: params "{"
219
223
  {
220
- @lexer.status = :c_declaration
221
- @lexer.end_symbol = '}'
224
+ begin_c_declaration("}")
222
225
  }
223
226
  C_DECLARATION
224
227
  {
225
- @lexer.status = :initial
226
- @lexer.end_symbol = nil
228
+ end_c_declaration
227
229
  }
228
230
  "}"
229
231
  {
@@ -231,13 +233,11 @@ rule
231
233
  }
232
234
  | "{"
233
235
  {
234
- @lexer.status = :c_declaration
235
- @lexer.end_symbol = '}'
236
+ begin_c_declaration("}")
236
237
  }
237
238
  C_DECLARATION
238
239
  {
239
- @lexer.status = :initial
240
- @lexer.end_symbol = nil
240
+ end_c_declaration
241
241
  }
242
242
  "}"
243
243
  {
@@ -274,7 +274,7 @@ rule
274
274
  rules: id_colon named_ref_opt ":" rhs_list
275
275
  {
276
276
  lhs = val[0]
277
- lhs.alias = val[1]
277
+ lhs.alias_name = val[1]
278
278
  val[3].each {|hash|
279
279
  @grammar.add_rule(lhs: lhs, rhs: hash[:rhs], lineno: hash[:lineno])
280
280
  }
@@ -292,56 +292,53 @@ rule
292
292
 
293
293
  rhs: /* empty */
294
294
  {
295
+ reset_precs
296
+ result = []
297
+ }
298
+ | "%empty"
299
+ {
300
+ reset_precs
295
301
  result = []
296
- @prec_seen = false
297
- @code_after_prec = false
298
302
  }
299
303
  | rhs symbol named_ref_opt
300
304
  {
301
305
  token = val[1]
302
- token.alias = val[2]
306
+ token.alias_name = val[2]
303
307
  result = val[0].append(token)
304
308
  }
309
+ | rhs "?"
310
+ {
311
+ token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
312
+ result = val[0].append(token)
313
+ }
314
+ | rhs "+"
315
+ {
316
+ token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
317
+ result = val[0].append(token)
318
+ }
319
+ | rhs "*"
320
+ {
321
+ token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
322
+ result = val[0].append(token)
323
+ }
305
324
  | rhs "{"
306
325
  {
307
326
  if @prec_seen
308
327
  raise "Multiple User_code after %prec" if @code_after_prec
309
328
  @code_after_prec = true
310
329
  end
311
- @lexer.status = :c_declaration
312
- @lexer.end_symbol = '}'
330
+ begin_c_declaration("}")
313
331
  }
314
332
  C_DECLARATION
315
333
  {
316
- @lexer.status = :initial
317
- @lexer.end_symbol = nil
334
+ end_c_declaration
318
335
  }
319
336
  "}" named_ref_opt
320
337
  {
321
338
  token = val[3]
322
- token.alias = val[6]
339
+ token.alias_name = val[6]
323
340
  result = val[0].append(token)
324
341
  }
325
- | "{"
326
- {
327
- if @prec_seen
328
- raise "Multiple User_code after %prec" if @code_after_prec
329
- @code_after_prec = true
330
- end
331
- @lexer.status = :c_declaration
332
- @lexer.end_symbol = '}'
333
- }
334
- C_DECLARATION
335
- {
336
- @lexer.status = :initial
337
- @lexer.end_symbol = nil
338
- }
339
- "}" named_ref_opt
340
- {
341
- token = val[2]
342
- token.alias = val[5]
343
- result = [token]
344
- }
345
342
  | rhs "%prec" symbol
346
343
  {
347
344
  sym = @grammar.find_symbol_by_id!(val[2])
@@ -357,14 +354,12 @@ rule
357
354
  epilogue_opt: # empty
358
355
  | "%%"
359
356
  {
360
- @lexer.status = :c_declaration
361
- @lexer.end_symbol = '\Z'
357
+ begin_c_declaration('\Z')
362
358
  @grammar.epilogue_first_lineno = @lexer.line + 1
363
359
  }
364
360
  C_DECLARATION
365
361
  {
366
- @lexer.status = :initial
367
- @lexer.end_symbol = nil
362
+ end_c_declaration
368
363
  @grammar.epilogue = val[2].s_value
369
364
  }
370
365
 
@@ -381,16 +376,17 @@ rule
381
376
  generic_symlist_item: symbol
382
377
  | TAG
383
378
 
384
- string_as_id: STRING { result = Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Ident, s_value: val[0]) }
379
+ string_as_id: STRING { result = Lrama::Lexer::Token::Ident.new(s_value: val[0]) }
385
380
  end
386
381
 
387
382
  ---- inner
388
383
 
389
384
  include Lrama::Report::Duration
390
385
 
391
- def initialize(text, path)
386
+ def initialize(text, path, debug = false)
392
387
  @text = text
393
388
  @path = path
389
+ @yydebug = debug
394
390
  end
395
391
 
396
392
  def parse
@@ -398,8 +394,8 @@ def parse
398
394
  @lexer = Lrama::Lexer.new(@text)
399
395
  @grammar = Lrama::Grammar.new
400
396
  @precedence_number = 0
397
+ reset_precs
401
398
  do_parse
402
- @grammar.extract_references
403
399
  @grammar.prepare
404
400
  @grammar.compute_nullable
405
401
  @grammar.compute_first_set
@@ -413,10 +409,38 @@ def next_token
413
409
  end
414
410
 
415
411
  def on_error(error_token_id, error_value, value_stack)
416
- source = @text.split("\n")[error_value.line - 1]
412
+ if error_value.respond_to?(:line) && error_value.respond_to?(:column)
413
+ line = error_value.line
414
+ first_column = error_value.column
415
+ else
416
+ line = @lexer.line
417
+ first_column = @lexer.head_column
418
+ end
419
+
417
420
  raise ParseError, <<~ERROR
418
- #{@path}:#{@lexer.line}:#{@lexer.column}: parse error on value #{error_value.inspect} (#{token_to_str(error_token_id) || '?'})
419
- #{source}
420
- #{' ' * @lexer.column}^
421
+ #{@path}:#{line}:#{first_column}: parse error on value #{error_value.inspect} (#{token_to_str(error_token_id) || '?'})
422
+ #{@text.split("\n")[line - 1]}
423
+ #{carrets(first_column)}
421
424
  ERROR
422
425
  end
426
+
427
+ private
428
+
429
+ def reset_precs
430
+ @prec_seen = false
431
+ @code_after_prec = false
432
+ end
433
+
434
+ def begin_c_declaration(end_symbol)
435
+ @lexer.status = :c_declaration
436
+ @lexer.end_symbol = end_symbol
437
+ end
438
+
439
+ def end_c_declaration
440
+ @lexer.status = :initial
441
+ @lexer.end_symbol = nil
442
+ end
443
+
444
+ def carrets(first_column)
445
+ ' ' * (first_column + 1) + '^' * (@lexer.column - first_column)
446
+ end
@@ -2,7 +2,7 @@
2
2
  sources:
3
3
  - type: git
4
4
  name: ruby/gem_rbs_collection
5
- revision: 95ad664324500c9eec78569b45da98c65a27a511
5
+ revision: 2de2d4535caba275f3b8533684aab110d921f553
6
6
  remote: https://github.com/ruby/gem_rbs_collection.git
7
7
  repo_dir: gems
8
8
  path: ".gem_rbs_collection"
@@ -11,6 +11,18 @@ gems:
11
11
  version: '0'
12
12
  source:
13
13
  type: stdlib
14
+ - name: fileutils
15
+ version: '0'
16
+ source:
17
+ type: stdlib
18
+ - name: rake
19
+ version: '13.0'
20
+ source:
21
+ type: git
22
+ name: ruby/gem_rbs_collection
23
+ revision: 2de2d4535caba275f3b8533684aab110d921f553
24
+ remote: https://github.com/ruby/gem_rbs_collection.git
25
+ repo_dir: gems
14
26
  - name: stackprof
15
27
  version: '0.2'
16
28
  source:
data/sample/calc.y CHANGED
@@ -15,10 +15,12 @@
15
15
  #include <stdio.h>
16
16
  #include <stdlib.h>
17
17
  #include <ctype.h>
18
+ %}
18
19
 
20
+ %code provides {
19
21
  static int yylex(YYSTYPE *val, YYLTYPE *loc);
20
22
  static int yyerror(YYLTYPE *loc, const char *str);
21
- %}
23
+ }
22
24
 
23
25
  %union {
24
26
  int val;
data/sample/parse.y CHANGED
@@ -4,10 +4,14 @@
4
4
 
5
5
  %{
6
6
  // Prologue
7
+ %}
8
+
9
+ %code provides {
10
+
7
11
  static enum yytokentype yylex(YYSTYPE *lval, YYLTYPE *yylloc);
8
12
  static void yyerror(YYLTYPE *yylloc, const char *msg);
9
13
 
10
- %}
14
+ }
11
15
 
12
16
  %expect 0
13
17
  %define api.pure
@@ -0,0 +1,10 @@
1
+ module Lrama
2
+ class Grammar
3
+ class PercentCode
4
+ attr_reader id: Lexer::Token::Ident
5
+ attr_reader code: Lexer::Token::UserCode
6
+
7
+ def initialize: (Lexer::Token::Ident id, Lexer::Token::UserCode code) -> void
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,22 @@
1
+ module Lrama
2
+ class Grammar
3
+ class Reference
4
+ # TODO: Replace untyped referring_symbol with (Grammar::Symbol|Lexer::Token)
5
+ attr_accessor type: Symbol
6
+ attr_accessor value: (String|Integer)
7
+ attr_accessor ex_tag: Lexer::Token?
8
+ attr_accessor first_column: Integer
9
+ attr_accessor last_column: Integer
10
+ attr_accessor referring_symbol: untyped
11
+ attr_accessor position_in_rhs: Integer?
12
+
13
+ def initialize: (
14
+ type: Symbol, value: (String|Integer), ex_tag: Lexer::Token?,
15
+ first_column: Integer, last_column: Integer,
16
+ referring_symbol: untyped, position_in_rhs: Integer?
17
+ ) -> void
18
+
19
+ def tag: () -> untyped
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,5 @@
1
+ module Lrama
2
+ class Grammar
3
+ def numberize_references: (Lexer::Token lhs, Array[Lexer::Token] rhs, Array[Reference]) -> void
4
+ end
5
+ end
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Char < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Ident < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Parameterizing < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Tag < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,9 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class UserCode < Token
5
+ attr_accessor references: Array[[Symbol, (String|Integer), Token?, Integer, Integer]]
6
+ end
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,17 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ attr_accessor s_value: String
5
+ attr_accessor alias_name: String
6
+ attr_accessor line: Integer
7
+ attr_accessor column: Integer
8
+ attr_accessor referred: bool
9
+
10
+ def initialize: (?s_value: String, ?alias_name: String) -> void
11
+
12
+ def to_s: () -> String
13
+ def referred_by?: (String string) -> bool
14
+ def ==: (Token other) -> bool
15
+ end
16
+ end
17
+ end
@@ -12,7 +12,7 @@
12
12
  #if YYDEBUG && !defined(yydebug)
13
13
  extern int yydebug;
14
14
  #endif
15
- <%-# b4_percent_code_get([[requires]]). %code is not supported -%>
15
+ <%= output.percent_code("requires") %>
16
16
 
17
17
  <%-# b4_token_enums_defines -%>
18
18
  /* Token kinds. */
@@ -64,7 +64,7 @@ struct YYLTYPE
64
64
  int yyparse (<%= output.parse_param %>);
65
65
 
66
66
 
67
- <%-# b4_percent_code_get([[provides]]). %code is not supported -%>
67
+ <%= output.percent_code("provides") %>
68
68
  <%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
69
69
  <%- if output.spec_mapped_header_file -%>
70
70
  #endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %> */
@@ -68,8 +68,6 @@
68
68
  #define YYPULL 1
69
69
 
70
70
 
71
-
72
-
73
71
  <%# b4_user_pre_prologue -%>
74
72
  /* First part of user prologue. */
75
73
  #line <%= output.aux.prologue_first_lineno %> "<%= output.grammar_file_path %>"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lrama
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.8
4
+ version: 0.5.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuichiro Kaneko
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-26 00:00:00.000000000 Z
11
+ date: 2023-11-05 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: LALR (1) parser generator written by Ruby
14
14
  email:
@@ -47,6 +47,7 @@ files:
47
47
  - lib/lrama/grammar/auxiliary.rb
48
48
  - lib/lrama/grammar/code.rb
49
49
  - lib/lrama/grammar/error_token.rb
50
+ - lib/lrama/grammar/percent_code.rb
50
51
  - lib/lrama/grammar/precedence.rb
51
52
  - lib/lrama/grammar/printer.rb
52
53
  - lib/lrama/grammar/reference.rb
@@ -55,7 +56,11 @@ files:
55
56
  - lib/lrama/grammar/union.rb
56
57
  - lib/lrama/lexer.rb
57
58
  - lib/lrama/lexer/token.rb
58
- - lib/lrama/lexer/token/type.rb
59
+ - lib/lrama/lexer/token/char.rb
60
+ - lib/lrama/lexer/token/ident.rb
61
+ - lib/lrama/lexer/token/parameterizing.rb
62
+ - lib/lrama/lexer/token/tag.rb
63
+ - lib/lrama/lexer/token/user_code.rb
59
64
  - lib/lrama/option_parser.rb
60
65
  - lib/lrama/options.rb
61
66
  - lib/lrama/output.rb
@@ -84,7 +89,15 @@ files:
84
89
  - sample/parse.y
85
90
  - sig/lrama/bitmap.rbs
86
91
  - sig/lrama/digraph.rbs
87
- - sig/lrama/lexer/token/type.rbs
92
+ - sig/lrama/grammar.rbs
93
+ - sig/lrama/grammar/percent_code.rbs
94
+ - sig/lrama/grammar/reference.rbs
95
+ - sig/lrama/lexer/token.rbs
96
+ - sig/lrama/lexer/token/char.rbs
97
+ - sig/lrama/lexer/token/ident.rbs
98
+ - sig/lrama/lexer/token/parameterizing.rbs
99
+ - sig/lrama/lexer/token/tag.rbs
100
+ - sig/lrama/lexer/token/user_code.rbs
88
101
  - sig/lrama/report/duration.rbs
89
102
  - sig/lrama/report/profile.rbs
90
103
  - sig/lrama/warning.rbs
@@ -1,8 +0,0 @@
1
- module Lrama
2
- class Lexer
3
- class Token < Struct.new(:type, :s_value, :alias, keyword_init: true)
4
- class Type < Struct.new(:id, :name, keyword_init: true)
5
- end
6
- end
7
- end
8
- end
@@ -1,17 +0,0 @@
1
- module Lrama
2
- class Lexer
3
- class Token
4
- attr_accessor type: Type
5
- attr_accessor s_value: String
6
- attr_accessor alias: String
7
-
8
- def initialize: (?type: Type, ?s_value: String, ?alias: String) -> void
9
- class Type
10
- attr_accessor id: Integer
11
- attr_accessor name: String
12
-
13
- def initialize: (?id: Integer, ?name: String) -> void
14
- end
15
- end
16
- end
17
- end