lrama 0.5.7 → 0.5.9

Sign up to get free protection for your applications and to get access to all the features.
data/lib/lrama/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Lrama
2
- VERSION = "0.5.7".freeze
2
+ VERSION = "0.5.9".freeze
3
3
  end
data/parser.y CHANGED
@@ -1,5 +1,10 @@
1
1
  class Lrama::Parser
2
+ expect 7
3
+
4
+ token C_DECLARATION CHARACTER IDENT_COLON IDENTIFIER INTEGER STRING TAG
5
+
2
6
  rule
7
+
3
8
  input: prologue_declarations bison_declarations "%%" grammar epilogue_opt
4
9
 
5
10
  prologue_declarations: # empty
@@ -7,14 +12,12 @@ rule
7
12
 
8
13
  prologue_declaration: "%{"
9
14
  {
10
- @lexer.status = :c_declaration
11
- @lexer.end_symbol = '%}'
15
+ begin_c_declaration("%}")
12
16
  @grammar.prologue_first_lineno = @lexer.line
13
17
  }
14
18
  C_DECLARATION
15
19
  {
16
- @lexer.status = :initial
17
- @lexer.end_symbol = nil
20
+ end_c_declaration
18
21
  }
19
22
  "%}"
20
23
  {
@@ -44,15 +47,25 @@ rule
44
47
  @grammar.parse_param = @grammar.build_code(:parse_param, token).token_code.s_value
45
48
  }
46
49
  }
50
+ | "%code" IDENTIFIER "{"
51
+ {
52
+ begin_c_declaration("}")
53
+ }
54
+ C_DECLARATION
55
+ {
56
+ end_c_declaration
57
+ }
58
+ "}"
59
+ {
60
+ @grammar.add_percent_code(id: val[1], code: val[4])
61
+ }
47
62
  | "%initial-action" "{"
48
63
  {
49
- @lexer.status = :c_declaration
50
- @lexer.end_symbol = '}'
64
+ begin_c_declaration("}")
51
65
  }
52
66
  C_DECLARATION
53
67
  {
54
- @lexer.status = :initial
55
- @lexer.end_symbol = nil
68
+ end_c_declaration
56
69
  }
57
70
  "}"
58
71
  {
@@ -62,13 +75,11 @@ rule
62
75
 
63
76
  grammar_declaration: "%union" "{"
64
77
  {
65
- @lexer.status = :c_declaration
66
- @lexer.end_symbol = '}'
78
+ begin_c_declaration("}")
67
79
  }
68
80
  C_DECLARATION
69
81
  {
70
- @lexer.status = :initial
71
- @lexer.end_symbol = nil
82
+ end_c_declaration
72
83
  }
73
84
  "}"
74
85
  {
@@ -77,24 +88,20 @@ rule
77
88
  | symbol_declaration
78
89
  | "%destructor" "{"
79
90
  {
80
- @lexer.status = :c_declaration
81
- @lexer.end_symbol = '}'
91
+ begin_c_declaration("}")
82
92
  }
83
93
  C_DECLARATION
84
94
  {
85
- @lexer.status = :initial
86
- @lexer.end_symbol = nil
95
+ end_c_declaration
87
96
  }
88
- "}" generic_symlist
97
+ "}" generic_symlist
89
98
  | "%printer" "{"
90
99
  {
91
- @lexer.status = :c_declaration
92
- @lexer.end_symbol = '}'
100
+ begin_c_declaration("}")
93
101
  }
94
102
  C_DECLARATION
95
103
  {
96
- @lexer.status = :initial
97
- @lexer.end_symbol = nil
104
+ end_c_declaration
98
105
  }
99
106
  "}" generic_symlist
100
107
  {
@@ -102,13 +109,11 @@ rule
102
109
  }
103
110
  | "%error-token" "{"
104
111
  {
105
- @lexer.status = :c_declaration
106
- @lexer.end_symbol = '}'
112
+ begin_c_declaration("}")
107
113
  }
108
114
  C_DECLARATION
109
115
  {
110
- @lexer.status = :initial
111
- @lexer.end_symbol = nil
116
+ end_c_declaration
112
117
  }
113
118
  "}" generic_symlist
114
119
  {
@@ -216,13 +221,11 @@ rule
216
221
 
217
222
  params: params "{"
218
223
  {
219
- @lexer.status = :c_declaration
220
- @lexer.end_symbol = '}'
224
+ begin_c_declaration("}")
221
225
  }
222
226
  C_DECLARATION
223
227
  {
224
- @lexer.status = :initial
225
- @lexer.end_symbol = nil
228
+ end_c_declaration
226
229
  }
227
230
  "}"
228
231
  {
@@ -230,13 +233,11 @@ rule
230
233
  }
231
234
  | "{"
232
235
  {
233
- @lexer.status = :c_declaration
234
- @lexer.end_symbol = '}'
236
+ begin_c_declaration("}")
235
237
  }
236
238
  C_DECLARATION
237
239
  {
238
- @lexer.status = :initial
239
- @lexer.end_symbol = nil
240
+ end_c_declaration
240
241
  }
241
242
  "}"
242
243
  {
@@ -273,7 +274,7 @@ rule
273
274
  rules: id_colon named_ref_opt ":" rhs_list
274
275
  {
275
276
  lhs = val[0]
276
- lhs.alias = val[1]
277
+ lhs.alias_name = val[1]
277
278
  val[3].each {|hash|
278
279
  @grammar.add_rule(lhs: lhs, rhs: hash[:rhs], lineno: hash[:lineno])
279
280
  }
@@ -291,56 +292,53 @@ rule
291
292
 
292
293
  rhs: /* empty */
293
294
  {
295
+ reset_precs
296
+ result = []
297
+ }
298
+ | "%empty"
299
+ {
300
+ reset_precs
294
301
  result = []
295
- @prec_seen = false
296
- @code_after_prec = false
297
302
  }
298
303
  | rhs symbol named_ref_opt
299
304
  {
300
305
  token = val[1]
301
- val[1].alias = val[2]
306
+ token.alias_name = val[2]
302
307
  result = val[0].append(token)
303
308
  }
309
+ | rhs "?"
310
+ {
311
+ token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
312
+ result = val[0].append(token)
313
+ }
314
+ | rhs "+"
315
+ {
316
+ token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
317
+ result = val[0].append(token)
318
+ }
319
+ | rhs "*"
320
+ {
321
+ token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1])
322
+ result = val[0].append(token)
323
+ }
304
324
  | rhs "{"
305
325
  {
306
326
  if @prec_seen
307
327
  raise "Multiple User_code after %prec" if @code_after_prec
308
328
  @code_after_prec = true
309
329
  end
310
- @lexer.status = :c_declaration
311
- @lexer.end_symbol = '}'
330
+ begin_c_declaration("}")
312
331
  }
313
332
  C_DECLARATION
314
333
  {
315
- @lexer.status = :initial
316
- @lexer.end_symbol = nil
334
+ end_c_declaration
317
335
  }
318
336
  "}" named_ref_opt
319
337
  {
320
338
  token = val[3]
321
- token.alias = val[6]
339
+ token.alias_name = val[6]
322
340
  result = val[0].append(token)
323
341
  }
324
- | "{"
325
- {
326
- if @prec_seen
327
- raise "Multiple User_code after %prec" if @code_after_prec
328
- @code_after_prec = true
329
- end
330
- @lexer.status = :c_declaration
331
- @lexer.end_symbol = '}'
332
- }
333
- C_DECLARATION
334
- {
335
- @lexer.status = :initial
336
- @lexer.end_symbol = nil
337
- }
338
- "}" named_ref_opt
339
- {
340
- token = val[2]
341
- token.alias = val[5]
342
- result = [token]
343
- }
344
342
  | rhs "%prec" symbol
345
343
  {
346
344
  sym = @grammar.find_symbol_by_id!(val[2])
@@ -356,14 +354,12 @@ rule
356
354
  epilogue_opt: # empty
357
355
  | "%%"
358
356
  {
359
- @lexer.status = :c_declaration
360
- @lexer.end_symbol = '\Z'
357
+ begin_c_declaration('\Z')
361
358
  @grammar.epilogue_first_lineno = @lexer.line + 1
362
359
  }
363
360
  C_DECLARATION
364
361
  {
365
- @lexer.status = :initial
366
- @lexer.end_symbol = nil
362
+ end_c_declaration
367
363
  @grammar.epilogue = val[2].s_value
368
364
  }
369
365
 
@@ -380,15 +376,17 @@ rule
380
376
  generic_symlist_item: symbol
381
377
  | TAG
382
378
 
383
- string_as_id: STRING { result = Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Ident, s_value: val[0]) }
379
+ string_as_id: STRING { result = Lrama::Lexer::Token::Ident.new(s_value: val[0]) }
384
380
  end
385
381
 
386
382
  ---- inner
387
383
 
388
384
  include Lrama::Report::Duration
389
385
 
390
- def initialize(text)
386
+ def initialize(text, path, debug = false)
391
387
  @text = text
388
+ @path = path
389
+ @yydebug = debug
392
390
  end
393
391
 
394
392
  def parse
@@ -396,8 +394,8 @@ def parse
396
394
  @lexer = Lrama::Lexer.new(@text)
397
395
  @grammar = Lrama::Grammar.new
398
396
  @precedence_number = 0
397
+ reset_precs
399
398
  do_parse
400
- @grammar.extract_references
401
399
  @grammar.prepare
402
400
  @grammar.compute_nullable
403
401
  @grammar.compute_first_set
@@ -411,6 +409,38 @@ def next_token
411
409
  end
412
410
 
413
411
  def on_error(error_token_id, error_value, value_stack)
414
- raise ParseError, sprintf("\n%d:%d: parse error on value %s (%s)",
415
- @lexer.line, @lexer.column, error_value.inspect, token_to_str(error_token_id) || '?')
412
+ if error_value.respond_to?(:line) && error_value.respond_to?(:column)
413
+ line = error_value.line
414
+ first_column = error_value.column
415
+ else
416
+ line = @lexer.line
417
+ first_column = @lexer.head_column
418
+ end
419
+
420
+ raise ParseError, <<~ERROR
421
+ #{@path}:#{line}:#{first_column}: parse error on value #{error_value.inspect} (#{token_to_str(error_token_id) || '?'})
422
+ #{@text.split("\n")[line - 1]}
423
+ #{carrets(first_column)}
424
+ ERROR
425
+ end
426
+
427
+ private
428
+
429
+ def reset_precs
430
+ @prec_seen = false
431
+ @code_after_prec = false
432
+ end
433
+
434
+ def begin_c_declaration(end_symbol)
435
+ @lexer.status = :c_declaration
436
+ @lexer.end_symbol = end_symbol
437
+ end
438
+
439
+ def end_c_declaration
440
+ @lexer.status = :initial
441
+ @lexer.end_symbol = nil
442
+ end
443
+
444
+ def carrets(first_column)
445
+ ' ' * (first_column + 1) + '^' * (@lexer.column - first_column)
416
446
  end
@@ -2,7 +2,7 @@
2
2
  sources:
3
3
  - type: git
4
4
  name: ruby/gem_rbs_collection
5
- revision: 95ad664324500c9eec78569b45da98c65a27a511
5
+ revision: 2de2d4535caba275f3b8533684aab110d921f553
6
6
  remote: https://github.com/ruby/gem_rbs_collection.git
7
7
  repo_dir: gems
8
8
  path: ".gem_rbs_collection"
@@ -11,6 +11,18 @@ gems:
11
11
  version: '0'
12
12
  source:
13
13
  type: stdlib
14
+ - name: fileutils
15
+ version: '0'
16
+ source:
17
+ type: stdlib
18
+ - name: rake
19
+ version: '13.0'
20
+ source:
21
+ type: git
22
+ name: ruby/gem_rbs_collection
23
+ revision: 2de2d4535caba275f3b8533684aab110d921f553
24
+ remote: https://github.com/ruby/gem_rbs_collection.git
25
+ repo_dir: gems
14
26
  - name: stackprof
15
27
  version: '0.2'
16
28
  source:
data/sample/calc.y CHANGED
@@ -15,10 +15,12 @@
15
15
  #include <stdio.h>
16
16
  #include <stdlib.h>
17
17
  #include <ctype.h>
18
+ %}
18
19
 
20
+ %code provides {
19
21
  static int yylex(YYSTYPE *val, YYLTYPE *loc);
20
22
  static int yyerror(YYLTYPE *loc, const char *str);
21
- %}
23
+ }
22
24
 
23
25
  %union {
24
26
  int val;
data/sample/parse.y CHANGED
@@ -4,10 +4,14 @@
4
4
 
5
5
  %{
6
6
  // Prologue
7
+ %}
8
+
9
+ %code provides {
10
+
7
11
  static enum yytokentype yylex(YYSTYPE *lval, YYLTYPE *yylloc);
8
12
  static void yyerror(YYLTYPE *yylloc, const char *msg);
9
13
 
10
- %}
14
+ }
11
15
 
12
16
  %expect 0
13
17
  %define api.pure
@@ -0,0 +1,10 @@
1
+ module Lrama
2
+ class Grammar
3
+ class PercentCode
4
+ attr_reader id: Lexer::Token::Ident
5
+ attr_reader code: Lexer::Token::UserCode
6
+
7
+ def initialize: (Lexer::Token::Ident id, Lexer::Token::UserCode code) -> void
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,22 @@
1
+ module Lrama
2
+ class Grammar
3
+ class Reference
4
+ # TODO: Replace untyped referring_symbol with (Grammar::Symbol|Lexer::Token)
5
+ attr_accessor type: Symbol
6
+ attr_accessor value: (String|Integer)
7
+ attr_accessor ex_tag: Lexer::Token?
8
+ attr_accessor first_column: Integer
9
+ attr_accessor last_column: Integer
10
+ attr_accessor referring_symbol: untyped
11
+ attr_accessor position_in_rhs: Integer?
12
+
13
+ def initialize: (
14
+ type: Symbol, value: (String|Integer), ex_tag: Lexer::Token?,
15
+ first_column: Integer, last_column: Integer,
16
+ referring_symbol: untyped, position_in_rhs: Integer?
17
+ ) -> void
18
+
19
+ def tag: () -> untyped
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,5 @@
1
+ module Lrama
2
+ class Grammar
3
+ def numberize_references: (Lexer::Token lhs, Array[Lexer::Token] rhs, Array[Reference]) -> void
4
+ end
5
+ end
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Char < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Ident < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Parameterizing < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,8 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class Tag < Token
5
+ end
6
+ end
7
+ end
8
+ end
@@ -0,0 +1,9 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ class UserCode < Token
5
+ attr_accessor references: Array[[Symbol, (String|Integer), Token?, Integer, Integer]]
6
+ end
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,17 @@
1
+ module Lrama
2
+ class Lexer
3
+ class Token
4
+ attr_accessor s_value: String
5
+ attr_accessor alias_name: String
6
+ attr_accessor line: Integer
7
+ attr_accessor column: Integer
8
+ attr_accessor referred: bool
9
+
10
+ def initialize: (?s_value: String, ?alias_name: String) -> void
11
+
12
+ def to_s: () -> String
13
+ def referred_by?: (String string) -> bool
14
+ def ==: (Token other) -> bool
15
+ end
16
+ end
17
+ end
@@ -12,7 +12,7 @@
12
12
  #if YYDEBUG && !defined(yydebug)
13
13
  extern int yydebug;
14
14
  #endif
15
- <%-# b4_percent_code_get([[requires]]). %code is not supported -%>
15
+ <%= output.percent_code("requires") %>
16
16
 
17
17
  <%-# b4_token_enums_defines -%>
18
18
  /* Token kinds. */
@@ -64,7 +64,7 @@ struct YYLTYPE
64
64
  int yyparse (<%= output.parse_param %>);
65
65
 
66
66
 
67
- <%-# b4_percent_code_get([[provides]]). %code is not supported -%>
67
+ <%= output.percent_code("provides") %>
68
68
  <%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
69
69
  <%- if output.spec_mapped_header_file -%>
70
70
  #endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %> */
@@ -68,8 +68,6 @@
68
68
  #define YYPULL 1
69
69
 
70
70
 
71
-
72
-
73
71
  <%# b4_user_pre_prologue -%>
74
72
  /* First part of user prologue. */
75
73
  #line <%= output.aux.prologue_first_lineno %> "<%= output.grammar_file_path %>"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lrama
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.7
4
+ version: 0.5.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yuichiro Kaneko
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-23 00:00:00.000000000 Z
11
+ date: 2023-11-05 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: LALR (1) parser generator written by Ruby
14
14
  email:
@@ -47,6 +47,7 @@ files:
47
47
  - lib/lrama/grammar/auxiliary.rb
48
48
  - lib/lrama/grammar/code.rb
49
49
  - lib/lrama/grammar/error_token.rb
50
+ - lib/lrama/grammar/percent_code.rb
50
51
  - lib/lrama/grammar/precedence.rb
51
52
  - lib/lrama/grammar/printer.rb
52
53
  - lib/lrama/grammar/reference.rb
@@ -55,7 +56,11 @@ files:
55
56
  - lib/lrama/grammar/union.rb
56
57
  - lib/lrama/lexer.rb
57
58
  - lib/lrama/lexer/token.rb
58
- - lib/lrama/lexer/token/type.rb
59
+ - lib/lrama/lexer/token/char.rb
60
+ - lib/lrama/lexer/token/ident.rb
61
+ - lib/lrama/lexer/token/parameterizing.rb
62
+ - lib/lrama/lexer/token/tag.rb
63
+ - lib/lrama/lexer/token/user_code.rb
59
64
  - lib/lrama/option_parser.rb
60
65
  - lib/lrama/options.rb
61
66
  - lib/lrama/output.rb
@@ -84,7 +89,15 @@ files:
84
89
  - sample/parse.y
85
90
  - sig/lrama/bitmap.rbs
86
91
  - sig/lrama/digraph.rbs
87
- - sig/lrama/lexer/token/type.rbs
92
+ - sig/lrama/grammar.rbs
93
+ - sig/lrama/grammar/percent_code.rbs
94
+ - sig/lrama/grammar/reference.rbs
95
+ - sig/lrama/lexer/token.rbs
96
+ - sig/lrama/lexer/token/char.rbs
97
+ - sig/lrama/lexer/token/ident.rbs
98
+ - sig/lrama/lexer/token/parameterizing.rbs
99
+ - sig/lrama/lexer/token/tag.rbs
100
+ - sig/lrama/lexer/token/user_code.rbs
88
101
  - sig/lrama/report/duration.rbs
89
102
  - sig/lrama/report/profile.rbs
90
103
  - sig/lrama/warning.rbs
@@ -1,8 +0,0 @@
1
- module Lrama
2
- class Lexer
3
- class Token < Struct.new(:type, :s_value, :alias, keyword_init: true)
4
- class Type < Struct.new(:id, :name, keyword_init: true)
5
- end
6
- end
7
- end
8
- end
@@ -1,17 +0,0 @@
1
- module Lrama
2
- class Lexer
3
- class Token
4
- attr_accessor type: Type
5
- attr_accessor s_value: String
6
- attr_accessor alias: String
7
-
8
- def initialize: (?type: Type, ?s_value: String, ?alias: String) -> void
9
- class Type
10
- attr_accessor id: Integer
11
- attr_accessor name: String
12
-
13
- def initialize: (?id: Integer, ?name: String) -> void
14
- end
15
- end
16
- end
17
- end