prism 0.19.0 → 0.20.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +29 -1
  3. data/Makefile +5 -0
  4. data/README.md +8 -6
  5. data/config.yml +236 -38
  6. data/docs/build_system.md +19 -2
  7. data/docs/cruby_compilation.md +27 -0
  8. data/docs/parser_translation.md +34 -0
  9. data/docs/parsing_rules.md +19 -0
  10. data/docs/releasing.md +3 -3
  11. data/docs/ruby_api.md +1 -1
  12. data/docs/serialization.md +17 -5
  13. data/ext/prism/api_node.c +101 -81
  14. data/ext/prism/extension.c +74 -11
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +1699 -504
  17. data/include/prism/defines.h +8 -0
  18. data/include/prism/diagnostic.h +39 -2
  19. data/include/prism/encoding.h +10 -0
  20. data/include/prism/options.h +40 -14
  21. data/include/prism/parser.h +33 -17
  22. data/include/prism/util/pm_buffer.h +9 -0
  23. data/include/prism/util/pm_constant_pool.h +7 -0
  24. data/include/prism/util/pm_newline_list.h +0 -11
  25. data/include/prism/version.h +2 -2
  26. data/include/prism.h +19 -2
  27. data/lib/prism/debug.rb +11 -5
  28. data/lib/prism/dot_visitor.rb +36 -14
  29. data/lib/prism/dsl.rb +22 -22
  30. data/lib/prism/ffi.rb +2 -2
  31. data/lib/prism/node.rb +1020 -737
  32. data/lib/prism/node_ext.rb +2 -2
  33. data/lib/prism/parse_result.rb +17 -9
  34. data/lib/prism/serialize.rb +53 -29
  35. data/lib/prism/translation/parser/compiler.rb +1831 -0
  36. data/lib/prism/translation/parser/lexer.rb +335 -0
  37. data/lib/prism/translation/parser/rubocop.rb +37 -0
  38. data/lib/prism/translation/parser.rb +163 -0
  39. data/lib/prism/translation.rb +11 -0
  40. data/lib/prism.rb +1 -0
  41. data/prism.gemspec +12 -5
  42. data/rbi/prism.rbi +150 -88
  43. data/rbi/prism_static.rbi +15 -3
  44. data/sig/prism.rbs +996 -961
  45. data/sig/prism_static.rbs +123 -46
  46. data/src/diagnostic.c +259 -219
  47. data/src/encoding.c +4 -8
  48. data/src/node.c +2 -6
  49. data/src/options.c +24 -5
  50. data/src/prettyprint.c +174 -42
  51. data/src/prism.c +1136 -328
  52. data/src/serialize.c +12 -9
  53. data/src/token_type.c +353 -4
  54. data/src/util/pm_buffer.c +11 -0
  55. data/src/util/pm_constant_pool.c +12 -11
  56. data/src/util/pm_newline_list.c +2 -14
  57. metadata +10 -3
  58. data/docs/building.md +0 -29
data/src/serialize.c CHANGED
@@ -158,11 +158,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
158
158
  }
159
159
  case PM_ASSOC_NODE: {
160
160
  pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->key, buffer);
161
- if (((pm_assoc_node_t *)node)->value == NULL) {
162
- pm_buffer_append_byte(buffer, 0);
163
- } else {
164
- pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->value, buffer);
165
- }
161
+ pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->value, buffer);
166
162
  if (((pm_assoc_node_t *)node)->operator_loc.start == NULL) {
167
163
  pm_buffer_append_byte(buffer, 0);
168
164
  } else {
@@ -229,6 +225,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
229
225
  break;
230
226
  }
231
227
  case PM_BLOCK_LOCAL_VARIABLE_NODE: {
228
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
232
229
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_local_variable_node_t *)node)->name));
233
230
  break;
234
231
  }
@@ -238,7 +235,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
238
235
  for (uint32_t index = 0; index < locals_size; index++) {
239
236
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_node_t *)node)->locals.ids[index]));
240
237
  }
241
- pm_buffer_append_varuint(buffer, ((pm_block_node_t *)node)->locals_body_index);
242
238
  if (((pm_block_node_t *)node)->parameters == NULL) {
243
239
  pm_buffer_append_byte(buffer, 0);
244
240
  } else {
@@ -254,6 +250,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
254
250
  break;
255
251
  }
256
252
  case PM_BLOCK_PARAMETER_NODE: {
253
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
257
254
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_parameter_node_t *)node)->name));
258
255
  if (((pm_block_parameter_node_t *)node)->name_loc.start == NULL) {
259
256
  pm_buffer_append_byte(buffer, 0);
@@ -651,7 +648,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
651
648
  for (uint32_t index = 0; index < locals_size; index++) {
652
649
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_def_node_t *)node)->locals.ids[index]));
653
650
  }
654
- pm_buffer_append_varuint(buffer, ((pm_def_node_t *)node)->locals_body_index);
655
651
  pm_serialize_location(parser, &((pm_def_node_t *)node)->def_keyword_loc, buffer);
656
652
  if (((pm_def_node_t *)node)->operator_loc.start == NULL) {
657
653
  pm_buffer_append_byte(buffer, 0);
@@ -1190,6 +1186,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1190
1186
  break;
1191
1187
  }
1192
1188
  case PM_KEYWORD_REST_PARAMETER_NODE: {
1189
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1193
1190
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_keyword_rest_parameter_node_t *)node)->name));
1194
1191
  if (((pm_keyword_rest_parameter_node_t *)node)->name_loc.start == NULL) {
1195
1192
  pm_buffer_append_byte(buffer, 0);
@@ -1206,7 +1203,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1206
1203
  for (uint32_t index = 0; index < locals_size; index++) {
1207
1204
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_lambda_node_t *)node)->locals.ids[index]));
1208
1205
  }
1209
- pm_buffer_append_varuint(buffer, ((pm_lambda_node_t *)node)->locals_body_index);
1210
1206
  pm_serialize_location(parser, &((pm_lambda_node_t *)node)->operator_loc, buffer);
1211
1207
  pm_serialize_location(parser, &((pm_lambda_node_t *)node)->opening_loc, buffer);
1212
1208
  pm_serialize_location(parser, &((pm_lambda_node_t *)node)->closing_loc, buffer);
@@ -1402,12 +1398,14 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1402
1398
  break;
1403
1399
  }
1404
1400
  case PM_OPTIONAL_KEYWORD_PARAMETER_NODE: {
1401
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1405
1402
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_optional_keyword_parameter_node_t *)node)->name));
1406
1403
  pm_serialize_location(parser, &((pm_optional_keyword_parameter_node_t *)node)->name_loc, buffer);
1407
1404
  pm_serialize_node(parser, (pm_node_t *)((pm_optional_keyword_parameter_node_t *)node)->value, buffer);
1408
1405
  break;
1409
1406
  }
1410
1407
  case PM_OPTIONAL_PARAMETER_NODE: {
1408
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1411
1409
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_optional_parameter_node_t *)node)->name));
1412
1410
  pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->name_loc, buffer);
1413
1411
  pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->operator_loc, buffer);
@@ -1542,11 +1540,13 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1542
1540
  break;
1543
1541
  }
1544
1542
  case PM_REQUIRED_KEYWORD_PARAMETER_NODE: {
1543
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1545
1544
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_required_keyword_parameter_node_t *)node)->name));
1546
1545
  pm_serialize_location(parser, &((pm_required_keyword_parameter_node_t *)node)->name_loc, buffer);
1547
1546
  break;
1548
1547
  }
1549
1548
  case PM_REQUIRED_PARAMETER_NODE: {
1549
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1550
1550
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_required_parameter_node_t *)node)->name));
1551
1551
  break;
1552
1552
  }
@@ -1587,6 +1587,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1587
1587
  break;
1588
1588
  }
1589
1589
  case PM_REST_PARAMETER_NODE: {
1590
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1590
1591
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_rest_parameter_node_t *)node)->name));
1591
1592
  if (((pm_rest_parameter_node_t *)node)->name_loc.start == NULL) {
1592
1593
  pm_buffer_append_byte(buffer, 0);
@@ -1904,6 +1905,8 @@ pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buf
1904
1905
 
1905
1906
  // serialize location
1906
1907
  pm_serialize_location(parser, &diagnostic->location, buffer);
1908
+
1909
+ pm_buffer_append_byte(buffer, diagnostic->level);
1907
1910
  }
1908
1911
 
1909
1912
  static void
@@ -1926,7 +1929,7 @@ pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer) {
1926
1929
  pm_buffer_append_string(buffer, encoding->name, encoding_length);
1927
1930
  }
1928
1931
 
1929
- #line 216 "serialize.c.erb"
1932
+ #line 218 "serialize.c.erb"
1930
1933
  /**
1931
1934
  * Serialize the encoding, metadata, nodes, and constant pool.
1932
1935
  */
data/src/token_type.c CHANGED
@@ -13,8 +13,7 @@
13
13
  * Returns a string representation of the given token type.
14
14
  */
15
15
  PRISM_EXPORTED_FUNCTION const char *
16
- pm_token_type_to_str(pm_token_type_t token_type)
17
- {
16
+ pm_token_type_name(pm_token_type_t token_type) {
18
17
  switch (token_type) {
19
18
  case PM_TOKEN_EOF:
20
19
  return "EOF";
@@ -345,7 +344,357 @@ pm_token_type_to_str(pm_token_type_t token_type)
345
344
  case PM_TOKEN___END__:
346
345
  return "__END__";
347
346
  case PM_TOKEN_MAXIMUM:
348
- return "MAXIMUM";
347
+ assert(false && "unreachable");
348
+ return "";
349
349
  }
350
- return "\0";
350
+
351
+ // Provide a default, because some compilers can't determine that the above
352
+ // switch is exhaustive.
353
+ assert(false && "unreachable");
354
+ return "";
355
+ }
356
+
357
+ /**
358
+ * Returns the human name of the given token type.
359
+ */
360
+ const char *
361
+ pm_token_type_human(pm_token_type_t token_type) {
362
+ switch (token_type) {
363
+ case PM_TOKEN_EOF:
364
+ return "end of file";
365
+ case PM_TOKEN_MISSING:
366
+ return "missing token";
367
+ case PM_TOKEN_NOT_PROVIDED:
368
+ return "not provided token";
369
+ case PM_TOKEN_AMPERSAND:
370
+ return "'&'";
371
+ case PM_TOKEN_AMPERSAND_AMPERSAND:
372
+ return "'&&'";
373
+ case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
374
+ return "'&&='";
375
+ case PM_TOKEN_AMPERSAND_DOT:
376
+ return "'&.'";
377
+ case PM_TOKEN_AMPERSAND_EQUAL:
378
+ return "'&='";
379
+ case PM_TOKEN_BACKTICK:
380
+ return "'`'";
381
+ case PM_TOKEN_BACK_REFERENCE:
382
+ return "back reference";
383
+ case PM_TOKEN_BANG:
384
+ return "'!'";
385
+ case PM_TOKEN_BANG_EQUAL:
386
+ return "'!='";
387
+ case PM_TOKEN_BANG_TILDE:
388
+ return "'!~'";
389
+ case PM_TOKEN_BRACE_LEFT:
390
+ return "'{'";
391
+ case PM_TOKEN_BRACE_RIGHT:
392
+ return "'}'";
393
+ case PM_TOKEN_BRACKET_LEFT:
394
+ return "'['";
395
+ case PM_TOKEN_BRACKET_LEFT_ARRAY:
396
+ return "'['";
397
+ case PM_TOKEN_BRACKET_LEFT_RIGHT:
398
+ return "'[]'";
399
+ case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL:
400
+ return "'[]='";
401
+ case PM_TOKEN_BRACKET_RIGHT:
402
+ return "']'";
403
+ case PM_TOKEN_CARET:
404
+ return "'^'";
405
+ case PM_TOKEN_CARET_EQUAL:
406
+ return "'^='";
407
+ case PM_TOKEN_CHARACTER_LITERAL:
408
+ return "character literal";
409
+ case PM_TOKEN_CLASS_VARIABLE:
410
+ return "class variable";
411
+ case PM_TOKEN_COLON:
412
+ return "':'";
413
+ case PM_TOKEN_COLON_COLON:
414
+ return "'::'";
415
+ case PM_TOKEN_COMMA:
416
+ return "','";
417
+ case PM_TOKEN_COMMENT:
418
+ return "comment";
419
+ case PM_TOKEN_CONSTANT:
420
+ return "constant";
421
+ case PM_TOKEN_DOT:
422
+ return "'.'";
423
+ case PM_TOKEN_DOT_DOT:
424
+ return "'..'";
425
+ case PM_TOKEN_DOT_DOT_DOT:
426
+ return "'...'";
427
+ case PM_TOKEN_EMBDOC_BEGIN:
428
+ return "'=begin'";
429
+ case PM_TOKEN_EMBDOC_END:
430
+ return "'=end'";
431
+ case PM_TOKEN_EMBDOC_LINE:
432
+ return "embedded documentation line";
433
+ case PM_TOKEN_EMBEXPR_BEGIN:
434
+ return "'#{'";
435
+ case PM_TOKEN_EMBEXPR_END:
436
+ return "'}'";
437
+ case PM_TOKEN_EMBVAR:
438
+ return "'#'";
439
+ case PM_TOKEN_EQUAL:
440
+ return "'='";
441
+ case PM_TOKEN_EQUAL_EQUAL:
442
+ return "'=='";
443
+ case PM_TOKEN_EQUAL_EQUAL_EQUAL:
444
+ return "'==='";
445
+ case PM_TOKEN_EQUAL_GREATER:
446
+ return "'=>'";
447
+ case PM_TOKEN_EQUAL_TILDE:
448
+ return "'=~'";
449
+ case PM_TOKEN_FLOAT:
450
+ return "float";
451
+ case PM_TOKEN_FLOAT_IMAGINARY:
452
+ return "imaginary";
453
+ case PM_TOKEN_FLOAT_RATIONAL:
454
+ return "rational";
455
+ case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
456
+ return "imaginary";
457
+ case PM_TOKEN_GLOBAL_VARIABLE:
458
+ return "global variable";
459
+ case PM_TOKEN_GREATER:
460
+ return "'>'";
461
+ case PM_TOKEN_GREATER_EQUAL:
462
+ return "'>='";
463
+ case PM_TOKEN_GREATER_GREATER:
464
+ return "'>>'";
465
+ case PM_TOKEN_GREATER_GREATER_EQUAL:
466
+ return "'>>='";
467
+ case PM_TOKEN_HEREDOC_END:
468
+ return "heredoc ending";
469
+ case PM_TOKEN_HEREDOC_START:
470
+ return "heredoc beginning";
471
+ case PM_TOKEN_IDENTIFIER:
472
+ return "local variable or method identifier";
473
+ case PM_TOKEN_IGNORED_NEWLINE:
474
+ return "ignored newline";
475
+ case PM_TOKEN_INSTANCE_VARIABLE:
476
+ return "instance variable";
477
+ case PM_TOKEN_INTEGER:
478
+ return "integer";
479
+ case PM_TOKEN_INTEGER_IMAGINARY:
480
+ return "imaginary";
481
+ case PM_TOKEN_INTEGER_RATIONAL:
482
+ return "rational";
483
+ case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY:
484
+ return "imaginary";
485
+ case PM_TOKEN_KEYWORD_ALIAS:
486
+ return "'alias'";
487
+ case PM_TOKEN_KEYWORD_AND:
488
+ return "'and'";
489
+ case PM_TOKEN_KEYWORD_BEGIN:
490
+ return "'begin'";
491
+ case PM_TOKEN_KEYWORD_BEGIN_UPCASE:
492
+ return "'BEGIN'";
493
+ case PM_TOKEN_KEYWORD_BREAK:
494
+ return "'break'";
495
+ case PM_TOKEN_KEYWORD_CASE:
496
+ return "'case'";
497
+ case PM_TOKEN_KEYWORD_CLASS:
498
+ return "'class'";
499
+ case PM_TOKEN_KEYWORD_DEF:
500
+ return "'def'";
501
+ case PM_TOKEN_KEYWORD_DEFINED:
502
+ return "'defined?'";
503
+ case PM_TOKEN_KEYWORD_DO:
504
+ return "'do'";
505
+ case PM_TOKEN_KEYWORD_DO_LOOP:
506
+ return "'do'";
507
+ case PM_TOKEN_KEYWORD_ELSE:
508
+ return "'else'";
509
+ case PM_TOKEN_KEYWORD_ELSIF:
510
+ return "'elsif'";
511
+ case PM_TOKEN_KEYWORD_END:
512
+ return "'end'";
513
+ case PM_TOKEN_KEYWORD_END_UPCASE:
514
+ return "'END'";
515
+ case PM_TOKEN_KEYWORD_ENSURE:
516
+ return "'ensure'";
517
+ case PM_TOKEN_KEYWORD_FALSE:
518
+ return "'false'";
519
+ case PM_TOKEN_KEYWORD_FOR:
520
+ return "'for'";
521
+ case PM_TOKEN_KEYWORD_IF:
522
+ return "'if'";
523
+ case PM_TOKEN_KEYWORD_IF_MODIFIER:
524
+ return "'if'";
525
+ case PM_TOKEN_KEYWORD_IN:
526
+ return "'in'";
527
+ case PM_TOKEN_KEYWORD_MODULE:
528
+ return "'module'";
529
+ case PM_TOKEN_KEYWORD_NEXT:
530
+ return "'next'";
531
+ case PM_TOKEN_KEYWORD_NIL:
532
+ return "'nil'";
533
+ case PM_TOKEN_KEYWORD_NOT:
534
+ return "'not'";
535
+ case PM_TOKEN_KEYWORD_OR:
536
+ return "'or'";
537
+ case PM_TOKEN_KEYWORD_REDO:
538
+ return "'redo'";
539
+ case PM_TOKEN_KEYWORD_RESCUE:
540
+ return "'rescue'";
541
+ case PM_TOKEN_KEYWORD_RESCUE_MODIFIER:
542
+ return "'rescue'";
543
+ case PM_TOKEN_KEYWORD_RETRY:
544
+ return "'retry'";
545
+ case PM_TOKEN_KEYWORD_RETURN:
546
+ return "'return'";
547
+ case PM_TOKEN_KEYWORD_SELF:
548
+ return "'self'";
549
+ case PM_TOKEN_KEYWORD_SUPER:
550
+ return "'super'";
551
+ case PM_TOKEN_KEYWORD_THEN:
552
+ return "'then'";
553
+ case PM_TOKEN_KEYWORD_TRUE:
554
+ return "'true'";
555
+ case PM_TOKEN_KEYWORD_UNDEF:
556
+ return "'undef'";
557
+ case PM_TOKEN_KEYWORD_UNLESS:
558
+ return "'unless'";
559
+ case PM_TOKEN_KEYWORD_UNLESS_MODIFIER:
560
+ return "'unless'";
561
+ case PM_TOKEN_KEYWORD_UNTIL:
562
+ return "'until'";
563
+ case PM_TOKEN_KEYWORD_UNTIL_MODIFIER:
564
+ return "'until'";
565
+ case PM_TOKEN_KEYWORD_WHEN:
566
+ return "'when'";
567
+ case PM_TOKEN_KEYWORD_WHILE:
568
+ return "'while'";
569
+ case PM_TOKEN_KEYWORD_WHILE_MODIFIER:
570
+ return "'while'";
571
+ case PM_TOKEN_KEYWORD_YIELD:
572
+ return "'yield'";
573
+ case PM_TOKEN_KEYWORD___ENCODING__:
574
+ return "'__ENCODING__'";
575
+ case PM_TOKEN_KEYWORD___FILE__:
576
+ return "'__FILE__'";
577
+ case PM_TOKEN_KEYWORD___LINE__:
578
+ return "'__LINE__'";
579
+ case PM_TOKEN_LABEL:
580
+ return "label";
581
+ case PM_TOKEN_LABEL_END:
582
+ return "':'";
583
+ case PM_TOKEN_LAMBDA_BEGIN:
584
+ return "'{'";
585
+ case PM_TOKEN_LESS:
586
+ return "'<'";
587
+ case PM_TOKEN_LESS_EQUAL:
588
+ return "'<='";
589
+ case PM_TOKEN_LESS_EQUAL_GREATER:
590
+ return "'<=>'";
591
+ case PM_TOKEN_LESS_LESS:
592
+ return "'<<'";
593
+ case PM_TOKEN_LESS_LESS_EQUAL:
594
+ return "'<<='";
595
+ case PM_TOKEN_METHOD_NAME:
596
+ return "method name";
597
+ case PM_TOKEN_MINUS:
598
+ return "'-'";
599
+ case PM_TOKEN_MINUS_EQUAL:
600
+ return "'-='";
601
+ case PM_TOKEN_MINUS_GREATER:
602
+ return "'->'";
603
+ case PM_TOKEN_NEWLINE:
604
+ return "newline";
605
+ case PM_TOKEN_NUMBERED_REFERENCE:
606
+ return "numbered reference";
607
+ case PM_TOKEN_PARENTHESIS_LEFT:
608
+ return "'('";
609
+ case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
610
+ return "'('";
611
+ case PM_TOKEN_PARENTHESIS_RIGHT:
612
+ return "')'";
613
+ case PM_TOKEN_PERCENT:
614
+ return "'%'";
615
+ case PM_TOKEN_PERCENT_EQUAL:
616
+ return "'%='";
617
+ case PM_TOKEN_PERCENT_LOWER_I:
618
+ return "'%i'";
619
+ case PM_TOKEN_PERCENT_LOWER_W:
620
+ return "'%w'";
621
+ case PM_TOKEN_PERCENT_LOWER_X:
622
+ return "'%x'";
623
+ case PM_TOKEN_PERCENT_UPPER_I:
624
+ return "'%I'";
625
+ case PM_TOKEN_PERCENT_UPPER_W:
626
+ return "'%W'";
627
+ case PM_TOKEN_PIPE:
628
+ return "'|'";
629
+ case PM_TOKEN_PIPE_EQUAL:
630
+ return "'|='";
631
+ case PM_TOKEN_PIPE_PIPE:
632
+ return "'||'";
633
+ case PM_TOKEN_PIPE_PIPE_EQUAL:
634
+ return "'||='";
635
+ case PM_TOKEN_PLUS:
636
+ return "'+'";
637
+ case PM_TOKEN_PLUS_EQUAL:
638
+ return "'+='";
639
+ case PM_TOKEN_QUESTION_MARK:
640
+ return "'?'";
641
+ case PM_TOKEN_REGEXP_BEGIN:
642
+ return "regular expression beginning";
643
+ case PM_TOKEN_REGEXP_END:
644
+ return "regular expression ending";
645
+ case PM_TOKEN_SEMICOLON:
646
+ return "';'";
647
+ case PM_TOKEN_SLASH:
648
+ return "'/'";
649
+ case PM_TOKEN_SLASH_EQUAL:
650
+ return "'/='";
651
+ case PM_TOKEN_STAR:
652
+ return "'*'";
653
+ case PM_TOKEN_STAR_EQUAL:
654
+ return "'*='";
655
+ case PM_TOKEN_STAR_STAR:
656
+ return "'**'";
657
+ case PM_TOKEN_STAR_STAR_EQUAL:
658
+ return "'**='";
659
+ case PM_TOKEN_STRING_BEGIN:
660
+ return "string beginning";
661
+ case PM_TOKEN_STRING_CONTENT:
662
+ return "string content";
663
+ case PM_TOKEN_STRING_END:
664
+ return "string ending";
665
+ case PM_TOKEN_SYMBOL_BEGIN:
666
+ return "symbol beginning";
667
+ case PM_TOKEN_TILDE:
668
+ return "'~'";
669
+ case PM_TOKEN_UAMPERSAND:
670
+ return "'&'";
671
+ case PM_TOKEN_UCOLON_COLON:
672
+ return "'::'";
673
+ case PM_TOKEN_UDOT_DOT:
674
+ return "'..'";
675
+ case PM_TOKEN_UDOT_DOT_DOT:
676
+ return "'...'";
677
+ case PM_TOKEN_UMINUS:
678
+ return "'-'";
679
+ case PM_TOKEN_UMINUS_NUM:
680
+ return "'-'";
681
+ case PM_TOKEN_UPLUS:
682
+ return "'+'";
683
+ case PM_TOKEN_USTAR:
684
+ return "'*'";
685
+ case PM_TOKEN_USTAR_STAR:
686
+ return "'**'";
687
+ case PM_TOKEN_WORDS_SEP:
688
+ return "string separator";
689
+ case PM_TOKEN___END__:
690
+ return "'__END__'";
691
+ case PM_TOKEN_MAXIMUM:
692
+ assert(false && "unreachable");
693
+ return "";
694
+ }
695
+
696
+ // Provide a default, because some compilers can't determine that the above
697
+ // switch is exhaustive.
698
+ assert(false && "unreachable");
699
+ return "";
351
700
  }
data/src/util/pm_buffer.c CHANGED
@@ -160,6 +160,17 @@ pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
160
160
  pm_buffer_append_varuint(buffer, unsigned_int);
161
161
  }
162
162
 
163
+ /**
164
+ * Prepend the given string to the buffer.
165
+ */
166
+ void
167
+ pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length) {
168
+ size_t cursor = buffer->length;
169
+ pm_buffer_append_length(buffer, length);
170
+ memmove(buffer->value + length, buffer->value, cursor);
171
+ memcpy(buffer->value, value, length);
172
+ }
173
+
163
174
  /**
164
175
  * Concatenate one buffer onto another.
165
176
  */
@@ -124,13 +124,13 @@ pm_constant_pool_resize(pm_constant_pool_t *pool) {
124
124
 
125
125
  // If an id is set on this constant, then we know we have content here.
126
126
  // In this case we need to insert it into the next constant pool.
127
- if (bucket->id != 0) {
127
+ if (bucket->id != PM_CONSTANT_ID_UNSET) {
128
128
  uint32_t next_index = bucket->hash & mask;
129
129
 
130
130
  // This implements linear scanning to find the next available slot
131
131
  // in case this index is already taken. We don't need to bother
132
132
  // comparing the values since we know that the hash is unique.
133
- while (next_buckets[next_index].id != 0) {
133
+ while (next_buckets[next_index].id != PM_CONSTANT_ID_UNSET) {
134
134
  next_index = (next_index + 1) & mask;
135
135
  }
136
136
 
@@ -177,7 +177,7 @@ pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
177
177
  */
178
178
  pm_constant_t *
179
179
  pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id) {
180
- assert(constant_id > 0 && constant_id <= pool->size);
180
+ assert(constant_id != PM_CONSTANT_ID_UNSET && constant_id <= pool->size);
181
181
  return &pool->constants[constant_id - 1];
182
182
  }
183
183
 
@@ -187,7 +187,7 @@ pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t
187
187
  static inline pm_constant_id_t
188
188
  pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
189
189
  if (pool->size >= (pool->capacity / 4 * 3)) {
190
- if (!pm_constant_pool_resize(pool)) return 0;
190
+ if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET;
191
191
  }
192
192
 
193
193
  assert(is_power_of_two(pool->capacity));
@@ -197,7 +197,7 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
197
197
  uint32_t index = hash & mask;
198
198
  pm_constant_pool_bucket_t *bucket;
199
199
 
200
- while (bucket = &pool->buckets[index], bucket->id != 0) {
200
+ while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
201
201
  // If there is a collision, then we need to check if the content is the
202
202
  // same as the content we are trying to insert. If it is, then we can
203
203
  // return the id of the existing constant.
@@ -248,8 +248,8 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
248
248
  }
249
249
 
250
250
  /**
251
- * Insert a constant into a constant pool. Returns the id of the constant, or 0
252
- * if any potential calls to resize fail.
251
+ * Insert a constant into a constant pool. Returns the id of the constant, or
252
+ * PM_CONSTANT_ID_UNSET if any potential calls to resize fail.
253
253
  */
254
254
  pm_constant_id_t
255
255
  pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
@@ -258,8 +258,8 @@ pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, s
258
258
 
259
259
  /**
260
260
  * Insert a constant into a constant pool from memory that is now owned by the
261
- * constant pool. Returns the id of the constant, or 0 if any potential calls to
262
- * resize fail.
261
+ * constant pool. Returns the id of the constant, or PM_CONSTANT_ID_UNSET if any
262
+ * potential calls to resize fail.
263
263
  */
264
264
  pm_constant_id_t
265
265
  pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
@@ -268,7 +268,8 @@ pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, si
268
268
 
269
269
  /**
270
270
  * Insert a constant into a constant pool from memory that is constant. Returns
271
- * the id of the constant, or 0 if any potential calls to resize fail.
271
+ * the id of the constant, or PM_CONSTANT_ID_UNSET if any potential calls to
272
+ * resize fail.
272
273
  */
273
274
  pm_constant_id_t
274
275
  pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
@@ -286,7 +287,7 @@ pm_constant_pool_free(pm_constant_pool_t *pool) {
286
287
  pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
287
288
 
288
289
  // If an id is set on this constant, then we know we have content here.
289
- if (bucket->id != 0 && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
290
+ if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
290
291
  pm_constant_t *constant = &pool->constants[bucket->id - 1];
291
292
  free((void *) constant->start);
292
293
  }
@@ -45,18 +45,6 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
45
45
  return true;
46
46
  }
47
47
 
48
- /**
49
- * Conditionally append a new offset to the newline list, if the value passed in
50
- * is a newline.
51
- */
52
- bool
53
- pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) {
54
- if (*cursor != '\n') {
55
- return true;
56
- }
57
- return pm_newline_list_append(list, cursor);
58
- }
59
-
60
48
  /**
61
49
  * Returns the line and column of the given offset. If the offset is not in the
62
50
  * list, the line and column of the closest offset less than the given offset
@@ -74,7 +62,7 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
74
62
  size_t mid = left + (right - left) / 2;
75
63
 
76
64
  if (list->offsets[mid] == offset) {
77
- return ((pm_line_column_t) { mid, 0 });
65
+ return ((pm_line_column_t) { mid + 1, 0 });
78
66
  }
79
67
 
80
68
  if (list->offsets[mid] < offset) {
@@ -84,7 +72,7 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
84
72
  }
85
73
  }
86
74
 
87
- return ((pm_line_column_t) { left - 1, offset - list->offsets[left - 1] });
75
+ return ((pm_line_column_t) { left, offset - list->offsets[left - 1] });
88
76
  }
89
77
 
90
78
  /**
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prism
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.19.0
4
+ version: 0.20.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shopify
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-14 00:00:00.000000000 Z
11
+ date: 2024-02-01 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email:
@@ -26,8 +26,8 @@ files:
26
26
  - README.md
27
27
  - config.yml
28
28
  - docs/build_system.md
29
- - docs/building.md
30
29
  - docs/configuration.md
30
+ - docs/cruby_compilation.md
31
31
  - docs/design.md
32
32
  - docs/encoding.md
33
33
  - docs/fuzzing.md
@@ -35,6 +35,8 @@ files:
35
35
  - docs/javascript.md
36
36
  - docs/local_variable_depth.md
37
37
  - docs/mapping.md
38
+ - docs/parser_translation.md
39
+ - docs/parsing_rules.md
38
40
  - docs/releasing.md
39
41
  - docs/ripper.md
40
42
  - docs/ruby_api.md
@@ -88,6 +90,11 @@ files:
88
90
  - lib/prism/pattern.rb
89
91
  - lib/prism/ripper_compat.rb
90
92
  - lib/prism/serialize.rb
93
+ - lib/prism/translation.rb
94
+ - lib/prism/translation/parser.rb
95
+ - lib/prism/translation/parser/compiler.rb
96
+ - lib/prism/translation/parser/lexer.rb
97
+ - lib/prism/translation/parser/rubocop.rb
91
98
  - lib/prism/visitor.rb
92
99
  - prism.gemspec
93
100
  - rbi/prism.rbi