prism 0.19.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +29 -1
  3. data/Makefile +5 -0
  4. data/README.md +8 -6
  5. data/config.yml +236 -38
  6. data/docs/build_system.md +19 -2
  7. data/docs/cruby_compilation.md +27 -0
  8. data/docs/parser_translation.md +34 -0
  9. data/docs/parsing_rules.md +19 -0
  10. data/docs/releasing.md +3 -3
  11. data/docs/ruby_api.md +1 -1
  12. data/docs/serialization.md +17 -5
  13. data/ext/prism/api_node.c +101 -81
  14. data/ext/prism/extension.c +74 -11
  15. data/ext/prism/extension.h +1 -1
  16. data/include/prism/ast.h +1699 -504
  17. data/include/prism/defines.h +8 -0
  18. data/include/prism/diagnostic.h +39 -2
  19. data/include/prism/encoding.h +10 -0
  20. data/include/prism/options.h +40 -14
  21. data/include/prism/parser.h +33 -17
  22. data/include/prism/util/pm_buffer.h +9 -0
  23. data/include/prism/util/pm_constant_pool.h +7 -0
  24. data/include/prism/util/pm_newline_list.h +0 -11
  25. data/include/prism/version.h +2 -2
  26. data/include/prism.h +19 -2
  27. data/lib/prism/debug.rb +11 -5
  28. data/lib/prism/dot_visitor.rb +36 -14
  29. data/lib/prism/dsl.rb +22 -22
  30. data/lib/prism/ffi.rb +2 -2
  31. data/lib/prism/node.rb +1020 -737
  32. data/lib/prism/node_ext.rb +2 -2
  33. data/lib/prism/parse_result.rb +17 -9
  34. data/lib/prism/serialize.rb +53 -29
  35. data/lib/prism/translation/parser/compiler.rb +1831 -0
  36. data/lib/prism/translation/parser/lexer.rb +335 -0
  37. data/lib/prism/translation/parser/rubocop.rb +37 -0
  38. data/lib/prism/translation/parser.rb +163 -0
  39. data/lib/prism/translation.rb +11 -0
  40. data/lib/prism.rb +1 -0
  41. data/prism.gemspec +12 -5
  42. data/rbi/prism.rbi +150 -88
  43. data/rbi/prism_static.rbi +15 -3
  44. data/sig/prism.rbs +996 -961
  45. data/sig/prism_static.rbs +123 -46
  46. data/src/diagnostic.c +259 -219
  47. data/src/encoding.c +4 -8
  48. data/src/node.c +2 -6
  49. data/src/options.c +24 -5
  50. data/src/prettyprint.c +174 -42
  51. data/src/prism.c +1136 -328
  52. data/src/serialize.c +12 -9
  53. data/src/token_type.c +353 -4
  54. data/src/util/pm_buffer.c +11 -0
  55. data/src/util/pm_constant_pool.c +12 -11
  56. data/src/util/pm_newline_list.c +2 -14
  57. metadata +10 -3
  58. data/docs/building.md +0 -29
data/src/serialize.c CHANGED
@@ -158,11 +158,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
158
158
  }
159
159
  case PM_ASSOC_NODE: {
160
160
  pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->key, buffer);
161
- if (((pm_assoc_node_t *)node)->value == NULL) {
162
- pm_buffer_append_byte(buffer, 0);
163
- } else {
164
- pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->value, buffer);
165
- }
161
+ pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->value, buffer);
166
162
  if (((pm_assoc_node_t *)node)->operator_loc.start == NULL) {
167
163
  pm_buffer_append_byte(buffer, 0);
168
164
  } else {
@@ -229,6 +225,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
229
225
  break;
230
226
  }
231
227
  case PM_BLOCK_LOCAL_VARIABLE_NODE: {
228
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
232
229
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_local_variable_node_t *)node)->name));
233
230
  break;
234
231
  }
@@ -238,7 +235,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
238
235
  for (uint32_t index = 0; index < locals_size; index++) {
239
236
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_node_t *)node)->locals.ids[index]));
240
237
  }
241
- pm_buffer_append_varuint(buffer, ((pm_block_node_t *)node)->locals_body_index);
242
238
  if (((pm_block_node_t *)node)->parameters == NULL) {
243
239
  pm_buffer_append_byte(buffer, 0);
244
240
  } else {
@@ -254,6 +250,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
254
250
  break;
255
251
  }
256
252
  case PM_BLOCK_PARAMETER_NODE: {
253
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
257
254
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_parameter_node_t *)node)->name));
258
255
  if (((pm_block_parameter_node_t *)node)->name_loc.start == NULL) {
259
256
  pm_buffer_append_byte(buffer, 0);
@@ -651,7 +648,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
651
648
  for (uint32_t index = 0; index < locals_size; index++) {
652
649
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_def_node_t *)node)->locals.ids[index]));
653
650
  }
654
- pm_buffer_append_varuint(buffer, ((pm_def_node_t *)node)->locals_body_index);
655
651
  pm_serialize_location(parser, &((pm_def_node_t *)node)->def_keyword_loc, buffer);
656
652
  if (((pm_def_node_t *)node)->operator_loc.start == NULL) {
657
653
  pm_buffer_append_byte(buffer, 0);
@@ -1190,6 +1186,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1190
1186
  break;
1191
1187
  }
1192
1188
  case PM_KEYWORD_REST_PARAMETER_NODE: {
1189
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1193
1190
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_keyword_rest_parameter_node_t *)node)->name));
1194
1191
  if (((pm_keyword_rest_parameter_node_t *)node)->name_loc.start == NULL) {
1195
1192
  pm_buffer_append_byte(buffer, 0);
@@ -1206,7 +1203,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1206
1203
  for (uint32_t index = 0; index < locals_size; index++) {
1207
1204
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_lambda_node_t *)node)->locals.ids[index]));
1208
1205
  }
1209
- pm_buffer_append_varuint(buffer, ((pm_lambda_node_t *)node)->locals_body_index);
1210
1206
  pm_serialize_location(parser, &((pm_lambda_node_t *)node)->operator_loc, buffer);
1211
1207
  pm_serialize_location(parser, &((pm_lambda_node_t *)node)->opening_loc, buffer);
1212
1208
  pm_serialize_location(parser, &((pm_lambda_node_t *)node)->closing_loc, buffer);
@@ -1402,12 +1398,14 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1402
1398
  break;
1403
1399
  }
1404
1400
  case PM_OPTIONAL_KEYWORD_PARAMETER_NODE: {
1401
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1405
1402
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_optional_keyword_parameter_node_t *)node)->name));
1406
1403
  pm_serialize_location(parser, &((pm_optional_keyword_parameter_node_t *)node)->name_loc, buffer);
1407
1404
  pm_serialize_node(parser, (pm_node_t *)((pm_optional_keyword_parameter_node_t *)node)->value, buffer);
1408
1405
  break;
1409
1406
  }
1410
1407
  case PM_OPTIONAL_PARAMETER_NODE: {
1408
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1411
1409
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_optional_parameter_node_t *)node)->name));
1412
1410
  pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->name_loc, buffer);
1413
1411
  pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->operator_loc, buffer);
@@ -1542,11 +1540,13 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1542
1540
  break;
1543
1541
  }
1544
1542
  case PM_REQUIRED_KEYWORD_PARAMETER_NODE: {
1543
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1545
1544
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_required_keyword_parameter_node_t *)node)->name));
1546
1545
  pm_serialize_location(parser, &((pm_required_keyword_parameter_node_t *)node)->name_loc, buffer);
1547
1546
  break;
1548
1547
  }
1549
1548
  case PM_REQUIRED_PARAMETER_NODE: {
1549
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1550
1550
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_required_parameter_node_t *)node)->name));
1551
1551
  break;
1552
1552
  }
@@ -1587,6 +1587,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1587
1587
  break;
1588
1588
  }
1589
1589
  case PM_REST_PARAMETER_NODE: {
1590
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1590
1591
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_rest_parameter_node_t *)node)->name));
1591
1592
  if (((pm_rest_parameter_node_t *)node)->name_loc.start == NULL) {
1592
1593
  pm_buffer_append_byte(buffer, 0);
@@ -1904,6 +1905,8 @@ pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buf
1904
1905
 
1905
1906
  // serialize location
1906
1907
  pm_serialize_location(parser, &diagnostic->location, buffer);
1908
+
1909
+ pm_buffer_append_byte(buffer, diagnostic->level);
1907
1910
  }
1908
1911
 
1909
1912
  static void
@@ -1926,7 +1929,7 @@ pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer) {
1926
1929
  pm_buffer_append_string(buffer, encoding->name, encoding_length);
1927
1930
  }
1928
1931
 
1929
- #line 216 "serialize.c.erb"
1932
+ #line 218 "serialize.c.erb"
1930
1933
  /**
1931
1934
  * Serialize the encoding, metadata, nodes, and constant pool.
1932
1935
  */
data/src/token_type.c CHANGED
@@ -13,8 +13,7 @@
13
13
  * Returns a string representation of the given token type.
14
14
  */
15
15
  PRISM_EXPORTED_FUNCTION const char *
16
- pm_token_type_to_str(pm_token_type_t token_type)
17
- {
16
+ pm_token_type_name(pm_token_type_t token_type) {
18
17
  switch (token_type) {
19
18
  case PM_TOKEN_EOF:
20
19
  return "EOF";
@@ -345,7 +344,357 @@ pm_token_type_to_str(pm_token_type_t token_type)
345
344
  case PM_TOKEN___END__:
346
345
  return "__END__";
347
346
  case PM_TOKEN_MAXIMUM:
348
- return "MAXIMUM";
347
+ assert(false && "unreachable");
348
+ return "";
349
349
  }
350
- return "\0";
350
+
351
+ // Provide a default, because some compilers can't determine that the above
352
+ // switch is exhaustive.
353
+ assert(false && "unreachable");
354
+ return "";
355
+ }
356
+
357
+ /**
358
+ * Returns the human name of the given token type.
359
+ */
360
+ const char *
361
+ pm_token_type_human(pm_token_type_t token_type) {
362
+ switch (token_type) {
363
+ case PM_TOKEN_EOF:
364
+ return "end of file";
365
+ case PM_TOKEN_MISSING:
366
+ return "missing token";
367
+ case PM_TOKEN_NOT_PROVIDED:
368
+ return "not provided token";
369
+ case PM_TOKEN_AMPERSAND:
370
+ return "'&'";
371
+ case PM_TOKEN_AMPERSAND_AMPERSAND:
372
+ return "'&&'";
373
+ case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
374
+ return "'&&='";
375
+ case PM_TOKEN_AMPERSAND_DOT:
376
+ return "'&.'";
377
+ case PM_TOKEN_AMPERSAND_EQUAL:
378
+ return "'&='";
379
+ case PM_TOKEN_BACKTICK:
380
+ return "'`'";
381
+ case PM_TOKEN_BACK_REFERENCE:
382
+ return "back reference";
383
+ case PM_TOKEN_BANG:
384
+ return "'!'";
385
+ case PM_TOKEN_BANG_EQUAL:
386
+ return "'!='";
387
+ case PM_TOKEN_BANG_TILDE:
388
+ return "'!~'";
389
+ case PM_TOKEN_BRACE_LEFT:
390
+ return "'{'";
391
+ case PM_TOKEN_BRACE_RIGHT:
392
+ return "'}'";
393
+ case PM_TOKEN_BRACKET_LEFT:
394
+ return "'['";
395
+ case PM_TOKEN_BRACKET_LEFT_ARRAY:
396
+ return "'['";
397
+ case PM_TOKEN_BRACKET_LEFT_RIGHT:
398
+ return "'[]'";
399
+ case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL:
400
+ return "'[]='";
401
+ case PM_TOKEN_BRACKET_RIGHT:
402
+ return "']'";
403
+ case PM_TOKEN_CARET:
404
+ return "'^'";
405
+ case PM_TOKEN_CARET_EQUAL:
406
+ return "'^='";
407
+ case PM_TOKEN_CHARACTER_LITERAL:
408
+ return "character literal";
409
+ case PM_TOKEN_CLASS_VARIABLE:
410
+ return "class variable";
411
+ case PM_TOKEN_COLON:
412
+ return "':'";
413
+ case PM_TOKEN_COLON_COLON:
414
+ return "'::'";
415
+ case PM_TOKEN_COMMA:
416
+ return "','";
417
+ case PM_TOKEN_COMMENT:
418
+ return "comment";
419
+ case PM_TOKEN_CONSTANT:
420
+ return "constant";
421
+ case PM_TOKEN_DOT:
422
+ return "'.'";
423
+ case PM_TOKEN_DOT_DOT:
424
+ return "'..'";
425
+ case PM_TOKEN_DOT_DOT_DOT:
426
+ return "'...'";
427
+ case PM_TOKEN_EMBDOC_BEGIN:
428
+ return "'=begin'";
429
+ case PM_TOKEN_EMBDOC_END:
430
+ return "'=end'";
431
+ case PM_TOKEN_EMBDOC_LINE:
432
+ return "embedded documentation line";
433
+ case PM_TOKEN_EMBEXPR_BEGIN:
434
+ return "'#{'";
435
+ case PM_TOKEN_EMBEXPR_END:
436
+ return "'}'";
437
+ case PM_TOKEN_EMBVAR:
438
+ return "'#'";
439
+ case PM_TOKEN_EQUAL:
440
+ return "'='";
441
+ case PM_TOKEN_EQUAL_EQUAL:
442
+ return "'=='";
443
+ case PM_TOKEN_EQUAL_EQUAL_EQUAL:
444
+ return "'==='";
445
+ case PM_TOKEN_EQUAL_GREATER:
446
+ return "'=>'";
447
+ case PM_TOKEN_EQUAL_TILDE:
448
+ return "'=~'";
449
+ case PM_TOKEN_FLOAT:
450
+ return "float";
451
+ case PM_TOKEN_FLOAT_IMAGINARY:
452
+ return "imaginary";
453
+ case PM_TOKEN_FLOAT_RATIONAL:
454
+ return "rational";
455
+ case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
456
+ return "imaginary";
457
+ case PM_TOKEN_GLOBAL_VARIABLE:
458
+ return "global variable";
459
+ case PM_TOKEN_GREATER:
460
+ return "'>'";
461
+ case PM_TOKEN_GREATER_EQUAL:
462
+ return "'>='";
463
+ case PM_TOKEN_GREATER_GREATER:
464
+ return "'>>'";
465
+ case PM_TOKEN_GREATER_GREATER_EQUAL:
466
+ return "'>>='";
467
+ case PM_TOKEN_HEREDOC_END:
468
+ return "heredoc ending";
469
+ case PM_TOKEN_HEREDOC_START:
470
+ return "heredoc beginning";
471
+ case PM_TOKEN_IDENTIFIER:
472
+ return "local variable or method identifier";
473
+ case PM_TOKEN_IGNORED_NEWLINE:
474
+ return "ignored newline";
475
+ case PM_TOKEN_INSTANCE_VARIABLE:
476
+ return "instance variable";
477
+ case PM_TOKEN_INTEGER:
478
+ return "integer";
479
+ case PM_TOKEN_INTEGER_IMAGINARY:
480
+ return "imaginary";
481
+ case PM_TOKEN_INTEGER_RATIONAL:
482
+ return "rational";
483
+ case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY:
484
+ return "imaginary";
485
+ case PM_TOKEN_KEYWORD_ALIAS:
486
+ return "'alias'";
487
+ case PM_TOKEN_KEYWORD_AND:
488
+ return "'and'";
489
+ case PM_TOKEN_KEYWORD_BEGIN:
490
+ return "'begin'";
491
+ case PM_TOKEN_KEYWORD_BEGIN_UPCASE:
492
+ return "'BEGIN'";
493
+ case PM_TOKEN_KEYWORD_BREAK:
494
+ return "'break'";
495
+ case PM_TOKEN_KEYWORD_CASE:
496
+ return "'case'";
497
+ case PM_TOKEN_KEYWORD_CLASS:
498
+ return "'class'";
499
+ case PM_TOKEN_KEYWORD_DEF:
500
+ return "'def'";
501
+ case PM_TOKEN_KEYWORD_DEFINED:
502
+ return "'defined?'";
503
+ case PM_TOKEN_KEYWORD_DO:
504
+ return "'do'";
505
+ case PM_TOKEN_KEYWORD_DO_LOOP:
506
+ return "'do'";
507
+ case PM_TOKEN_KEYWORD_ELSE:
508
+ return "'else'";
509
+ case PM_TOKEN_KEYWORD_ELSIF:
510
+ return "'elsif'";
511
+ case PM_TOKEN_KEYWORD_END:
512
+ return "'end'";
513
+ case PM_TOKEN_KEYWORD_END_UPCASE:
514
+ return "'END'";
515
+ case PM_TOKEN_KEYWORD_ENSURE:
516
+ return "'ensure'";
517
+ case PM_TOKEN_KEYWORD_FALSE:
518
+ return "'false'";
519
+ case PM_TOKEN_KEYWORD_FOR:
520
+ return "'for'";
521
+ case PM_TOKEN_KEYWORD_IF:
522
+ return "'if'";
523
+ case PM_TOKEN_KEYWORD_IF_MODIFIER:
524
+ return "'if'";
525
+ case PM_TOKEN_KEYWORD_IN:
526
+ return "'in'";
527
+ case PM_TOKEN_KEYWORD_MODULE:
528
+ return "'module'";
529
+ case PM_TOKEN_KEYWORD_NEXT:
530
+ return "'next'";
531
+ case PM_TOKEN_KEYWORD_NIL:
532
+ return "'nil'";
533
+ case PM_TOKEN_KEYWORD_NOT:
534
+ return "'not'";
535
+ case PM_TOKEN_KEYWORD_OR:
536
+ return "'or'";
537
+ case PM_TOKEN_KEYWORD_REDO:
538
+ return "'redo'";
539
+ case PM_TOKEN_KEYWORD_RESCUE:
540
+ return "'rescue'";
541
+ case PM_TOKEN_KEYWORD_RESCUE_MODIFIER:
542
+ return "'rescue'";
543
+ case PM_TOKEN_KEYWORD_RETRY:
544
+ return "'retry'";
545
+ case PM_TOKEN_KEYWORD_RETURN:
546
+ return "'return'";
547
+ case PM_TOKEN_KEYWORD_SELF:
548
+ return "'self'";
549
+ case PM_TOKEN_KEYWORD_SUPER:
550
+ return "'super'";
551
+ case PM_TOKEN_KEYWORD_THEN:
552
+ return "'then'";
553
+ case PM_TOKEN_KEYWORD_TRUE:
554
+ return "'true'";
555
+ case PM_TOKEN_KEYWORD_UNDEF:
556
+ return "'undef'";
557
+ case PM_TOKEN_KEYWORD_UNLESS:
558
+ return "'unless'";
559
+ case PM_TOKEN_KEYWORD_UNLESS_MODIFIER:
560
+ return "'unless'";
561
+ case PM_TOKEN_KEYWORD_UNTIL:
562
+ return "'until'";
563
+ case PM_TOKEN_KEYWORD_UNTIL_MODIFIER:
564
+ return "'until'";
565
+ case PM_TOKEN_KEYWORD_WHEN:
566
+ return "'when'";
567
+ case PM_TOKEN_KEYWORD_WHILE:
568
+ return "'while'";
569
+ case PM_TOKEN_KEYWORD_WHILE_MODIFIER:
570
+ return "'while'";
571
+ case PM_TOKEN_KEYWORD_YIELD:
572
+ return "'yield'";
573
+ case PM_TOKEN_KEYWORD___ENCODING__:
574
+ return "'__ENCODING__'";
575
+ case PM_TOKEN_KEYWORD___FILE__:
576
+ return "'__FILE__'";
577
+ case PM_TOKEN_KEYWORD___LINE__:
578
+ return "'__LINE__'";
579
+ case PM_TOKEN_LABEL:
580
+ return "label";
581
+ case PM_TOKEN_LABEL_END:
582
+ return "':'";
583
+ case PM_TOKEN_LAMBDA_BEGIN:
584
+ return "'{'";
585
+ case PM_TOKEN_LESS:
586
+ return "'<'";
587
+ case PM_TOKEN_LESS_EQUAL:
588
+ return "'<='";
589
+ case PM_TOKEN_LESS_EQUAL_GREATER:
590
+ return "'<=>'";
591
+ case PM_TOKEN_LESS_LESS:
592
+ return "'<<'";
593
+ case PM_TOKEN_LESS_LESS_EQUAL:
594
+ return "'<<='";
595
+ case PM_TOKEN_METHOD_NAME:
596
+ return "method name";
597
+ case PM_TOKEN_MINUS:
598
+ return "'-'";
599
+ case PM_TOKEN_MINUS_EQUAL:
600
+ return "'-='";
601
+ case PM_TOKEN_MINUS_GREATER:
602
+ return "'->'";
603
+ case PM_TOKEN_NEWLINE:
604
+ return "newline";
605
+ case PM_TOKEN_NUMBERED_REFERENCE:
606
+ return "numbered reference";
607
+ case PM_TOKEN_PARENTHESIS_LEFT:
608
+ return "'('";
609
+ case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
610
+ return "'('";
611
+ case PM_TOKEN_PARENTHESIS_RIGHT:
612
+ return "')'";
613
+ case PM_TOKEN_PERCENT:
614
+ return "'%'";
615
+ case PM_TOKEN_PERCENT_EQUAL:
616
+ return "'%='";
617
+ case PM_TOKEN_PERCENT_LOWER_I:
618
+ return "'%i'";
619
+ case PM_TOKEN_PERCENT_LOWER_W:
620
+ return "'%w'";
621
+ case PM_TOKEN_PERCENT_LOWER_X:
622
+ return "'%x'";
623
+ case PM_TOKEN_PERCENT_UPPER_I:
624
+ return "'%I'";
625
+ case PM_TOKEN_PERCENT_UPPER_W:
626
+ return "'%W'";
627
+ case PM_TOKEN_PIPE:
628
+ return "'|'";
629
+ case PM_TOKEN_PIPE_EQUAL:
630
+ return "'|='";
631
+ case PM_TOKEN_PIPE_PIPE:
632
+ return "'||'";
633
+ case PM_TOKEN_PIPE_PIPE_EQUAL:
634
+ return "'||='";
635
+ case PM_TOKEN_PLUS:
636
+ return "'+'";
637
+ case PM_TOKEN_PLUS_EQUAL:
638
+ return "'+='";
639
+ case PM_TOKEN_QUESTION_MARK:
640
+ return "'?'";
641
+ case PM_TOKEN_REGEXP_BEGIN:
642
+ return "regular expression beginning";
643
+ case PM_TOKEN_REGEXP_END:
644
+ return "regular expression ending";
645
+ case PM_TOKEN_SEMICOLON:
646
+ return "';'";
647
+ case PM_TOKEN_SLASH:
648
+ return "'/'";
649
+ case PM_TOKEN_SLASH_EQUAL:
650
+ return "'/='";
651
+ case PM_TOKEN_STAR:
652
+ return "'*'";
653
+ case PM_TOKEN_STAR_EQUAL:
654
+ return "'*='";
655
+ case PM_TOKEN_STAR_STAR:
656
+ return "'**'";
657
+ case PM_TOKEN_STAR_STAR_EQUAL:
658
+ return "'**='";
659
+ case PM_TOKEN_STRING_BEGIN:
660
+ return "string beginning";
661
+ case PM_TOKEN_STRING_CONTENT:
662
+ return "string content";
663
+ case PM_TOKEN_STRING_END:
664
+ return "string ending";
665
+ case PM_TOKEN_SYMBOL_BEGIN:
666
+ return "symbol beginning";
667
+ case PM_TOKEN_TILDE:
668
+ return "'~'";
669
+ case PM_TOKEN_UAMPERSAND:
670
+ return "'&'";
671
+ case PM_TOKEN_UCOLON_COLON:
672
+ return "'::'";
673
+ case PM_TOKEN_UDOT_DOT:
674
+ return "'..'";
675
+ case PM_TOKEN_UDOT_DOT_DOT:
676
+ return "'...'";
677
+ case PM_TOKEN_UMINUS:
678
+ return "'-'";
679
+ case PM_TOKEN_UMINUS_NUM:
680
+ return "'-'";
681
+ case PM_TOKEN_UPLUS:
682
+ return "'+'";
683
+ case PM_TOKEN_USTAR:
684
+ return "'*'";
685
+ case PM_TOKEN_USTAR_STAR:
686
+ return "'**'";
687
+ case PM_TOKEN_WORDS_SEP:
688
+ return "string separator";
689
+ case PM_TOKEN___END__:
690
+ return "'__END__'";
691
+ case PM_TOKEN_MAXIMUM:
692
+ assert(false && "unreachable");
693
+ return "";
694
+ }
695
+
696
+ // Provide a default, because some compilers can't determine that the above
697
+ // switch is exhaustive.
698
+ assert(false && "unreachable");
699
+ return "";
351
700
  }
data/src/util/pm_buffer.c CHANGED
@@ -160,6 +160,17 @@ pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
160
160
  pm_buffer_append_varuint(buffer, unsigned_int);
161
161
  }
162
162
 
163
+ /**
164
+ * Prepend the given string to the buffer.
165
+ */
166
+ void
167
+ pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length) {
168
+ size_t cursor = buffer->length;
169
+ pm_buffer_append_length(buffer, length);
170
+ memmove(buffer->value + length, buffer->value, cursor);
171
+ memcpy(buffer->value, value, length);
172
+ }
173
+
163
174
  /**
164
175
  * Concatenate one buffer onto another.
165
176
  */
data/src/util/pm_constant_pool.c CHANGED
@@ -124,13 +124,13 @@ pm_constant_pool_resize(pm_constant_pool_t *pool) {
124
124
 
125
125
  // If an id is set on this constant, then we know we have content here.
126
126
  // In this case we need to insert it into the next constant pool.
127
- if (bucket->id != 0) {
127
+ if (bucket->id != PM_CONSTANT_ID_UNSET) {
128
128
  uint32_t next_index = bucket->hash & mask;
129
129
 
130
130
  // This implements linear scanning to find the next available slot
131
131
  // in case this index is already taken. We don't need to bother
132
132
  // comparing the values since we know that the hash is unique.
133
- while (next_buckets[next_index].id != 0) {
133
+ while (next_buckets[next_index].id != PM_CONSTANT_ID_UNSET) {
134
134
  next_index = (next_index + 1) & mask;
135
135
  }
136
136
 
@@ -177,7 +177,7 @@ pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
177
177
  */
178
178
  pm_constant_t *
179
179
  pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id) {
180
- assert(constant_id > 0 && constant_id <= pool->size);
180
+ assert(constant_id != PM_CONSTANT_ID_UNSET && constant_id <= pool->size);
181
181
  return &pool->constants[constant_id - 1];
182
182
  }
183
183
 
@@ -187,7 +187,7 @@ pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t
187
187
  static inline pm_constant_id_t
188
188
  pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
189
189
  if (pool->size >= (pool->capacity / 4 * 3)) {
190
- if (!pm_constant_pool_resize(pool)) return 0;
190
+ if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET;
191
191
  }
192
192
 
193
193
  assert(is_power_of_two(pool->capacity));
@@ -197,7 +197,7 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
197
197
  uint32_t index = hash & mask;
198
198
  pm_constant_pool_bucket_t *bucket;
199
199
 
200
- while (bucket = &pool->buckets[index], bucket->id != 0) {
200
+ while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
201
201
  // If there is a collision, then we need to check if the content is the
202
202
  // same as the content we are trying to insert. If it is, then we can
203
203
  // return the id of the existing constant.
@@ -248,8 +248,8 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
248
248
  }
249
249
 
250
250
  /**
251
- * Insert a constant into a constant pool. Returns the id of the constant, or 0
252
- * if any potential calls to resize fail.
251
+ * Insert a constant into a constant pool. Returns the id of the constant, or
252
+ * PM_CONSTANT_ID_UNSET if any potential calls to resize fail.
253
253
  */
254
254
  pm_constant_id_t
255
255
  pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
@@ -258,8 +258,8 @@ pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, s
258
258
 
259
259
  /**
260
260
  * Insert a constant into a constant pool from memory that is now owned by the
261
- * constant pool. Returns the id of the constant, or 0 if any potential calls to
262
- * resize fail.
261
+ * constant pool. Returns the id of the constant, or PM_CONSTANT_ID_UNSET if any
262
+ * potential calls to resize fail.
263
263
  */
264
264
  pm_constant_id_t
265
265
  pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
@@ -268,7 +268,8 @@ pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, si
268
268
 
269
269
  /**
270
270
  * Insert a constant into a constant pool from memory that is constant. Returns
271
- * the id of the constant, or 0 if any potential calls to resize fail.
271
+ * the id of the constant, or PM_CONSTANT_ID_UNSET if any potential calls to
272
+ * resize fail.
272
273
  */
273
274
  pm_constant_id_t
274
275
  pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
@@ -286,7 +287,7 @@ pm_constant_pool_free(pm_constant_pool_t *pool) {
286
287
  pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
287
288
 
288
289
  // If an id is set on this constant, then we know we have content here.
289
- if (bucket->id != 0 && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
290
+ if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
290
291
  pm_constant_t *constant = &pool->constants[bucket->id - 1];
291
292
  free((void *) constant->start);
292
293
  }
data/src/util/pm_newline_list.c CHANGED
@@ -45,18 +45,6 @@ pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor) {
45
45
  return true;
46
46
  }
47
47
 
48
- /**
49
- * Conditionally append a new offset to the newline list, if the value passed in
50
- * is a newline.
51
- */
52
- bool
53
- pm_newline_list_check_append(pm_newline_list_t *list, const uint8_t *cursor) {
54
- if (*cursor != '\n') {
55
- return true;
56
- }
57
- return pm_newline_list_append(list, cursor);
58
- }
59
-
60
48
  /**
61
49
  * Returns the line and column of the given offset. If the offset is not in the
62
50
  * list, the line and column of the closest offset less than the given offset
@@ -74,7 +62,7 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
74
62
  size_t mid = left + (right - left) / 2;
75
63
 
76
64
  if (list->offsets[mid] == offset) {
77
- return ((pm_line_column_t) { mid, 0 });
65
+ return ((pm_line_column_t) { mid + 1, 0 });
78
66
  }
79
67
 
80
68
  if (list->offsets[mid] < offset) {
@@ -84,7 +72,7 @@ pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor
84
72
  }
85
73
  }
86
74
 
87
- return ((pm_line_column_t) { left - 1, offset - list->offsets[left - 1] });
75
+ return ((pm_line_column_t) { left, offset - list->offsets[left - 1] });
88
76
  }
89
77
 
90
78
  /**
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: prism
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.19.0
4
+ version: 0.20.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shopify
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-14 00:00:00.000000000 Z
11
+ date: 2024-02-01 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email:
@@ -26,8 +26,8 @@ files:
26
26
  - README.md
27
27
  - config.yml
28
28
  - docs/build_system.md
29
- - docs/building.md
30
29
  - docs/configuration.md
30
+ - docs/cruby_compilation.md
31
31
  - docs/design.md
32
32
  - docs/encoding.md
33
33
  - docs/fuzzing.md
@@ -35,6 +35,8 @@ files:
35
35
  - docs/javascript.md
36
36
  - docs/local_variable_depth.md
37
37
  - docs/mapping.md
38
+ - docs/parser_translation.md
39
+ - docs/parsing_rules.md
38
40
  - docs/releasing.md
39
41
  - docs/ripper.md
40
42
  - docs/ruby_api.md
@@ -88,6 +90,11 @@ files:
88
90
  - lib/prism/pattern.rb
89
91
  - lib/prism/ripper_compat.rb
90
92
  - lib/prism/serialize.rb
93
+ - lib/prism/translation.rb
94
+ - lib/prism/translation/parser.rb
95
+ - lib/prism/translation/parser/compiler.rb
96
+ - lib/prism/translation/parser/lexer.rb
97
+ - lib/prism/translation/parser/rubocop.rb
91
98
  - lib/prism/visitor.rb
92
99
  - prism.gemspec
93
100
  - rbi/prism.rbi