prism 0.19.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +102 -1
  3. data/Makefile +5 -0
  4. data/README.md +9 -6
  5. data/config.yml +236 -38
  6. data/docs/build_system.md +19 -2
  7. data/docs/cruby_compilation.md +27 -0
  8. data/docs/parser_translation.md +34 -0
  9. data/docs/parsing_rules.md +19 -0
  10. data/docs/releasing.md +84 -16
  11. data/docs/ruby_api.md +1 -1
  12. data/docs/ruby_parser_translation.md +19 -0
  13. data/docs/serialization.md +19 -5
  14. data/ext/prism/api_node.c +1989 -1525
  15. data/ext/prism/extension.c +130 -30
  16. data/ext/prism/extension.h +2 -2
  17. data/include/prism/ast.h +1700 -505
  18. data/include/prism/defines.h +8 -0
  19. data/include/prism/diagnostic.h +49 -7
  20. data/include/prism/encoding.h +17 -0
  21. data/include/prism/options.h +40 -14
  22. data/include/prism/parser.h +34 -18
  23. data/include/prism/util/pm_buffer.h +9 -0
  24. data/include/prism/util/pm_constant_pool.h +18 -0
  25. data/include/prism/util/pm_newline_list.h +4 -14
  26. data/include/prism/util/pm_strpbrk.h +4 -1
  27. data/include/prism/version.h +2 -2
  28. data/include/prism.h +19 -2
  29. data/lib/prism/debug.rb +11 -5
  30. data/lib/prism/desugar_compiler.rb +225 -80
  31. data/lib/prism/dot_visitor.rb +36 -14
  32. data/lib/prism/dsl.rb +302 -299
  33. data/lib/prism/ffi.rb +107 -76
  34. data/lib/prism/lex_compat.rb +17 -1
  35. data/lib/prism/node.rb +4580 -2607
  36. data/lib/prism/node_ext.rb +27 -4
  37. data/lib/prism/parse_result.rb +75 -29
  38. data/lib/prism/serialize.rb +633 -305
  39. data/lib/prism/translation/parser/compiler.rb +1838 -0
  40. data/lib/prism/translation/parser/lexer.rb +335 -0
  41. data/lib/prism/translation/parser/rubocop.rb +45 -0
  42. data/lib/prism/translation/parser.rb +190 -0
  43. data/lib/prism/translation/parser33.rb +12 -0
  44. data/lib/prism/translation/parser34.rb +12 -0
  45. data/lib/prism/translation/ripper.rb +696 -0
  46. data/lib/prism/translation/ruby_parser.rb +1521 -0
  47. data/lib/prism/translation.rb +11 -0
  48. data/lib/prism.rb +1 -1
  49. data/prism.gemspec +18 -7
  50. data/rbi/prism.rbi +150 -88
  51. data/rbi/prism_static.rbi +15 -3
  52. data/sig/prism.rbs +996 -961
  53. data/sig/prism_static.rbs +123 -46
  54. data/src/diagnostic.c +264 -219
  55. data/src/encoding.c +21 -26
  56. data/src/node.c +2 -6
  57. data/src/options.c +29 -5
  58. data/src/prettyprint.c +176 -44
  59. data/src/prism.c +1499 -564
  60. data/src/serialize.c +35 -21
  61. data/src/token_type.c +353 -4
  62. data/src/util/pm_buffer.c +11 -0
  63. data/src/util/pm_constant_pool.c +37 -11
  64. data/src/util/pm_newline_list.c +6 -15
  65. data/src/util/pm_string.c +0 -7
  66. data/src/util/pm_strpbrk.c +122 -14
  67. metadata +16 -5
  68. data/docs/building.md +0 -29
  69. data/lib/prism/ripper_compat.rb +0 -207
data/src/serialize.c CHANGED
@@ -158,11 +158,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
158
158
  }
159
159
  case PM_ASSOC_NODE: {
160
160
  pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->key, buffer);
161
- if (((pm_assoc_node_t *)node)->value == NULL) {
162
- pm_buffer_append_byte(buffer, 0);
163
- } else {
164
- pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->value, buffer);
165
- }
161
+ pm_serialize_node(parser, (pm_node_t *)((pm_assoc_node_t *)node)->value, buffer);
166
162
  if (((pm_assoc_node_t *)node)->operator_loc.start == NULL) {
167
163
  pm_buffer_append_byte(buffer, 0);
168
164
  } else {
@@ -229,6 +225,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
229
225
  break;
230
226
  }
231
227
  case PM_BLOCK_LOCAL_VARIABLE_NODE: {
228
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
232
229
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_local_variable_node_t *)node)->name));
233
230
  break;
234
231
  }
@@ -238,7 +235,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
238
235
  for (uint32_t index = 0; index < locals_size; index++) {
239
236
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_node_t *)node)->locals.ids[index]));
240
237
  }
241
- pm_buffer_append_varuint(buffer, ((pm_block_node_t *)node)->locals_body_index);
242
238
  if (((pm_block_node_t *)node)->parameters == NULL) {
243
239
  pm_buffer_append_byte(buffer, 0);
244
240
  } else {
@@ -254,6 +250,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
254
250
  break;
255
251
  }
256
252
  case PM_BLOCK_PARAMETER_NODE: {
253
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
257
254
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_block_parameter_node_t *)node)->name));
258
255
  if (((pm_block_parameter_node_t *)node)->name_loc.start == NULL) {
259
256
  pm_buffer_append_byte(buffer, 0);
@@ -651,7 +648,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
651
648
  for (uint32_t index = 0; index < locals_size; index++) {
652
649
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_def_node_t *)node)->locals.ids[index]));
653
650
  }
654
- pm_buffer_append_varuint(buffer, ((pm_def_node_t *)node)->locals_body_index);
655
651
  pm_serialize_location(parser, &((pm_def_node_t *)node)->def_keyword_loc, buffer);
656
652
  if (((pm_def_node_t *)node)->operator_loc.start == NULL) {
657
653
  pm_buffer_append_byte(buffer, 0);
@@ -1190,6 +1186,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1190
1186
  break;
1191
1187
  }
1192
1188
  case PM_KEYWORD_REST_PARAMETER_NODE: {
1189
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1193
1190
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_keyword_rest_parameter_node_t *)node)->name));
1194
1191
  if (((pm_keyword_rest_parameter_node_t *)node)->name_loc.start == NULL) {
1195
1192
  pm_buffer_append_byte(buffer, 0);
@@ -1206,7 +1203,6 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1206
1203
  for (uint32_t index = 0; index < locals_size; index++) {
1207
1204
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_lambda_node_t *)node)->locals.ids[index]));
1208
1205
  }
1209
- pm_buffer_append_varuint(buffer, ((pm_lambda_node_t *)node)->locals_body_index);
1210
1206
  pm_serialize_location(parser, &((pm_lambda_node_t *)node)->operator_loc, buffer);
1211
1207
  pm_serialize_location(parser, &((pm_lambda_node_t *)node)->opening_loc, buffer);
1212
1208
  pm_serialize_location(parser, &((pm_lambda_node_t *)node)->closing_loc, buffer);
@@ -1402,12 +1398,14 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1402
1398
  break;
1403
1399
  }
1404
1400
  case PM_OPTIONAL_KEYWORD_PARAMETER_NODE: {
1401
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1405
1402
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_optional_keyword_parameter_node_t *)node)->name));
1406
1403
  pm_serialize_location(parser, &((pm_optional_keyword_parameter_node_t *)node)->name_loc, buffer);
1407
1404
  pm_serialize_node(parser, (pm_node_t *)((pm_optional_keyword_parameter_node_t *)node)->value, buffer);
1408
1405
  break;
1409
1406
  }
1410
1407
  case PM_OPTIONAL_PARAMETER_NODE: {
1408
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1411
1409
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_optional_parameter_node_t *)node)->name));
1412
1410
  pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->name_loc, buffer);
1413
1411
  pm_serialize_location(parser, &((pm_optional_parameter_node_t *)node)->operator_loc, buffer);
@@ -1542,11 +1540,13 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1542
1540
  break;
1543
1541
  }
1544
1542
  case PM_REQUIRED_KEYWORD_PARAMETER_NODE: {
1543
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1545
1544
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_required_keyword_parameter_node_t *)node)->name));
1546
1545
  pm_serialize_location(parser, &((pm_required_keyword_parameter_node_t *)node)->name_loc, buffer);
1547
1546
  break;
1548
1547
  }
1549
1548
  case PM_REQUIRED_PARAMETER_NODE: {
1549
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1550
1550
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_required_parameter_node_t *)node)->name));
1551
1551
  break;
1552
1552
  }
@@ -1587,6 +1587,7 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1587
1587
  break;
1588
1588
  }
1589
1589
  case PM_REST_PARAMETER_NODE: {
1590
+ pm_buffer_append_varuint(buffer, (uint32_t)(node->flags & ~PM_NODE_FLAG_COMMON_MASK));
1590
1591
  pm_buffer_append_varuint(buffer, pm_sizet_to_u32(((pm_rest_parameter_node_t *)node)->name));
1591
1592
  if (((pm_rest_parameter_node_t *)node)->name_loc.start == NULL) {
1592
1593
  pm_buffer_append_byte(buffer, 0);
@@ -1842,6 +1843,17 @@ pm_serialize_node(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1842
1843
  }
1843
1844
  }
1844
1845
 
1846
+ static void
1847
+ pm_serialize_newline_list(pm_newline_list_t *list, pm_buffer_t *buffer) {
1848
+ uint32_t size = pm_sizet_to_u32(list->size);
1849
+ pm_buffer_append_varuint(buffer, size);
1850
+
1851
+ for (uint32_t i = 0; i < size; i++) {
1852
+ uint32_t offset = pm_sizet_to_u32(list->offsets[i]);
1853
+ pm_buffer_append_varuint(buffer, offset);
1854
+ }
1855
+ }
1856
+
1845
1857
  static void
1846
1858
  pm_serialize_comment(pm_parser_t *parser, pm_comment_t *comment, pm_buffer_t *buffer) {
1847
1859
  // serialize type
@@ -1904,6 +1916,8 @@ pm_serialize_diagnostic(pm_parser_t *parser, pm_diagnostic_t *diagnostic, pm_buf
1904
1916
 
1905
1917
  // serialize location
1906
1918
  pm_serialize_location(parser, &diagnostic->location, buffer);
1919
+
1920
+ pm_buffer_append_byte(buffer, diagnostic->level);
1907
1921
  }
1908
1922
 
1909
1923
  static void
@@ -1926,19 +1940,25 @@ pm_serialize_encoding(const pm_encoding_t *encoding, pm_buffer_t *buffer) {
1926
1940
  pm_buffer_append_string(buffer, encoding->name, encoding_length);
1927
1941
  }
1928
1942
 
1929
- #line 216 "serialize.c.erb"
1930
- /**
1931
- * Serialize the encoding, metadata, nodes, and constant pool.
1932
- */
1933
- void
1934
- pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1943
+ static void
1944
+ pm_serialize_metadata(pm_parser_t *parser, pm_buffer_t *buffer) {
1935
1945
  pm_serialize_encoding(parser->encoding, buffer);
1936
1946
  pm_buffer_append_varsint(buffer, parser->start_line);
1947
+ pm_serialize_newline_list(&parser->newline_list, buffer);
1937
1948
  pm_serialize_comment_list(parser, &parser->comment_list, buffer);
1938
1949
  pm_serialize_magic_comment_list(parser, &parser->magic_comment_list, buffer);
1939
1950
  pm_serialize_data_loc(parser, buffer);
1940
1951
  pm_serialize_diagnostic_list(parser, &parser->error_list, buffer);
1941
1952
  pm_serialize_diagnostic_list(parser, &parser->warning_list, buffer);
1953
+ }
1954
+
1955
+ #line 243 "serialize.c.erb"
1956
+ /**
1957
+ * Serialize the metadata, nodes, and constant pool.
1958
+ */
1959
+ void
1960
+ pm_serialize_content(pm_parser_t *parser, pm_node_t *node, pm_buffer_t *buffer) {
1961
+ pm_serialize_metadata(parser, buffer);
1942
1962
 
1943
1963
  // Here we're going to leave space for the offset of the constant pool in
1944
1964
  // the buffer.
@@ -2029,13 +2049,7 @@ pm_serialize_lex(pm_buffer_t *buffer, const uint8_t *source, size_t size, const
2029
2049
  // Append 0 to mark end of tokens.
2030
2050
  pm_buffer_append_byte(buffer, 0);
2031
2051
 
2032
- pm_serialize_encoding(parser.encoding, buffer);
2033
- pm_buffer_append_varsint(buffer, parser.start_line);
2034
- pm_serialize_comment_list(&parser, &parser.comment_list, buffer);
2035
- pm_serialize_magic_comment_list(&parser, &parser.magic_comment_list, buffer);
2036
- pm_serialize_data_loc(&parser, buffer);
2037
- pm_serialize_diagnostic_list(&parser, &parser.error_list, buffer);
2038
- pm_serialize_diagnostic_list(&parser, &parser.warning_list, buffer);
2052
+ pm_serialize_metadata(&parser, buffer);
2039
2053
 
2040
2054
  pm_node_destroy(&parser, node);
2041
2055
  pm_parser_free(&parser);
data/src/token_type.c CHANGED
@@ -13,8 +13,7 @@
13
13
  * Returns a string representation of the given token type.
14
14
  */
15
15
  PRISM_EXPORTED_FUNCTION const char *
16
- pm_token_type_to_str(pm_token_type_t token_type)
17
- {
16
+ pm_token_type_name(pm_token_type_t token_type) {
18
17
  switch (token_type) {
19
18
  case PM_TOKEN_EOF:
20
19
  return "EOF";
@@ -345,7 +344,357 @@ pm_token_type_to_str(pm_token_type_t token_type)
345
344
  case PM_TOKEN___END__:
346
345
  return "__END__";
347
346
  case PM_TOKEN_MAXIMUM:
348
- return "MAXIMUM";
347
+ assert(false && "unreachable");
348
+ return "";
349
349
  }
350
- return "\0";
350
+
351
+ // Provide a default, because some compilers can't determine that the above
352
+ // switch is exhaustive.
353
+ assert(false && "unreachable");
354
+ return "";
355
+ }
356
+
357
+ /**
358
+ * Returns the human name of the given token type.
359
+ */
360
+ const char *
361
+ pm_token_type_human(pm_token_type_t token_type) {
362
+ switch (token_type) {
363
+ case PM_TOKEN_EOF:
364
+ return "end of file";
365
+ case PM_TOKEN_MISSING:
366
+ return "missing token";
367
+ case PM_TOKEN_NOT_PROVIDED:
368
+ return "not provided token";
369
+ case PM_TOKEN_AMPERSAND:
370
+ return "'&'";
371
+ case PM_TOKEN_AMPERSAND_AMPERSAND:
372
+ return "'&&'";
373
+ case PM_TOKEN_AMPERSAND_AMPERSAND_EQUAL:
374
+ return "'&&='";
375
+ case PM_TOKEN_AMPERSAND_DOT:
376
+ return "'&.'";
377
+ case PM_TOKEN_AMPERSAND_EQUAL:
378
+ return "'&='";
379
+ case PM_TOKEN_BACKTICK:
380
+ return "'`'";
381
+ case PM_TOKEN_BACK_REFERENCE:
382
+ return "back reference";
383
+ case PM_TOKEN_BANG:
384
+ return "'!'";
385
+ case PM_TOKEN_BANG_EQUAL:
386
+ return "'!='";
387
+ case PM_TOKEN_BANG_TILDE:
388
+ return "'!~'";
389
+ case PM_TOKEN_BRACE_LEFT:
390
+ return "'{'";
391
+ case PM_TOKEN_BRACE_RIGHT:
392
+ return "'}'";
393
+ case PM_TOKEN_BRACKET_LEFT:
394
+ return "'['";
395
+ case PM_TOKEN_BRACKET_LEFT_ARRAY:
396
+ return "'['";
397
+ case PM_TOKEN_BRACKET_LEFT_RIGHT:
398
+ return "'[]'";
399
+ case PM_TOKEN_BRACKET_LEFT_RIGHT_EQUAL:
400
+ return "'[]='";
401
+ case PM_TOKEN_BRACKET_RIGHT:
402
+ return "']'";
403
+ case PM_TOKEN_CARET:
404
+ return "'^'";
405
+ case PM_TOKEN_CARET_EQUAL:
406
+ return "'^='";
407
+ case PM_TOKEN_CHARACTER_LITERAL:
408
+ return "character literal";
409
+ case PM_TOKEN_CLASS_VARIABLE:
410
+ return "class variable";
411
+ case PM_TOKEN_COLON:
412
+ return "':'";
413
+ case PM_TOKEN_COLON_COLON:
414
+ return "'::'";
415
+ case PM_TOKEN_COMMA:
416
+ return "','";
417
+ case PM_TOKEN_COMMENT:
418
+ return "comment";
419
+ case PM_TOKEN_CONSTANT:
420
+ return "constant";
421
+ case PM_TOKEN_DOT:
422
+ return "'.'";
423
+ case PM_TOKEN_DOT_DOT:
424
+ return "'..'";
425
+ case PM_TOKEN_DOT_DOT_DOT:
426
+ return "'...'";
427
+ case PM_TOKEN_EMBDOC_BEGIN:
428
+ return "'=begin'";
429
+ case PM_TOKEN_EMBDOC_END:
430
+ return "'=end'";
431
+ case PM_TOKEN_EMBDOC_LINE:
432
+ return "embedded documentation line";
433
+ case PM_TOKEN_EMBEXPR_BEGIN:
434
+ return "'#{'";
435
+ case PM_TOKEN_EMBEXPR_END:
436
+ return "'}'";
437
+ case PM_TOKEN_EMBVAR:
438
+ return "'#'";
439
+ case PM_TOKEN_EQUAL:
440
+ return "'='";
441
+ case PM_TOKEN_EQUAL_EQUAL:
442
+ return "'=='";
443
+ case PM_TOKEN_EQUAL_EQUAL_EQUAL:
444
+ return "'==='";
445
+ case PM_TOKEN_EQUAL_GREATER:
446
+ return "'=>'";
447
+ case PM_TOKEN_EQUAL_TILDE:
448
+ return "'=~'";
449
+ case PM_TOKEN_FLOAT:
450
+ return "float";
451
+ case PM_TOKEN_FLOAT_IMAGINARY:
452
+ return "imaginary";
453
+ case PM_TOKEN_FLOAT_RATIONAL:
454
+ return "rational";
455
+ case PM_TOKEN_FLOAT_RATIONAL_IMAGINARY:
456
+ return "imaginary";
457
+ case PM_TOKEN_GLOBAL_VARIABLE:
458
+ return "global variable";
459
+ case PM_TOKEN_GREATER:
460
+ return "'>'";
461
+ case PM_TOKEN_GREATER_EQUAL:
462
+ return "'>='";
463
+ case PM_TOKEN_GREATER_GREATER:
464
+ return "'>>'";
465
+ case PM_TOKEN_GREATER_GREATER_EQUAL:
466
+ return "'>>='";
467
+ case PM_TOKEN_HEREDOC_END:
468
+ return "heredoc ending";
469
+ case PM_TOKEN_HEREDOC_START:
470
+ return "heredoc beginning";
471
+ case PM_TOKEN_IDENTIFIER:
472
+ return "local variable or method";
473
+ case PM_TOKEN_IGNORED_NEWLINE:
474
+ return "ignored newline";
475
+ case PM_TOKEN_INSTANCE_VARIABLE:
476
+ return "instance variable";
477
+ case PM_TOKEN_INTEGER:
478
+ return "integer";
479
+ case PM_TOKEN_INTEGER_IMAGINARY:
480
+ return "imaginary";
481
+ case PM_TOKEN_INTEGER_RATIONAL:
482
+ return "rational";
483
+ case PM_TOKEN_INTEGER_RATIONAL_IMAGINARY:
484
+ return "imaginary";
485
+ case PM_TOKEN_KEYWORD_ALIAS:
486
+ return "'alias'";
487
+ case PM_TOKEN_KEYWORD_AND:
488
+ return "'and'";
489
+ case PM_TOKEN_KEYWORD_BEGIN:
490
+ return "'begin'";
491
+ case PM_TOKEN_KEYWORD_BEGIN_UPCASE:
492
+ return "'BEGIN'";
493
+ case PM_TOKEN_KEYWORD_BREAK:
494
+ return "'break'";
495
+ case PM_TOKEN_KEYWORD_CASE:
496
+ return "'case'";
497
+ case PM_TOKEN_KEYWORD_CLASS:
498
+ return "'class'";
499
+ case PM_TOKEN_KEYWORD_DEF:
500
+ return "'def'";
501
+ case PM_TOKEN_KEYWORD_DEFINED:
502
+ return "'defined?'";
503
+ case PM_TOKEN_KEYWORD_DO:
504
+ return "'do'";
505
+ case PM_TOKEN_KEYWORD_DO_LOOP:
506
+ return "'do'";
507
+ case PM_TOKEN_KEYWORD_ELSE:
508
+ return "'else'";
509
+ case PM_TOKEN_KEYWORD_ELSIF:
510
+ return "'elsif'";
511
+ case PM_TOKEN_KEYWORD_END:
512
+ return "'end'";
513
+ case PM_TOKEN_KEYWORD_END_UPCASE:
514
+ return "'END'";
515
+ case PM_TOKEN_KEYWORD_ENSURE:
516
+ return "'ensure'";
517
+ case PM_TOKEN_KEYWORD_FALSE:
518
+ return "'false'";
519
+ case PM_TOKEN_KEYWORD_FOR:
520
+ return "'for'";
521
+ case PM_TOKEN_KEYWORD_IF:
522
+ return "'if'";
523
+ case PM_TOKEN_KEYWORD_IF_MODIFIER:
524
+ return "'if'";
525
+ case PM_TOKEN_KEYWORD_IN:
526
+ return "'in'";
527
+ case PM_TOKEN_KEYWORD_MODULE:
528
+ return "'module'";
529
+ case PM_TOKEN_KEYWORD_NEXT:
530
+ return "'next'";
531
+ case PM_TOKEN_KEYWORD_NIL:
532
+ return "'nil'";
533
+ case PM_TOKEN_KEYWORD_NOT:
534
+ return "'not'";
535
+ case PM_TOKEN_KEYWORD_OR:
536
+ return "'or'";
537
+ case PM_TOKEN_KEYWORD_REDO:
538
+ return "'redo'";
539
+ case PM_TOKEN_KEYWORD_RESCUE:
540
+ return "'rescue'";
541
+ case PM_TOKEN_KEYWORD_RESCUE_MODIFIER:
542
+ return "'rescue'";
543
+ case PM_TOKEN_KEYWORD_RETRY:
544
+ return "'retry'";
545
+ case PM_TOKEN_KEYWORD_RETURN:
546
+ return "'return'";
547
+ case PM_TOKEN_KEYWORD_SELF:
548
+ return "'self'";
549
+ case PM_TOKEN_KEYWORD_SUPER:
550
+ return "'super'";
551
+ case PM_TOKEN_KEYWORD_THEN:
552
+ return "'then'";
553
+ case PM_TOKEN_KEYWORD_TRUE:
554
+ return "'true'";
555
+ case PM_TOKEN_KEYWORD_UNDEF:
556
+ return "'undef'";
557
+ case PM_TOKEN_KEYWORD_UNLESS:
558
+ return "'unless'";
559
+ case PM_TOKEN_KEYWORD_UNLESS_MODIFIER:
560
+ return "'unless'";
561
+ case PM_TOKEN_KEYWORD_UNTIL:
562
+ return "'until'";
563
+ case PM_TOKEN_KEYWORD_UNTIL_MODIFIER:
564
+ return "'until'";
565
+ case PM_TOKEN_KEYWORD_WHEN:
566
+ return "'when'";
567
+ case PM_TOKEN_KEYWORD_WHILE:
568
+ return "'while'";
569
+ case PM_TOKEN_KEYWORD_WHILE_MODIFIER:
570
+ return "'while'";
571
+ case PM_TOKEN_KEYWORD_YIELD:
572
+ return "'yield'";
573
+ case PM_TOKEN_KEYWORD___ENCODING__:
574
+ return "'__ENCODING__'";
575
+ case PM_TOKEN_KEYWORD___FILE__:
576
+ return "'__FILE__'";
577
+ case PM_TOKEN_KEYWORD___LINE__:
578
+ return "'__LINE__'";
579
+ case PM_TOKEN_LABEL:
580
+ return "label";
581
+ case PM_TOKEN_LABEL_END:
582
+ return "label terminator";
583
+ case PM_TOKEN_LAMBDA_BEGIN:
584
+ return "'{'";
585
+ case PM_TOKEN_LESS:
586
+ return "'<'";
587
+ case PM_TOKEN_LESS_EQUAL:
588
+ return "'<='";
589
+ case PM_TOKEN_LESS_EQUAL_GREATER:
590
+ return "'<=>'";
591
+ case PM_TOKEN_LESS_LESS:
592
+ return "'<<'";
593
+ case PM_TOKEN_LESS_LESS_EQUAL:
594
+ return "'<<='";
595
+ case PM_TOKEN_METHOD_NAME:
596
+ return "method name";
597
+ case PM_TOKEN_MINUS:
598
+ return "'-'";
599
+ case PM_TOKEN_MINUS_EQUAL:
600
+ return "'-='";
601
+ case PM_TOKEN_MINUS_GREATER:
602
+ return "'->'";
603
+ case PM_TOKEN_NEWLINE:
604
+ return "newline";
605
+ case PM_TOKEN_NUMBERED_REFERENCE:
606
+ return "numbered reference";
607
+ case PM_TOKEN_PARENTHESIS_LEFT:
608
+ return "'('";
609
+ case PM_TOKEN_PARENTHESIS_LEFT_PARENTHESES:
610
+ return "'('";
611
+ case PM_TOKEN_PARENTHESIS_RIGHT:
612
+ return "')'";
613
+ case PM_TOKEN_PERCENT:
614
+ return "'%'";
615
+ case PM_TOKEN_PERCENT_EQUAL:
616
+ return "'%='";
617
+ case PM_TOKEN_PERCENT_LOWER_I:
618
+ return "'%i'";
619
+ case PM_TOKEN_PERCENT_LOWER_W:
620
+ return "'%w'";
621
+ case PM_TOKEN_PERCENT_LOWER_X:
622
+ return "'%x'";
623
+ case PM_TOKEN_PERCENT_UPPER_I:
624
+ return "'%I'";
625
+ case PM_TOKEN_PERCENT_UPPER_W:
626
+ return "'%W'";
627
+ case PM_TOKEN_PIPE:
628
+ return "'|'";
629
+ case PM_TOKEN_PIPE_EQUAL:
630
+ return "'|='";
631
+ case PM_TOKEN_PIPE_PIPE:
632
+ return "'||'";
633
+ case PM_TOKEN_PIPE_PIPE_EQUAL:
634
+ return "'||='";
635
+ case PM_TOKEN_PLUS:
636
+ return "'+'";
637
+ case PM_TOKEN_PLUS_EQUAL:
638
+ return "'+='";
639
+ case PM_TOKEN_QUESTION_MARK:
640
+ return "'?'";
641
+ case PM_TOKEN_REGEXP_BEGIN:
642
+ return "regular expression beginning";
643
+ case PM_TOKEN_REGEXP_END:
644
+ return "regular expression ending";
645
+ case PM_TOKEN_SEMICOLON:
646
+ return "';'";
647
+ case PM_TOKEN_SLASH:
648
+ return "'/'";
649
+ case PM_TOKEN_SLASH_EQUAL:
650
+ return "'/='";
651
+ case PM_TOKEN_STAR:
652
+ return "'*'";
653
+ case PM_TOKEN_STAR_EQUAL:
654
+ return "'*='";
655
+ case PM_TOKEN_STAR_STAR:
656
+ return "'**'";
657
+ case PM_TOKEN_STAR_STAR_EQUAL:
658
+ return "'**='";
659
+ case PM_TOKEN_STRING_BEGIN:
660
+ return "string beginning";
661
+ case PM_TOKEN_STRING_CONTENT:
662
+ return "string content";
663
+ case PM_TOKEN_STRING_END:
664
+ return "string ending";
665
+ case PM_TOKEN_SYMBOL_BEGIN:
666
+ return "symbol beginning";
667
+ case PM_TOKEN_TILDE:
668
+ return "'~'";
669
+ case PM_TOKEN_UAMPERSAND:
670
+ return "'&'";
671
+ case PM_TOKEN_UCOLON_COLON:
672
+ return "'::'";
673
+ case PM_TOKEN_UDOT_DOT:
674
+ return "'..'";
675
+ case PM_TOKEN_UDOT_DOT_DOT:
676
+ return "'...'";
677
+ case PM_TOKEN_UMINUS:
678
+ return "'-'";
679
+ case PM_TOKEN_UMINUS_NUM:
680
+ return "'-'";
681
+ case PM_TOKEN_UPLUS:
682
+ return "'+'";
683
+ case PM_TOKEN_USTAR:
684
+ return "*";
685
+ case PM_TOKEN_USTAR_STAR:
686
+ return "'**'";
687
+ case PM_TOKEN_WORDS_SEP:
688
+ return "string separator";
689
+ case PM_TOKEN___END__:
690
+ return "'__END__'";
691
+ case PM_TOKEN_MAXIMUM:
692
+ assert(false && "unreachable");
693
+ return "";
694
+ }
695
+
696
+ // Provide a default, because some compilers can't determine that the above
697
+ // switch is exhaustive.
698
+ assert(false && "unreachable");
699
+ return "";
351
700
  }
data/src/util/pm_buffer.c CHANGED
@@ -160,6 +160,17 @@ pm_buffer_append_varsint(pm_buffer_t *buffer, int32_t value) {
160
160
  pm_buffer_append_varuint(buffer, unsigned_int);
161
161
  }
162
162
 
163
+ /**
164
+ * Prepend the given string to the buffer.
165
+ */
166
+ void
167
+ pm_buffer_prepend_string(pm_buffer_t *buffer, const char *value, size_t length) {
168
+ size_t cursor = buffer->length;
169
+ pm_buffer_append_length(buffer, length);
170
+ memmove(buffer->value + length, buffer->value, cursor);
171
+ memcpy(buffer->value, value, length);
172
+ }
173
+
163
174
  /**
164
175
  * Concatenate one buffer onto another.
165
176
  */
@@ -124,13 +124,13 @@ pm_constant_pool_resize(pm_constant_pool_t *pool) {
124
124
 
125
125
  // If an id is set on this constant, then we know we have content here.
126
126
  // In this case we need to insert it into the next constant pool.
127
- if (bucket->id != 0) {
127
+ if (bucket->id != PM_CONSTANT_ID_UNSET) {
128
128
  uint32_t next_index = bucket->hash & mask;
129
129
 
130
130
  // This implements linear scanning to find the next available slot
131
131
  // in case this index is already taken. We don't need to bother
132
132
  // comparing the values since we know that the hash is unique.
133
- while (next_buckets[next_index].id != 0) {
133
+ while (next_buckets[next_index].id != PM_CONSTANT_ID_UNSET) {
134
134
  next_index = (next_index + 1) & mask;
135
135
  }
136
136
 
@@ -177,17 +177,42 @@ pm_constant_pool_init(pm_constant_pool_t *pool, uint32_t capacity) {
177
177
  */
178
178
  pm_constant_t *
179
179
  pm_constant_pool_id_to_constant(const pm_constant_pool_t *pool, pm_constant_id_t constant_id) {
180
- assert(constant_id > 0 && constant_id <= pool->size);
180
+ assert(constant_id != PM_CONSTANT_ID_UNSET && constant_id <= pool->size);
181
181
  return &pool->constants[constant_id - 1];
182
182
  }
183
183
 
184
+ /**
185
+ * Find a constant in a constant pool. Returns the id of the constant, or 0 if
186
+ * the constant is not found.
187
+ */
188
+ pm_constant_id_t
189
+ pm_constant_pool_find(const pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
190
+ assert(is_power_of_two(pool->capacity));
191
+ const uint32_t mask = pool->capacity - 1;
192
+
193
+ uint32_t hash = pm_constant_pool_hash(start, length);
194
+ uint32_t index = hash & mask;
195
+ pm_constant_pool_bucket_t *bucket;
196
+
197
+ while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
198
+ pm_constant_t *constant = &pool->constants[bucket->id - 1];
199
+ if ((constant->length == length) && memcmp(constant->start, start, length) == 0) {
200
+ return bucket->id;
201
+ }
202
+
203
+ index = (index + 1) & mask;
204
+ }
205
+
206
+ return PM_CONSTANT_ID_UNSET;
207
+ }
208
+
184
209
  /**
185
210
  * Insert a constant into a constant pool and return its index in the pool.
186
211
  */
187
212
  static inline pm_constant_id_t
188
213
  pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t length, pm_constant_pool_bucket_type_t type) {
189
214
  if (pool->size >= (pool->capacity / 4 * 3)) {
190
- if (!pm_constant_pool_resize(pool)) return 0;
215
+ if (!pm_constant_pool_resize(pool)) return PM_CONSTANT_ID_UNSET;
191
216
  }
192
217
 
193
218
  assert(is_power_of_two(pool->capacity));
@@ -197,7 +222,7 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
197
222
  uint32_t index = hash & mask;
198
223
  pm_constant_pool_bucket_t *bucket;
199
224
 
200
- while (bucket = &pool->buckets[index], bucket->id != 0) {
225
+ while (bucket = &pool->buckets[index], bucket->id != PM_CONSTANT_ID_UNSET) {
201
226
  // If there is a collision, then we need to check if the content is the
202
227
  // same as the content we are trying to insert. If it is, then we can
203
228
  // return the id of the existing constant.
@@ -248,8 +273,8 @@ pm_constant_pool_insert(pm_constant_pool_t *pool, const uint8_t *start, size_t l
248
273
  }
249
274
 
250
275
  /**
251
- * Insert a constant into a constant pool. Returns the id of the constant, or 0
252
- * if any potential calls to resize fail.
276
+ * Insert a constant into a constant pool. Returns the id of the constant, or
277
+ * PM_CONSTANT_ID_UNSET if any potential calls to resize fail.
253
278
  */
254
279
  pm_constant_id_t
255
280
  pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
@@ -258,8 +283,8 @@ pm_constant_pool_insert_shared(pm_constant_pool_t *pool, const uint8_t *start, s
258
283
 
259
284
  /**
260
285
  * Insert a constant into a constant pool from memory that is now owned by the
261
- * constant pool. Returns the id of the constant, or 0 if any potential calls to
262
- * resize fail.
286
+ * constant pool. Returns the id of the constant, or PM_CONSTANT_ID_UNSET if any
287
+ * potential calls to resize fail.
263
288
  */
264
289
  pm_constant_id_t
265
290
  pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
@@ -268,7 +293,8 @@ pm_constant_pool_insert_owned(pm_constant_pool_t *pool, const uint8_t *start, si
268
293
 
269
294
  /**
270
295
  * Insert a constant into a constant pool from memory that is constant. Returns
271
- * the id of the constant, or 0 if any potential calls to resize fail.
296
+ * the id of the constant, or PM_CONSTANT_ID_UNSET if any potential calls to
297
+ * resize fail.
272
298
  */
273
299
  pm_constant_id_t
274
300
  pm_constant_pool_insert_constant(pm_constant_pool_t *pool, const uint8_t *start, size_t length) {
@@ -286,7 +312,7 @@ pm_constant_pool_free(pm_constant_pool_t *pool) {
286
312
  pm_constant_pool_bucket_t *bucket = &pool->buckets[index];
287
313
 
288
314
  // If an id is set on this constant, then we know we have content here.
289
- if (bucket->id != 0 && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
315
+ if (bucket->id != PM_CONSTANT_ID_UNSET && bucket->type == PM_CONSTANT_POOL_BUCKET_OWNED) {
290
316
  pm_constant_t *constant = &pool->constants[bucket->id - 1];
291
317
  free((void *) constant->start);
292
318
  }