prism 0.22.0 → 0.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +39 -1
  3. data/README.md +2 -1
  4. data/docs/releasing.md +67 -17
  5. data/docs/ruby_parser_translation.md +19 -0
  6. data/docs/serialization.md +2 -0
  7. data/ext/prism/api_node.c +1982 -1538
  8. data/ext/prism/extension.c +12 -7
  9. data/ext/prism/extension.h +2 -2
  10. data/include/prism/diagnostic.h +3 -4
  11. data/include/prism/encoding.h +7 -0
  12. data/include/prism/util/pm_constant_pool.h +1 -1
  13. data/include/prism/util/pm_newline_list.h +4 -3
  14. data/include/prism/util/pm_strpbrk.h +4 -1
  15. data/include/prism/version.h +2 -2
  16. data/lib/prism/desugar_compiler.rb +225 -80
  17. data/lib/prism/dsl.rb +302 -299
  18. data/lib/prism/ffi.rb +103 -77
  19. data/lib/prism/lex_compat.rb +1 -0
  20. data/lib/prism/node.rb +3624 -2114
  21. data/lib/prism/node_ext.rb +25 -2
  22. data/lib/prism/parse_result.rb +56 -19
  23. data/lib/prism/serialize.rb +605 -303
  24. data/lib/prism/translation/parser/compiler.rb +1 -1
  25. data/lib/prism/translation/parser/rubocop.rb +11 -3
  26. data/lib/prism/translation/parser.rb +25 -12
  27. data/lib/prism/translation/parser33.rb +12 -0
  28. data/lib/prism/translation/parser34.rb +12 -0
  29. data/lib/prism/translation/ripper.rb +696 -0
  30. data/lib/prism/translation/ruby_parser.rb +1521 -0
  31. data/lib/prism/translation.rb +3 -3
  32. data/lib/prism.rb +0 -1
  33. data/prism.gemspec +6 -2
  34. data/src/diagnostic.c +10 -11
  35. data/src/encoding.c +16 -17
  36. data/src/options.c +7 -2
  37. data/src/prettyprint.c +3 -3
  38. data/src/prism.c +172 -97
  39. data/src/serialize.c +24 -13
  40. data/src/token_type.c +3 -3
  41. data/src/util/pm_constant_pool.c +1 -1
  42. data/src/util/pm_newline_list.c +6 -3
  43. data/src/util/pm_strpbrk.c +122 -14
  44. metadata +8 -4
  45. data/lib/prism/ripper_compat.rb +0 -285
data/src/prism.c CHANGED
@@ -51,6 +51,7 @@ debug_context(pm_context_t context) {
51
51
  case PM_CONTEXT_IF: return "IF";
52
52
  case PM_CONTEXT_MAIN: return "MAIN";
53
53
  case PM_CONTEXT_MODULE: return "MODULE";
54
+ case PM_CONTEXT_NONE: return "NONE";
54
55
  case PM_CONTEXT_PARENS: return "PARENS";
55
56
  case PM_CONTEXT_POSTEXE: return "POSTEXE";
56
57
  case PM_CONTEXT_PREDICATE: return "PREDICATE";
@@ -492,7 +493,8 @@ pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_
492
493
  /**
493
494
  * Append an error to the list of errors on the parser using a format string.
494
495
  */
495
- #define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
496
+ #define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
497
+ pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
496
498
 
497
499
  /**
498
500
  * Append an error to the list of errors on the parser using the location of the
@@ -507,7 +509,8 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
507
509
  * Append an error to the list of errors on the parser using the given location
508
510
  * using a format string.
509
511
  */
510
- #define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, (location)->start, (location)->end, diag_id, __VA_ARGS__)
512
+ #define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
513
+ PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
511
514
 
512
515
  /**
513
516
  * Append an error to the list of errors on the parser using the location of the
@@ -522,7 +525,15 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_
522
525
  * Append an error to the list of errors on the parser using the location of the
523
526
  * given node and a format string.
524
527
  */
525
- #define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, node->location.start, node->location.end, diag_id, __VA_ARGS__)
528
+ #define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
529
+ PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
530
+
531
+ /**
532
+ * Append an error to the list of errors on the parser using the location of the
533
+ * given node and a format string, and add on the content of the node.
534
+ */
535
+ #define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
536
+ PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
526
537
 
527
538
  /**
528
539
  * Append an error to the list of errors on the parser using the location of the
@@ -546,7 +557,15 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
546
557
  * Append an error to the list of errors on the parser using the location of the
547
558
  * given token and a format string.
548
559
  */
549
- #define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, (token).start, (token).end, diag_id, __VA_ARGS__)
560
+ #define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
561
+ PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
562
+
563
+ /**
564
+ * Append an error to the list of errors on the parser using the location of the
565
+ * given token and a format string, and add on the content of the token.
566
+ */
567
+ #define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
568
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
550
569
 
551
570
  /**
552
571
  * Append a warning to the list of warnings on the parser.
@@ -2890,7 +2909,8 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
2890
2909
  static pm_def_node_t *
2891
2910
  pm_def_node_create(
2892
2911
  pm_parser_t *parser,
2893
- const pm_token_t *name,
2912
+ pm_constant_id_t name,
2913
+ const pm_token_t *name_loc,
2894
2914
  pm_node_t *receiver,
2895
2915
  pm_parameters_node_t *parameters,
2896
2916
  pm_node_t *body,
@@ -2920,8 +2940,8 @@ pm_def_node_create(
2920
2940
  .type = PM_DEF_NODE,
2921
2941
  .location = { .start = def_keyword->start, .end = end },
2922
2942
  },
2923
- .name = pm_parser_constant_id_token(parser, name),
2924
- .name_loc = PM_LOCATION_TOKEN_VALUE(name),
2943
+ .name = name,
2944
+ .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
2925
2945
  .receiver = receiver,
2926
2946
  .parameters = parameters,
2927
2947
  .body = body,
@@ -4642,13 +4662,20 @@ pm_multi_target_node_create(pm_parser_t *parser) {
4642
4662
  */
4643
4663
  static void
4644
4664
  pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
4645
- if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE) || PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
4665
+ if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
4646
4666
  if (node->rest == NULL) {
4647
4667
  node->rest = target;
4648
4668
  } else {
4649
4669
  pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
4650
4670
  pm_node_list_append(&node->rights, target);
4651
4671
  }
4672
+ } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
4673
+ if (node->rest == NULL) {
4674
+ node->rest = target;
4675
+ } else {
4676
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
4677
+ pm_node_list_append(&node->rights, target);
4678
+ }
4652
4679
  } else if (node->rest == NULL) {
4653
4680
  pm_node_list_append(&node->lefts, target);
4654
4681
  } else {
@@ -7172,7 +7199,7 @@ lex_numeric(pm_parser_t *parser) {
7172
7199
  static pm_token_type_t
7173
7200
  lex_global_variable(pm_parser_t *parser) {
7174
7201
  if (parser->current.end >= parser->end) {
7175
- pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
7202
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
7176
7203
  return PM_TOKEN_GLOBAL_VARIABLE;
7177
7204
  }
7178
7205
 
@@ -7213,7 +7240,7 @@ lex_global_variable(pm_parser_t *parser) {
7213
7240
  } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
7214
7241
 
7215
7242
  // $0 isn't allowed to be followed by anything.
7216
- pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
7243
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
7217
7244
  }
7218
7245
 
7219
7246
  return PM_TOKEN_GLOBAL_VARIABLE;
@@ -7244,7 +7271,7 @@ lex_global_variable(pm_parser_t *parser) {
7244
7271
  } else {
7245
7272
  // If we get here, then we have a $ followed by something that isn't
7246
7273
  // recognized as a global variable.
7247
- pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
7274
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
7248
7275
  }
7249
7276
 
7250
7277
  return PM_TOKEN_GLOBAL_VARIABLE;
@@ -8148,10 +8175,10 @@ lex_at_variable(pm_parser_t *parser) {
8148
8175
  while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
8149
8176
  parser->current.end += width;
8150
8177
  }
8151
- } else if (type == PM_TOKEN_CLASS_VARIABLE) {
8152
- pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_CLASS);
8153
8178
  } else {
8154
- pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_INSTANCE);
8179
+ pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
8180
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8181
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
8155
8182
  }
8156
8183
 
8157
8184
  // If we're lexing an embedded variable, then we need to pop back into the
@@ -9711,7 +9738,7 @@ parser_lex(pm_parser_t *parser) {
9711
9738
  // and then find the first one.
9712
9739
  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
9713
9740
  const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
9714
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9741
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
9715
9742
 
9716
9743
  // If we haven't found an escape yet, then this buffer will be
9717
9744
  // unallocated since we can refer directly to the source string.
@@ -9720,7 +9747,7 @@ parser_lex(pm_parser_t *parser) {
9720
9747
  while (breakpoint != NULL) {
9721
9748
  // If we hit a null byte, skip directly past it.
9722
9749
  if (*breakpoint == '\0') {
9723
- breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
9750
+ breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
9724
9751
  continue;
9725
9752
  }
9726
9753
 
@@ -9739,7 +9766,7 @@ parser_lex(pm_parser_t *parser) {
9739
9766
  // we need to continue on past it.
9740
9767
  if (lex_mode->as.list.nesting > 0) {
9741
9768
  parser->current.end = breakpoint + 1;
9742
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9769
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
9743
9770
  lex_mode->as.list.nesting--;
9744
9771
  continue;
9745
9772
  }
@@ -9824,7 +9851,7 @@ parser_lex(pm_parser_t *parser) {
9824
9851
  }
9825
9852
 
9826
9853
  token_buffer.cursor = parser->current.end;
9827
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9854
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
9828
9855
  continue;
9829
9856
  }
9830
9857
 
@@ -9837,7 +9864,7 @@ parser_lex(pm_parser_t *parser) {
9837
9864
  // that looked like an interpolated class or instance variable
9838
9865
  // like "#@" but wasn't actually. In this case we'll just skip
9839
9866
  // to the next breakpoint.
9840
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9867
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
9841
9868
  continue;
9842
9869
  }
9843
9870
 
@@ -9852,7 +9879,7 @@ parser_lex(pm_parser_t *parser) {
9852
9879
  // and find the next breakpoint.
9853
9880
  assert(*breakpoint == lex_mode->as.list.incrementor);
9854
9881
  parser->current.end = breakpoint + 1;
9855
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9882
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
9856
9883
  lex_mode->as.list.nesting++;
9857
9884
  continue;
9858
9885
  }
@@ -9891,14 +9918,14 @@ parser_lex(pm_parser_t *parser) {
9891
9918
  // regular expression. We'll use strpbrk to find the first of these
9892
9919
  // characters.
9893
9920
  const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
9894
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9921
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
9895
9922
  pm_token_buffer_t token_buffer = { { 0 }, 0 };
9896
9923
 
9897
9924
  while (breakpoint != NULL) {
9898
9925
  // If we hit a null byte, skip directly past it.
9899
9926
  if (*breakpoint == '\0') {
9900
9927
  parser->current.end = breakpoint + 1;
9901
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9928
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
9902
9929
  continue;
9903
9930
  }
9904
9931
 
@@ -9920,7 +9947,7 @@ parser_lex(pm_parser_t *parser) {
9920
9947
  // If the terminator is not a newline, then we can set
9921
9948
  // the next breakpoint and continue.
9922
9949
  parser->current.end = breakpoint + 1;
9923
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9950
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
9924
9951
  continue;
9925
9952
  }
9926
9953
  }
@@ -9930,7 +9957,7 @@ parser_lex(pm_parser_t *parser) {
9930
9957
  if (*breakpoint == lex_mode->as.regexp.terminator) {
9931
9958
  if (lex_mode->as.regexp.nesting > 0) {
9932
9959
  parser->current.end = breakpoint + 1;
9933
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9960
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
9934
9961
  lex_mode->as.regexp.nesting--;
9935
9962
  continue;
9936
9963
  }
@@ -10029,7 +10056,7 @@ parser_lex(pm_parser_t *parser) {
10029
10056
  }
10030
10057
 
10031
10058
  token_buffer.cursor = parser->current.end;
10032
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10059
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
10033
10060
  continue;
10034
10061
  }
10035
10062
 
@@ -10042,7 +10069,7 @@ parser_lex(pm_parser_t *parser) {
10042
10069
  // something that looked like an interpolated class or
10043
10070
  // instance variable like "#@" but wasn't actually. In
10044
10071
  // this case we'll just skip to the next breakpoint.
10045
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10072
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
10046
10073
  continue;
10047
10074
  }
10048
10075
 
@@ -10057,7 +10084,7 @@ parser_lex(pm_parser_t *parser) {
10057
10084
  // and find the next breakpoint.
10058
10085
  assert(*breakpoint == lex_mode->as.regexp.incrementor);
10059
10086
  parser->current.end = breakpoint + 1;
10060
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10087
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
10061
10088
  lex_mode->as.regexp.nesting++;
10062
10089
  continue;
10063
10090
  }
@@ -10093,7 +10120,7 @@ parser_lex(pm_parser_t *parser) {
10093
10120
  // string. We'll use strpbrk to find the first of these characters.
10094
10121
  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
10095
10122
  const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
10096
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10123
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10097
10124
 
10098
10125
  // If we haven't found an escape yet, then this buffer will be
10099
10126
  // unallocated since we can refer directly to the source string.
@@ -10105,7 +10132,7 @@ parser_lex(pm_parser_t *parser) {
10105
10132
  if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
10106
10133
  lex_mode->as.string.nesting++;
10107
10134
  parser->current.end = breakpoint + 1;
10108
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10135
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10109
10136
  continue;
10110
10137
  }
10111
10138
 
@@ -10117,7 +10144,7 @@ parser_lex(pm_parser_t *parser) {
10117
10144
  // to continue on past it.
10118
10145
  if (lex_mode->as.string.nesting > 0) {
10119
10146
  parser->current.end = breakpoint + 1;
10120
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10147
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10121
10148
  lex_mode->as.string.nesting--;
10122
10149
  continue;
10123
10150
  }
@@ -10159,7 +10186,7 @@ parser_lex(pm_parser_t *parser) {
10159
10186
  if (parser->heredoc_end == NULL) {
10160
10187
  pm_newline_list_append(&parser->newline_list, breakpoint);
10161
10188
  parser->current.end = breakpoint + 1;
10162
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10189
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10163
10190
  continue;
10164
10191
  } else {
10165
10192
  parser->current.end = breakpoint + 1;
@@ -10173,7 +10200,7 @@ parser_lex(pm_parser_t *parser) {
10173
10200
  case '\0':
10174
10201
  // Skip directly past the null character.
10175
10202
  parser->current.end = breakpoint + 1;
10176
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10203
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10177
10204
  break;
10178
10205
  case '\\': {
10179
10206
  // Here we hit escapes.
@@ -10242,7 +10269,7 @@ parser_lex(pm_parser_t *parser) {
10242
10269
  }
10243
10270
 
10244
10271
  token_buffer.cursor = parser->current.end;
10245
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10272
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10246
10273
  break;
10247
10274
  }
10248
10275
  case '#': {
@@ -10253,7 +10280,7 @@ parser_lex(pm_parser_t *parser) {
10253
10280
  // looked like an interpolated class or instance variable like "#@"
10254
10281
  // but wasn't actually. In this case we'll just skip to the next
10255
10282
  // breakpoint.
10256
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10283
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10257
10284
  break;
10258
10285
  }
10259
10286
 
@@ -10381,7 +10408,7 @@ parser_lex(pm_parser_t *parser) {
10381
10408
  breakpoints[2] = '\0';
10382
10409
  }
10383
10410
 
10384
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10411
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10385
10412
  pm_token_buffer_t token_buffer = { { 0 }, 0 };
10386
10413
  bool was_escaped_newline = false;
10387
10414
 
@@ -10390,7 +10417,7 @@ parser_lex(pm_parser_t *parser) {
10390
10417
  case '\0':
10391
10418
  // Skip directly past the null character.
10392
10419
  parser->current.end = breakpoint + 1;
10393
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10420
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10394
10421
  break;
10395
10422
  case '\n': {
10396
10423
  if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
@@ -10465,7 +10492,7 @@ parser_lex(pm_parser_t *parser) {
10465
10492
  // Otherwise we hit a newline and it wasn't followed by
10466
10493
  // a terminator, so we can continue parsing.
10467
10494
  parser->current.end = breakpoint + 1;
10468
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10495
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10469
10496
  break;
10470
10497
  }
10471
10498
  case '\\': {
@@ -10529,7 +10556,7 @@ parser_lex(pm_parser_t *parser) {
10529
10556
  }
10530
10557
 
10531
10558
  token_buffer.cursor = parser->current.end;
10532
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10559
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10533
10560
  break;
10534
10561
  }
10535
10562
  case '#': {
@@ -10541,7 +10568,7 @@ parser_lex(pm_parser_t *parser) {
10541
10568
  // or instance variable like "#@" but wasn't
10542
10569
  // actually. In this case we'll just skip to the
10543
10570
  // next breakpoint.
10544
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10571
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10545
10572
  break;
10546
10573
  }
10547
10574
 
@@ -11054,7 +11081,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
11054
11081
  return target;
11055
11082
  case PM_BACK_REFERENCE_READ_NODE:
11056
11083
  case PM_NUMBERED_REFERENCE_READ_NODE:
11057
- pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
11084
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
11058
11085
  return target;
11059
11086
  case PM_GLOBAL_VARIABLE_READ_NODE:
11060
11087
  assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
@@ -11192,7 +11219,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
11192
11219
  }
11193
11220
  case PM_BACK_REFERENCE_READ_NODE:
11194
11221
  case PM_NUMBERED_REFERENCE_READ_NODE:
11195
- pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
11222
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
11196
11223
  /* fallthrough */
11197
11224
  case PM_GLOBAL_VARIABLE_READ_NODE: {
11198
11225
  pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
@@ -11367,7 +11394,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
11367
11394
  pm_multi_target_node_targets_append(parser, result, target);
11368
11395
  } else if (!match1(parser, PM_TOKEN_EOF)) {
11369
11396
  // If we get here, then we have a trailing , in a multi target node.
11370
- // We'll set the implicit rest flag to indicate this.
11397
+ // We'll add an implicit rest node to represent this.
11371
11398
  pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
11372
11399
  pm_multi_target_node_targets_append(parser, result, rest);
11373
11400
  break;
@@ -11457,8 +11484,13 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
11457
11484
 
11458
11485
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
11459
11486
  if (context_terminator(context, &parser->current)) break;
11460
- } else {
11461
- expect1(parser, PM_TOKEN_NEWLINE, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
11487
+ } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
11488
+ // This is an inlined version of expect1 because the error that we
11489
+ // want to add has varargs. If this happens again, we should
11490
+ // probably extract a helper function.
11491
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
11492
+ parser->previous.start = parser->previous.end;
11493
+ parser->previous.type = PM_TOKEN_MISSING;
11462
11494
  }
11463
11495
  }
11464
11496
 
@@ -13852,7 +13884,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13852
13884
  pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
13853
13885
  variable = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
13854
13886
  } else {
13855
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE, (int) (parser->previous.end - parser->previous.start), parser->previous.start);
13887
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
13856
13888
  variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
13857
13889
  }
13858
13890
  }
@@ -14161,7 +14193,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
14161
14193
  parser_lex(parser);
14162
14194
 
14163
14195
  if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
14164
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
14196
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
14165
14197
  // If we get here, then we have an end immediately after a
14166
14198
  // start. In that case we'll create an empty content token and
14167
14199
  // return an uninterpolated string.
@@ -14174,7 +14206,6 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
14174
14206
  // If we get here, then we have an end of a label immediately
14175
14207
  // after a start. In that case we'll create an empty symbol
14176
14208
  // node.
14177
- pm_token_t opening = not_provided(parser);
14178
14209
  pm_token_t content = parse_strings_empty_content(parser->previous.start);
14179
14210
  pm_symbol_node_t *symbol = pm_symbol_node_create(parser, &opening, &content, &parser->previous);
14180
14211
 
@@ -14218,15 +14249,19 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
14218
14249
  parser_lex(parser);
14219
14250
  } while (match1(parser, PM_TOKEN_STRING_CONTENT));
14220
14251
 
14221
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
14252
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
14222
14253
  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
14223
14254
  } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
14224
14255
  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
14225
14256
  } else if (match1(parser, PM_TOKEN_EOF)) {
14226
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_TERM);
14257
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
14227
14258
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
14259
+ } else if (accept1(parser, PM_TOKEN_STRING_END)) {
14260
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
14228
14261
  } else {
14229
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
14262
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
14263
+ parser->previous.start = parser->previous.end;
14264
+ parser->previous.type = PM_TOKEN_MISSING;
14230
14265
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
14231
14266
  }
14232
14267
  } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
@@ -14241,7 +14276,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
14241
14276
  if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
14242
14277
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
14243
14278
  pm_node_flag_set(node, parse_unescaped_encoding(parser));
14244
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
14279
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
14245
14280
  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
14246
14281
  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
14247
14282
  } else {
@@ -14332,6 +14367,29 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
14332
14367
  return current;
14333
14368
  }
14334
14369
 
14370
+ /**
14371
+ * Append an error to the error list on the parser using the given diagnostic
14372
+ * ID. This function is a specialization that handles formatting the specific
14373
+ * kind of error that is being appended.
14374
+ */
14375
+ static void
14376
+ pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
14377
+ switch (diag_id) {
14378
+ case PM_ERR_HASH_KEY: {
14379
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
14380
+ break;
14381
+ }
14382
+ case PM_ERR_UNARY_RECEIVER: {
14383
+ const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
14384
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
14385
+ break;
14386
+ }
14387
+ default:
14388
+ pm_parser_err_previous(parser, diag_id);
14389
+ break;
14390
+ }
14391
+ }
14392
+
14335
14393
  /**
14336
14394
  * Parse an expression that begins with the previous node that we just lexed.
14337
14395
  */
@@ -14516,7 +14574,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14516
14574
  // If we didn't find a terminator and we didn't find a right
14517
14575
  // parenthesis, then this is a syntax error.
14518
14576
  if (!terminator_found) {
14519
- pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
14577
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
14520
14578
  }
14521
14579
 
14522
14580
  // Parse each statement within the parentheses.
@@ -14545,7 +14603,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14545
14603
  } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14546
14604
  break;
14547
14605
  } else {
14548
- pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
14606
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
14549
14607
  }
14550
14608
  }
14551
14609
 
@@ -15626,10 +15684,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15626
15684
  * methods to override the unary operators, we should ignore
15627
15685
  * the @ in the same way we do for symbols.
15628
15686
  */
15629
- name.end = parse_operator_symbol_name(&name);
15687
+ pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
15630
15688
 
15631
15689
  return (pm_node_t *) pm_def_node_create(
15632
15690
  parser,
15691
+ name_id,
15633
15692
  &name,
15634
15693
  receiver,
15635
15694
  params,
@@ -16458,7 +16517,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16458
16517
  // context of a multiple assignment. We enforce that here. We'll
16459
16518
  // still lex past it though and create a missing node place.
16460
16519
  if (binding_power != PM_BINDING_POWER_STATEMENT) {
16461
- pm_parser_err_previous(parser, diag_id);
16520
+ pm_parser_err_prefix(parser, diag_id);
16462
16521
  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
16463
16522
  }
16464
16523
 
@@ -16481,7 +16540,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16481
16540
  parser_lex(parser);
16482
16541
 
16483
16542
  pm_token_t operator = parser->previous;
16484
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER_BANG);
16543
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER);
16485
16544
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
16486
16545
 
16487
16546
  pm_conditional_predicate(receiver);
@@ -16491,7 +16550,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16491
16550
  parser_lex(parser);
16492
16551
 
16493
16552
  pm_token_t operator = parser->previous;
16494
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_TILDE);
16553
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
16495
16554
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
16496
16555
 
16497
16556
  return (pm_node_t *) node;
@@ -16500,7 +16559,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16500
16559
  parser_lex(parser);
16501
16560
 
16502
16561
  pm_token_t operator = parser->previous;
16503
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_MINUS);
16562
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
16504
16563
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
16505
16564
 
16506
16565
  return (pm_node_t *) node;
@@ -16509,7 +16568,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16509
16568
  parser_lex(parser);
16510
16569
 
16511
16570
  pm_token_t operator = parser->previous;
16512
- pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_MINUS);
16571
+ pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
16513
16572
 
16514
16573
  if (accept1(parser, PM_TOKEN_STAR_STAR)) {
16515
16574
  pm_token_t exponent_operator = parser->previous;
@@ -16625,7 +16684,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16625
16684
  parser_lex(parser);
16626
16685
 
16627
16686
  pm_token_t operator = parser->previous;
16628
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_PLUS);
16687
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
16629
16688
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
16630
16689
 
16631
16690
  return (pm_node_t *) node;
@@ -16648,7 +16707,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16648
16707
  // here because it will provide more context in addition to the
16649
16708
  // recoverable error that we will also add.
16650
16709
  if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
16651
- pm_parser_err_previous(parser, diag_id);
16710
+ pm_parser_err_prefix(parser, diag_id);
16652
16711
  }
16653
16712
 
16654
16713
  // If we get here, then we are assuming this token is closing a
@@ -16661,7 +16720,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16661
16720
  // have an unexpected token.
16662
16721
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
16663
16722
  } else {
16664
- pm_parser_err_previous(parser, diag_id);
16723
+ pm_parser_err_prefix(parser, diag_id);
16665
16724
  }
16666
16725
 
16667
16726
  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
@@ -16710,7 +16769,18 @@ parse_assignment_values(pm_parser_t *parser, pm_binding_power_t previous_binding
16710
16769
  if (is_single_value && match1(parser, PM_TOKEN_KEYWORD_RESCUE_MODIFIER)) {
16711
16770
  pm_token_t rescue = parser->current;
16712
16771
  parser_lex(parser);
16713
- pm_node_t *right = parse_expression(parser, binding_power, false, PM_ERR_RESCUE_MODIFIER_VALUE);
16772
+
16773
+ bool accepts_command_call_inner = false;
16774
+
16775
+ // RHS can accept command call iff the value is a call with arguments but without parentheses.
16776
+ if (PM_NODE_TYPE_P(value, PM_CALL_NODE)) {
16777
+ pm_call_node_t *call_node = (pm_call_node_t *)value;
16778
+ if ((call_node->arguments != NULL) && (call_node->opening_loc.start == NULL)) {
16779
+ accepts_command_call_inner = true;
16780
+ }
16781
+ }
16782
+
16783
+ pm_node_t *right = parse_expression(parser, binding_power, accepts_command_call_inner, PM_ERR_RESCUE_MODIFIER_VALUE);
16714
16784
 
16715
16785
  return (pm_node_t *) pm_rescue_modifier_node_create(parser, value, &rescue, right);
16716
16786
  }
@@ -16895,7 +16965,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16895
16965
  switch (PM_NODE_TYPE(node)) {
16896
16966
  case PM_BACK_REFERENCE_READ_NODE:
16897
16967
  case PM_NUMBERED_REFERENCE_READ_NODE:
16898
- pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
16968
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
16899
16969
  /* fallthrough */
16900
16970
  case PM_GLOBAL_VARIABLE_READ_NODE: {
16901
16971
  parser_lex(parser);
@@ -17006,7 +17076,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
17006
17076
  switch (PM_NODE_TYPE(node)) {
17007
17077
  case PM_BACK_REFERENCE_READ_NODE:
17008
17078
  case PM_NUMBERED_REFERENCE_READ_NODE:
17009
- pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
17079
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
17010
17080
  /* fallthrough */
17011
17081
  case PM_GLOBAL_VARIABLE_READ_NODE: {
17012
17082
  parser_lex(parser);
@@ -17127,7 +17197,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
17127
17197
  switch (PM_NODE_TYPE(node)) {
17128
17198
  case PM_BACK_REFERENCE_READ_NODE:
17129
17199
  case PM_NUMBERED_REFERENCE_READ_NODE:
17130
- pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
17200
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
17131
17201
  /* fallthrough */
17132
17202
  case PM_GLOBAL_VARIABLE_READ_NODE: {
17133
17203
  parser_lex(parser);
@@ -17791,6 +17861,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17791
17861
  .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
17792
17862
  .next_start = NULL,
17793
17863
  .heredoc_end = NULL,
17864
+ .data_loc = { .start = NULL, .end = NULL },
17794
17865
  .comment_list = { 0 },
17795
17866
  .magic_comment_list = { 0 },
17796
17867
  .warning_list = { 0 },
@@ -18055,7 +18126,7 @@ typedef struct {
18055
18126
  pm_diagnostic_t *error;
18056
18127
 
18057
18128
  /** The start line of the diagnostic message. */
18058
- uint32_t line;
18129
+ int32_t line;
18059
18130
 
18060
18131
  /** The column start of the diagnostic message. */
18061
18132
  uint32_t column_start;
@@ -18087,12 +18158,13 @@ typedef struct {
18087
18158
  #define PM_COLOR_RESET "\033[0m"
18088
18159
 
18089
18160
  static inline pm_error_t *
18090
- pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
18161
+ pm_parser_errors_format_sort(const pm_parser_t *parser, const pm_list_t *error_list, const pm_newline_list_t *newline_list) {
18091
18162
  pm_error_t *errors = calloc(error_list->size, sizeof(pm_error_t));
18163
+ int32_t start_line = parser->start_line;
18092
18164
 
18093
18165
  for (pm_diagnostic_t *error = (pm_diagnostic_t *) error_list->head; error != NULL; error = (pm_diagnostic_t *) error->node.next) {
18094
- pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start);
18095
- pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end);
18166
+ pm_line_column_t start = pm_newline_list_line_column(newline_list, error->location.start, start_line);
18167
+ pm_line_column_t end = pm_newline_list_line_column(newline_list, error->location.end, start_line);
18096
18168
 
18097
18169
  // We're going to insert this error into the array in sorted order. We
18098
18170
  // do this by finding the first error that has a line number greater
@@ -18103,8 +18175,8 @@ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_
18103
18175
  (index < error_list->size) &&
18104
18176
  (errors[index].error != NULL) &&
18105
18177
  (
18106
- (errors[index].line < ((uint32_t) start.line)) ||
18107
- (errors[index].line == ((uint32_t) start.line) && errors[index].column_start < ((uint32_t) start.column))
18178
+ (errors[index].line < start.line) ||
18179
+ ((errors[index].line == start.line) && (errors[index].column_start < start.column))
18108
18180
  )
18109
18181
  ) index++;
18110
18182
 
@@ -18117,18 +18189,18 @@ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_
18117
18189
  // Finally, we'll insert the error into the array.
18118
18190
  uint32_t column_end;
18119
18191
  if (start.line == end.line) {
18120
- column_end = (uint32_t) end.column;
18192
+ column_end = end.column;
18121
18193
  } else {
18122
- column_end = (uint32_t) (newline_list->offsets[start.line] - newline_list->offsets[start.line - 1] - 1);
18194
+ column_end = (uint32_t) (newline_list->offsets[start.line - start_line + 1] - newline_list->offsets[start.line - start_line] - 1);
18123
18195
  }
18124
18196
 
18125
18197
  // Ensure we have at least one column of error.
18126
- if (((uint32_t) start.column) == column_end) column_end++;
18198
+ if (start.column == column_end) column_end++;
18127
18199
 
18128
18200
  errors[index] = (pm_error_t) {
18129
18201
  .error = error,
18130
- .line = (uint32_t) start.line,
18131
- .column_start = (uint32_t) start.column,
18202
+ .line = start.line,
18203
+ .column_start = start.column,
18132
18204
  .column_end = column_end
18133
18205
  };
18134
18206
  }
@@ -18137,17 +18209,19 @@ pm_parser_errors_format_sort(const pm_list_t *error_list, const pm_newline_list_
18137
18209
  }
18138
18210
 
18139
18211
  static inline void
18140
- pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, size_t line, pm_buffer_t *buffer) {
18141
- const uint8_t *start = &parser->start[newline_list->offsets[line - 1]];
18212
+ pm_parser_errors_format_line(const pm_parser_t *parser, const pm_newline_list_t *newline_list, const char *number_prefix, int32_t line, pm_buffer_t *buffer) {
18213
+ size_t index = (size_t) (line - parser->start_line);
18214
+
18215
+ const uint8_t *start = &parser->start[newline_list->offsets[index]];
18142
18216
  const uint8_t *end;
18143
18217
 
18144
- if (line >= newline_list->size) {
18218
+ if (index >= newline_list->size - 1) {
18145
18219
  end = parser->end;
18146
18220
  } else {
18147
- end = &parser->start[newline_list->offsets[line]];
18221
+ end = &parser->start[newline_list->offsets[index + 1]];
18148
18222
  }
18149
18223
 
18150
- pm_buffer_append_format(buffer, number_prefix, (uint32_t) line);
18224
+ pm_buffer_append_format(buffer, number_prefix, line);
18151
18225
  pm_buffer_append_string(buffer, (const char *) start, (size_t) (end - start));
18152
18226
 
18153
18227
  if (end == parser->end && end[-1] != '\n') {
@@ -18165,25 +18239,26 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
18165
18239
 
18166
18240
  // First, we're going to sort all of the errors by line number using an
18167
18241
  // insertion sort into a newly allocated array.
18242
+ const int32_t start_line = parser->start_line;
18168
18243
  const pm_newline_list_t *newline_list = &parser->newline_list;
18169
- pm_error_t *errors = pm_parser_errors_format_sort(error_list, newline_list);
18244
+ pm_error_t *errors = pm_parser_errors_format_sort(parser, error_list, newline_list);
18170
18245
 
18171
18246
  // Now we're going to determine how we're going to format line numbers and
18172
18247
  // blank lines based on the maximum number of digits in the line numbers
18173
18248
  // that are going to be displayed.
18174
18249
  pm_error_format_t error_format;
18175
- size_t max_line_number = errors[error_list->size - 1].line;
18250
+ int32_t max_line_number = errors[error_list->size - 1].line - start_line;
18176
18251
 
18177
18252
  if (max_line_number < 10) {
18178
18253
  if (colorize) {
18179
18254
  error_format = (pm_error_format_t) {
18180
- .number_prefix = PM_COLOR_GRAY "%1" PRIu32 " | " PM_COLOR_RESET,
18255
+ .number_prefix = PM_COLOR_GRAY "%1" PRIi32 " | " PM_COLOR_RESET,
18181
18256
  .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18182
18257
  .divider = PM_COLOR_GRAY " ~~~~~" PM_COLOR_RESET "\n"
18183
18258
  };
18184
18259
  } else {
18185
18260
  error_format = (pm_error_format_t) {
18186
- .number_prefix = "%1" PRIu32 " | ",
18261
+ .number_prefix = "%1" PRIi32 " | ",
18187
18262
  .blank_prefix = " | ",
18188
18263
  .divider = " ~~~~~\n"
18189
18264
  };
@@ -18191,13 +18266,13 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
18191
18266
  } else if (max_line_number < 100) {
18192
18267
  if (colorize) {
18193
18268
  error_format = (pm_error_format_t) {
18194
- .number_prefix = PM_COLOR_GRAY "%2" PRIu32 " | " PM_COLOR_RESET,
18269
+ .number_prefix = PM_COLOR_GRAY "%2" PRIi32 " | " PM_COLOR_RESET,
18195
18270
  .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18196
18271
  .divider = PM_COLOR_GRAY " ~~~~~~" PM_COLOR_RESET "\n"
18197
18272
  };
18198
18273
  } else {
18199
18274
  error_format = (pm_error_format_t) {
18200
- .number_prefix = "%2" PRIu32 " | ",
18275
+ .number_prefix = "%2" PRIi32 " | ",
18201
18276
  .blank_prefix = " | ",
18202
18277
  .divider = " ~~~~~~\n"
18203
18278
  };
@@ -18205,13 +18280,13 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
18205
18280
  } else if (max_line_number < 1000) {
18206
18281
  if (colorize) {
18207
18282
  error_format = (pm_error_format_t) {
18208
- .number_prefix = PM_COLOR_GRAY "%3" PRIu32 " | " PM_COLOR_RESET,
18283
+ .number_prefix = PM_COLOR_GRAY "%3" PRIi32 " | " PM_COLOR_RESET,
18209
18284
  .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18210
18285
  .divider = PM_COLOR_GRAY " ~~~~~~~" PM_COLOR_RESET "\n"
18211
18286
  };
18212
18287
  } else {
18213
18288
  error_format = (pm_error_format_t) {
18214
- .number_prefix = "%3" PRIu32 " | ",
18289
+ .number_prefix = "%3" PRIi32 " | ",
18215
18290
  .blank_prefix = " | ",
18216
18291
  .divider = " ~~~~~~~\n"
18217
18292
  };
@@ -18219,13 +18294,13 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
18219
18294
  } else if (max_line_number < 10000) {
18220
18295
  if (colorize) {
18221
18296
  error_format = (pm_error_format_t) {
18222
- .number_prefix = PM_COLOR_GRAY "%4" PRIu32 " | " PM_COLOR_RESET,
18297
+ .number_prefix = PM_COLOR_GRAY "%4" PRIi32 " | " PM_COLOR_RESET,
18223
18298
  .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18224
18299
  .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
18225
18300
  };
18226
18301
  } else {
18227
18302
  error_format = (pm_error_format_t) {
18228
- .number_prefix = "%4" PRIu32 " | ",
18303
+ .number_prefix = "%4" PRIi32 " | ",
18229
18304
  .blank_prefix = " | ",
18230
18305
  .divider = " ~~~~~~~~\n"
18231
18306
  };
@@ -18233,13 +18308,13 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
18233
18308
  } else {
18234
18309
  if (colorize) {
18235
18310
  error_format = (pm_error_format_t) {
18236
- .number_prefix = PM_COLOR_GRAY "%5" PRIu32 " | " PM_COLOR_RESET,
18311
+ .number_prefix = PM_COLOR_GRAY "%5" PRIi32 " | " PM_COLOR_RESET,
18237
18312
  .blank_prefix = PM_COLOR_GRAY " | " PM_COLOR_RESET,
18238
18313
  .divider = PM_COLOR_GRAY " ~~~~~~~~" PM_COLOR_RESET "\n"
18239
18314
  };
18240
18315
  } else {
18241
18316
  error_format = (pm_error_format_t) {
18242
- .number_prefix = "%5" PRIu32 " | ",
18317
+ .number_prefix = "%5" PRIi32 " | ",
18243
18318
  .blank_prefix = " | ",
18244
18319
  .divider = " ~~~~~~~~\n"
18245
18320
  };
@@ -18254,7 +18329,7 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
18254
18329
  // the source before the error to give some context. We'll be careful not to
18255
18330
  // display the same line twice in case the errors are close enough in the
18256
18331
  // source.
18257
- uint32_t last_line = 0;
18332
+ int32_t last_line = 0;
18258
18333
  const pm_encoding_t *encoding = parser->encoding;
18259
18334
 
18260
18335
  for (size_t index = 0; index < error_list->size; index++) {
@@ -18300,7 +18375,7 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
18300
18375
  pm_buffer_append_string(buffer, error_format.blank_prefix, error_format.blank_prefix_length);
18301
18376
 
18302
18377
  size_t column = 0;
18303
- const uint8_t *start = &parser->start[newline_list->offsets[error->line - 1]];
18378
+ const uint8_t *start = &parser->start[newline_list->offsets[error->line - start_line]];
18304
18379
 
18305
18380
  while (column < error->column_end) {
18306
18381
  if (column < error->column_start) {
@@ -18324,7 +18399,7 @@ pm_parser_errors_format(const pm_parser_t *parser, pm_buffer_t *buffer, bool col
18324
18399
  // Here we determine how many lines of padding to display after the
18325
18400
  // error, depending on where the next error is in source.
18326
18401
  last_line = error->line;
18327
- size_t next_line = (index == error_list->size - 1) ? newline_list->size : errors[index + 1].line;
18402
+ int32_t next_line = (index == error_list->size - 1) ? ((int32_t) newline_list->size) : errors[index + 1].line;
18328
18403
 
18329
18404
  if (next_line - last_line > 1) {
18330
18405
  pm_buffer_append_string(buffer, " ", 2);