prism 0.22.0 → 0.23.0

Sign up to get free protection for your applications and to get access to all the features.
data/src/prism.c CHANGED
@@ -492,7 +492,8 @@ pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_
492
492
  /**
493
493
  * Append an error to the list of errors on the parser using a format string.
494
494
  */
495
- #define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
495
+ #define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
496
+ pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
496
497
 
497
498
  /**
498
499
  * Append an error to the list of errors on the parser using the location of the
@@ -507,7 +508,8 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
507
508
  * Append an error to the list of errors on the parser using the given location
508
509
  * using a format string.
509
510
  */
510
- #define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, (location)->start, (location)->end, diag_id, __VA_ARGS__)
511
+ #define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
512
+ PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
511
513
 
512
514
  /**
513
515
  * Append an error to the list of errors on the parser using the location of the
@@ -522,7 +524,15 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_
522
524
  * Append an error to the list of errors on the parser using the location of the
523
525
  * given node and a format string.
524
526
  */
525
- #define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, node->location.start, node->location.end, diag_id, __VA_ARGS__)
527
+ #define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
528
+ PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
529
+
530
+ /**
531
+ * Append an error to the list of errors on the parser using the location of the
532
+ * given node and a format string, and add on the content of the node.
533
+ */
534
+ #define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
535
+ PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
526
536
 
527
537
  /**
528
538
  * Append an error to the list of errors on the parser using the location of the
@@ -546,7 +556,15 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
546
556
  * Append an error to the list of errors on the parser using the location of the
547
557
  * given token and a format string.
548
558
  */
549
- #define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, (token).start, (token).end, diag_id, __VA_ARGS__)
559
+ #define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
560
+ PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
561
+
562
+ /**
563
+ * Append an error to the list of errors on the parser using the location of the
564
+ * given token and a format string, and add on the content of the token.
565
+ */
566
+ #define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
567
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
550
568
 
551
569
  /**
552
570
  * Append a warning to the list of warnings on the parser.
@@ -2890,7 +2908,8 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
2890
2908
  static pm_def_node_t *
2891
2909
  pm_def_node_create(
2892
2910
  pm_parser_t *parser,
2893
- const pm_token_t *name,
2911
+ pm_constant_id_t name,
2912
+ const pm_token_t *name_loc,
2894
2913
  pm_node_t *receiver,
2895
2914
  pm_parameters_node_t *parameters,
2896
2915
  pm_node_t *body,
@@ -2920,8 +2939,8 @@ pm_def_node_create(
2920
2939
  .type = PM_DEF_NODE,
2921
2940
  .location = { .start = def_keyword->start, .end = end },
2922
2941
  },
2923
- .name = pm_parser_constant_id_token(parser, name),
2924
- .name_loc = PM_LOCATION_TOKEN_VALUE(name),
2942
+ .name = name,
2943
+ .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
2925
2944
  .receiver = receiver,
2926
2945
  .parameters = parameters,
2927
2946
  .body = body,
@@ -4642,13 +4661,20 @@ pm_multi_target_node_create(pm_parser_t *parser) {
4642
4661
  */
4643
4662
  static void
4644
4663
  pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
4645
- if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE) || PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
4664
+ if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
4646
4665
  if (node->rest == NULL) {
4647
4666
  node->rest = target;
4648
4667
  } else {
4649
4668
  pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
4650
4669
  pm_node_list_append(&node->rights, target);
4651
4670
  }
4671
+ } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
4672
+ if (node->rest == NULL) {
4673
+ node->rest = target;
4674
+ } else {
4675
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
4676
+ pm_node_list_append(&node->rights, target);
4677
+ }
4652
4678
  } else if (node->rest == NULL) {
4653
4679
  pm_node_list_append(&node->lefts, target);
4654
4680
  } else {
@@ -7172,7 +7198,7 @@ lex_numeric(pm_parser_t *parser) {
7172
7198
  static pm_token_type_t
7173
7199
  lex_global_variable(pm_parser_t *parser) {
7174
7200
  if (parser->current.end >= parser->end) {
7175
- pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
7201
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
7176
7202
  return PM_TOKEN_GLOBAL_VARIABLE;
7177
7203
  }
7178
7204
 
@@ -7213,7 +7239,7 @@ lex_global_variable(pm_parser_t *parser) {
7213
7239
  } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
7214
7240
 
7215
7241
  // $0 isn't allowed to be followed by anything.
7216
- pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
7242
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
7217
7243
  }
7218
7244
 
7219
7245
  return PM_TOKEN_GLOBAL_VARIABLE;
@@ -7244,7 +7270,7 @@ lex_global_variable(pm_parser_t *parser) {
7244
7270
  } else {
7245
7271
  // If we get here, then we have a $ followed by something that isn't
7246
7272
  // recognized as a global variable.
7247
- pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
7273
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
7248
7274
  }
7249
7275
 
7250
7276
  return PM_TOKEN_GLOBAL_VARIABLE;
@@ -8148,10 +8174,10 @@ lex_at_variable(pm_parser_t *parser) {
8148
8174
  while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
8149
8175
  parser->current.end += width;
8150
8176
  }
8151
- } else if (type == PM_TOKEN_CLASS_VARIABLE) {
8152
- pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_CLASS);
8153
8177
  } else {
8154
- pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_INSTANCE);
8178
+ pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
8179
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8180
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
8155
8181
  }
8156
8182
 
8157
8183
  // If we're lexing an embedded variable, then we need to pop back into the
@@ -9711,7 +9737,7 @@ parser_lex(pm_parser_t *parser) {
9711
9737
  // and then find the first one.
9712
9738
  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
9713
9739
  const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
9714
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9740
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
9715
9741
 
9716
9742
  // If we haven't found an escape yet, then this buffer will be
9717
9743
  // unallocated since we can refer directly to the source string.
@@ -9720,7 +9746,7 @@ parser_lex(pm_parser_t *parser) {
9720
9746
  while (breakpoint != NULL) {
9721
9747
  // If we hit a null byte, skip directly past it.
9722
9748
  if (*breakpoint == '\0') {
9723
- breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
9749
+ breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
9724
9750
  continue;
9725
9751
  }
9726
9752
 
@@ -9739,7 +9765,7 @@ parser_lex(pm_parser_t *parser) {
9739
9765
  // we need to continue on past it.
9740
9766
  if (lex_mode->as.list.nesting > 0) {
9741
9767
  parser->current.end = breakpoint + 1;
9742
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9768
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
9743
9769
  lex_mode->as.list.nesting--;
9744
9770
  continue;
9745
9771
  }
@@ -9824,7 +9850,7 @@ parser_lex(pm_parser_t *parser) {
9824
9850
  }
9825
9851
 
9826
9852
  token_buffer.cursor = parser->current.end;
9827
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9853
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
9828
9854
  continue;
9829
9855
  }
9830
9856
 
@@ -9837,7 +9863,7 @@ parser_lex(pm_parser_t *parser) {
9837
9863
  // that looked like an interpolated class or instance variable
9838
9864
  // like "#@" but wasn't actually. In this case we'll just skip
9839
9865
  // to the next breakpoint.
9840
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9866
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
9841
9867
  continue;
9842
9868
  }
9843
9869
 
@@ -9852,7 +9878,7 @@ parser_lex(pm_parser_t *parser) {
9852
9878
  // and find the next breakpoint.
9853
9879
  assert(*breakpoint == lex_mode->as.list.incrementor);
9854
9880
  parser->current.end = breakpoint + 1;
9855
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9881
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
9856
9882
  lex_mode->as.list.nesting++;
9857
9883
  continue;
9858
9884
  }
@@ -9891,14 +9917,14 @@ parser_lex(pm_parser_t *parser) {
9891
9917
  // regular expression. We'll use strpbrk to find the first of these
9892
9918
  // characters.
9893
9919
  const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
9894
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9920
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
9895
9921
  pm_token_buffer_t token_buffer = { { 0 }, 0 };
9896
9922
 
9897
9923
  while (breakpoint != NULL) {
9898
9924
  // If we hit a null byte, skip directly past it.
9899
9925
  if (*breakpoint == '\0') {
9900
9926
  parser->current.end = breakpoint + 1;
9901
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9927
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
9902
9928
  continue;
9903
9929
  }
9904
9930
 
@@ -9920,7 +9946,7 @@ parser_lex(pm_parser_t *parser) {
9920
9946
  // If the terminator is not a newline, then we can set
9921
9947
  // the next breakpoint and continue.
9922
9948
  parser->current.end = breakpoint + 1;
9923
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9949
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
9924
9950
  continue;
9925
9951
  }
9926
9952
  }
@@ -9930,7 +9956,7 @@ parser_lex(pm_parser_t *parser) {
9930
9956
  if (*breakpoint == lex_mode->as.regexp.terminator) {
9931
9957
  if (lex_mode->as.regexp.nesting > 0) {
9932
9958
  parser->current.end = breakpoint + 1;
9933
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9959
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
9934
9960
  lex_mode->as.regexp.nesting--;
9935
9961
  continue;
9936
9962
  }
@@ -10029,7 +10055,7 @@ parser_lex(pm_parser_t *parser) {
10029
10055
  }
10030
10056
 
10031
10057
  token_buffer.cursor = parser->current.end;
10032
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10058
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
10033
10059
  continue;
10034
10060
  }
10035
10061
 
@@ -10042,7 +10068,7 @@ parser_lex(pm_parser_t *parser) {
10042
10068
  // something that looked like an interpolated class or
10043
10069
  // instance variable like "#@" but wasn't actually. In
10044
10070
  // this case we'll just skip to the next breakpoint.
10045
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10071
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
10046
10072
  continue;
10047
10073
  }
10048
10074
 
@@ -10057,7 +10083,7 @@ parser_lex(pm_parser_t *parser) {
10057
10083
  // and find the next breakpoint.
10058
10084
  assert(*breakpoint == lex_mode->as.regexp.incrementor);
10059
10085
  parser->current.end = breakpoint + 1;
10060
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10086
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
10061
10087
  lex_mode->as.regexp.nesting++;
10062
10088
  continue;
10063
10089
  }
@@ -10093,7 +10119,7 @@ parser_lex(pm_parser_t *parser) {
10093
10119
  // string. We'll use strpbrk to find the first of these characters.
10094
10120
  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
10095
10121
  const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
10096
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10122
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10097
10123
 
10098
10124
  // If we haven't found an escape yet, then this buffer will be
10099
10125
  // unallocated since we can refer directly to the source string.
@@ -10105,7 +10131,7 @@ parser_lex(pm_parser_t *parser) {
10105
10131
  if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
10106
10132
  lex_mode->as.string.nesting++;
10107
10133
  parser->current.end = breakpoint + 1;
10108
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10134
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10109
10135
  continue;
10110
10136
  }
10111
10137
 
@@ -10117,7 +10143,7 @@ parser_lex(pm_parser_t *parser) {
10117
10143
  // to continue on past it.
10118
10144
  if (lex_mode->as.string.nesting > 0) {
10119
10145
  parser->current.end = breakpoint + 1;
10120
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10146
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10121
10147
  lex_mode->as.string.nesting--;
10122
10148
  continue;
10123
10149
  }
@@ -10159,7 +10185,7 @@ parser_lex(pm_parser_t *parser) {
10159
10185
  if (parser->heredoc_end == NULL) {
10160
10186
  pm_newline_list_append(&parser->newline_list, breakpoint);
10161
10187
  parser->current.end = breakpoint + 1;
10162
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10188
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10163
10189
  continue;
10164
10190
  } else {
10165
10191
  parser->current.end = breakpoint + 1;
@@ -10173,7 +10199,7 @@ parser_lex(pm_parser_t *parser) {
10173
10199
  case '\0':
10174
10200
  // Skip directly past the null character.
10175
10201
  parser->current.end = breakpoint + 1;
10176
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10202
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10177
10203
  break;
10178
10204
  case '\\': {
10179
10205
  // Here we hit escapes.
@@ -10242,7 +10268,7 @@ parser_lex(pm_parser_t *parser) {
10242
10268
  }
10243
10269
 
10244
10270
  token_buffer.cursor = parser->current.end;
10245
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10271
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10246
10272
  break;
10247
10273
  }
10248
10274
  case '#': {
@@ -10253,7 +10279,7 @@ parser_lex(pm_parser_t *parser) {
10253
10279
  // looked like an interpolated class or instance variable like "#@"
10254
10280
  // but wasn't actually. In this case we'll just skip to the next
10255
10281
  // breakpoint.
10256
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10282
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10257
10283
  break;
10258
10284
  }
10259
10285
 
@@ -10381,7 +10407,7 @@ parser_lex(pm_parser_t *parser) {
10381
10407
  breakpoints[2] = '\0';
10382
10408
  }
10383
10409
 
10384
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10410
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10385
10411
  pm_token_buffer_t token_buffer = { { 0 }, 0 };
10386
10412
  bool was_escaped_newline = false;
10387
10413
 
@@ -10390,7 +10416,7 @@ parser_lex(pm_parser_t *parser) {
10390
10416
  case '\0':
10391
10417
  // Skip directly past the null character.
10392
10418
  parser->current.end = breakpoint + 1;
10393
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10419
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10394
10420
  break;
10395
10421
  case '\n': {
10396
10422
  if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
@@ -10465,7 +10491,7 @@ parser_lex(pm_parser_t *parser) {
10465
10491
  // Otherwise we hit a newline and it wasn't followed by
10466
10492
  // a terminator, so we can continue parsing.
10467
10493
  parser->current.end = breakpoint + 1;
10468
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10494
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10469
10495
  break;
10470
10496
  }
10471
10497
  case '\\': {
@@ -10529,7 +10555,7 @@ parser_lex(pm_parser_t *parser) {
10529
10555
  }
10530
10556
 
10531
10557
  token_buffer.cursor = parser->current.end;
10532
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10558
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10533
10559
  break;
10534
10560
  }
10535
10561
  case '#': {
@@ -10541,7 +10567,7 @@ parser_lex(pm_parser_t *parser) {
10541
10567
  // or instance variable like "#@" but wasn't
10542
10568
  // actually. In this case we'll just skip to the
10543
10569
  // next breakpoint.
10544
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10570
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10545
10571
  break;
10546
10572
  }
10547
10573
 
@@ -11054,7 +11080,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
11054
11080
  return target;
11055
11081
  case PM_BACK_REFERENCE_READ_NODE:
11056
11082
  case PM_NUMBERED_REFERENCE_READ_NODE:
11057
- pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
11083
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
11058
11084
  return target;
11059
11085
  case PM_GLOBAL_VARIABLE_READ_NODE:
11060
11086
  assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
@@ -11192,7 +11218,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
11192
11218
  }
11193
11219
  case PM_BACK_REFERENCE_READ_NODE:
11194
11220
  case PM_NUMBERED_REFERENCE_READ_NODE:
11195
- pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
11221
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
11196
11222
  /* fallthrough */
11197
11223
  case PM_GLOBAL_VARIABLE_READ_NODE: {
11198
11224
  pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
@@ -11367,7 +11393,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
11367
11393
  pm_multi_target_node_targets_append(parser, result, target);
11368
11394
  } else if (!match1(parser, PM_TOKEN_EOF)) {
11369
11395
  // If we get here, then we have a trailing , in a multi target node.
11370
- // We'll set the implicit rest flag to indicate this.
11396
+ // We'll add an implicit rest node to represent this.
11371
11397
  pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
11372
11398
  pm_multi_target_node_targets_append(parser, result, rest);
11373
11399
  break;
@@ -11457,8 +11483,13 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
11457
11483
 
11458
11484
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
11459
11485
  if (context_terminator(context, &parser->current)) break;
11460
- } else {
11461
- expect1(parser, PM_TOKEN_NEWLINE, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
11486
+ } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
11487
+ // This is an inlined version of expect1 because the error that we
11488
+ // want to add has varargs. If this happens again, we should
11489
+ // probably extract a helper function.
11490
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
11491
+ parser->previous.start = parser->previous.end;
11492
+ parser->previous.type = PM_TOKEN_MISSING;
11462
11493
  }
11463
11494
  }
11464
11495
 
@@ -13852,7 +13883,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13852
13883
  pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
13853
13884
  variable = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
13854
13885
  } else {
13855
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE, (int) (parser->previous.end - parser->previous.start), parser->previous.start);
13886
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
13856
13887
  variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
13857
13888
  }
13858
13889
  }
@@ -14161,7 +14192,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
14161
14192
  parser_lex(parser);
14162
14193
 
14163
14194
  if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
14164
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
14195
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
14165
14196
  // If we get here, then we have an end immediately after a
14166
14197
  // start. In that case we'll create an empty content token and
14167
14198
  // return an uninterpolated string.
@@ -14218,15 +14249,19 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
14218
14249
  parser_lex(parser);
14219
14250
  } while (match1(parser, PM_TOKEN_STRING_CONTENT));
14220
14251
 
14221
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
14252
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
14222
14253
  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
14223
14254
  } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
14224
14255
  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
14225
14256
  } else if (match1(parser, PM_TOKEN_EOF)) {
14226
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_TERM);
14257
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
14227
14258
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
14259
+ } else if (accept1(parser, PM_TOKEN_STRING_END)) {
14260
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
14228
14261
  } else {
14229
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
14262
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
14263
+ parser->previous.start = parser->previous.end;
14264
+ parser->previous.type = PM_TOKEN_MISSING;
14230
14265
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
14231
14266
  }
14232
14267
  } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
@@ -14241,7 +14276,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
14241
14276
  if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
14242
14277
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
14243
14278
  pm_node_flag_set(node, parse_unescaped_encoding(parser));
14244
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
14279
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
14245
14280
  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
14246
14281
  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
14247
14282
  } else {
@@ -14332,6 +14367,29 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
14332
14367
  return current;
14333
14368
  }
14334
14369
 
14370
+ /**
14371
+ * Append an error to the error list on the parser using the given diagnostic
14372
+ * ID. This function is a specialization that handles formatting the specific
14373
+ * kind of error that is being appended.
14374
+ */
14375
+ static void
14376
+ pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
14377
+ switch (diag_id) {
14378
+ case PM_ERR_HASH_KEY: {
14379
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
14380
+ break;
14381
+ }
14382
+ case PM_ERR_UNARY_RECEIVER: {
14383
+ const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
14384
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
14385
+ break;
14386
+ }
14387
+ default:
14388
+ pm_parser_err_previous(parser, diag_id);
14389
+ break;
14390
+ }
14391
+ }
14392
+
14335
14393
  /**
14336
14394
  * Parse an expression that begins with the previous node that we just lexed.
14337
14395
  */
@@ -14516,7 +14574,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14516
14574
  // If we didn't find a terminator and we didn't find a right
14517
14575
  // parenthesis, then this is a syntax error.
14518
14576
  if (!terminator_found) {
14519
- pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
14577
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
14520
14578
  }
14521
14579
 
14522
14580
  // Parse each statement within the parentheses.
@@ -14545,7 +14603,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14545
14603
  } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14546
14604
  break;
14547
14605
  } else {
14548
- pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
14606
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
14549
14607
  }
14550
14608
  }
14551
14609
 
@@ -15626,10 +15684,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15626
15684
  * methods to override the unary operators, we should ignore
15627
15685
  * the @ in the same way we do for symbols.
15628
15686
  */
15629
- name.end = parse_operator_symbol_name(&name);
15687
+ pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
15630
15688
 
15631
15689
  return (pm_node_t *) pm_def_node_create(
15632
15690
  parser,
15691
+ name_id,
15633
15692
  &name,
15634
15693
  receiver,
15635
15694
  params,
@@ -16458,7 +16517,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16458
16517
  // context of a multiple assignment. We enforce that here. We'll
16459
16518
  // still lex past it though and create a missing node place.
16460
16519
  if (binding_power != PM_BINDING_POWER_STATEMENT) {
16461
- pm_parser_err_previous(parser, diag_id);
16520
+ pm_parser_err_prefix(parser, diag_id);
16462
16521
  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
16463
16522
  }
16464
16523
 
@@ -16481,7 +16540,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16481
16540
  parser_lex(parser);
16482
16541
 
16483
16542
  pm_token_t operator = parser->previous;
16484
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER_BANG);
16543
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER);
16485
16544
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
16486
16545
 
16487
16546
  pm_conditional_predicate(receiver);
@@ -16491,7 +16550,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16491
16550
  parser_lex(parser);
16492
16551
 
16493
16552
  pm_token_t operator = parser->previous;
16494
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_TILDE);
16553
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
16495
16554
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
16496
16555
 
16497
16556
  return (pm_node_t *) node;
@@ -16500,7 +16559,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16500
16559
  parser_lex(parser);
16501
16560
 
16502
16561
  pm_token_t operator = parser->previous;
16503
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_MINUS);
16562
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
16504
16563
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
16505
16564
 
16506
16565
  return (pm_node_t *) node;
@@ -16509,7 +16568,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16509
16568
  parser_lex(parser);
16510
16569
 
16511
16570
  pm_token_t operator = parser->previous;
16512
- pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_MINUS);
16571
+ pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
16513
16572
 
16514
16573
  if (accept1(parser, PM_TOKEN_STAR_STAR)) {
16515
16574
  pm_token_t exponent_operator = parser->previous;
@@ -16625,7 +16684,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16625
16684
  parser_lex(parser);
16626
16685
 
16627
16686
  pm_token_t operator = parser->previous;
16628
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_PLUS);
16687
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
16629
16688
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
16630
16689
 
16631
16690
  return (pm_node_t *) node;
@@ -16648,7 +16707,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16648
16707
  // here because it will provide more context in addition to the
16649
16708
  // recoverable error that we will also add.
16650
16709
  if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
16651
- pm_parser_err_previous(parser, diag_id);
16710
+ pm_parser_err_prefix(parser, diag_id);
16652
16711
  }
16653
16712
 
16654
16713
  // If we get here, then we are assuming this token is closing a
@@ -16661,7 +16720,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16661
16720
  // have an unexpected token.
16662
16721
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
16663
16722
  } else {
16664
- pm_parser_err_previous(parser, diag_id);
16723
+ pm_parser_err_prefix(parser, diag_id);
16665
16724
  }
16666
16725
 
16667
16726
  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
@@ -16895,7 +16954,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16895
16954
  switch (PM_NODE_TYPE(node)) {
16896
16955
  case PM_BACK_REFERENCE_READ_NODE:
16897
16956
  case PM_NUMBERED_REFERENCE_READ_NODE:
16898
- pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
16957
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
16899
16958
  /* fallthrough */
16900
16959
  case PM_GLOBAL_VARIABLE_READ_NODE: {
16901
16960
  parser_lex(parser);
@@ -17006,7 +17065,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
17006
17065
  switch (PM_NODE_TYPE(node)) {
17007
17066
  case PM_BACK_REFERENCE_READ_NODE:
17008
17067
  case PM_NUMBERED_REFERENCE_READ_NODE:
17009
- pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
17068
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
17010
17069
  /* fallthrough */
17011
17070
  case PM_GLOBAL_VARIABLE_READ_NODE: {
17012
17071
  parser_lex(parser);
@@ -17127,7 +17186,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
17127
17186
  switch (PM_NODE_TYPE(node)) {
17128
17187
  case PM_BACK_REFERENCE_READ_NODE:
17129
17188
  case PM_NUMBERED_REFERENCE_READ_NODE:
17130
- pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
17189
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
17131
17190
  /* fallthrough */
17132
17191
  case PM_GLOBAL_VARIABLE_READ_NODE: {
17133
17192
  parser_lex(parser);
@@ -17791,6 +17850,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17791
17850
  .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
17792
17851
  .next_start = NULL,
17793
17852
  .heredoc_end = NULL,
17853
+ .data_loc = { .start = NULL, .end = NULL },
17794
17854
  .comment_list = { 0 },
17795
17855
  .magic_comment_list = { 0 },
17796
17856
  .warning_list = { 0 },