prism 0.22.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/src/prism.c CHANGED
@@ -492,7 +492,8 @@ pm_parser_err(pm_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_
492
492
  /**
493
493
  * Append an error to the list of errors on the parser using a format string.
494
494
  */
495
- #define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
495
+ #define PM_PARSER_ERR_FORMAT(parser, start, end, diag_id, ...) \
496
+ pm_diagnostic_list_append_format(&parser->error_list, start, end, diag_id, __VA_ARGS__)
496
497
 
497
498
  /**
498
499
  * Append an error to the list of errors on the parser using the location of the
@@ -507,7 +508,8 @@ pm_parser_err_current(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
507
508
  * Append an error to the list of errors on the parser using the given location
508
509
  * using a format string.
509
510
  */
510
- #define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, (location)->start, (location)->end, diag_id, __VA_ARGS__)
511
+ #define PM_PARSER_ERR_LOCATION_FORMAT(parser, location, diag_id, ...) \
512
+ PM_PARSER_ERR_FORMAT(parser, (location)->start, (location)->end, diag_id, __VA_ARGS__)
511
513
 
512
514
  /**
513
515
  * Append an error to the list of errors on the parser using the location of the
@@ -522,7 +524,15 @@ pm_parser_err_node(pm_parser_t *parser, const pm_node_t *node, pm_diagnostic_id_
522
524
  * Append an error to the list of errors on the parser using the location of the
523
525
  * given node and a format string.
524
526
  */
525
- #define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, node->location.start, node->location.end, diag_id, __VA_ARGS__)
527
+ #define PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, ...) \
528
+ PM_PARSER_ERR_FORMAT(parser, (node)->location.start, (node)->location.end, diag_id, __VA_ARGS__)
529
+
530
+ /**
531
+ * Append an error to the list of errors on the parser using the location of the
532
+ * given node and a format string, and add on the content of the node.
533
+ */
534
+ #define PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, diag_id) \
535
+ PM_PARSER_ERR_NODE_FORMAT(parser, node, diag_id, (int) ((node)->location.end - (node)->location.start), (const char *) (node)->location.start)
526
536
 
527
537
  /**
528
538
  * Append an error to the list of errors on the parser using the location of the
@@ -546,7 +556,15 @@ pm_parser_err_token(pm_parser_t *parser, const pm_token_t *token, pm_diagnostic_
546
556
  * Append an error to the list of errors on the parser using the location of the
547
557
  * given token and a format string.
548
558
  */
549
- #define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) pm_diagnostic_list_append_format(&parser->error_list, (token).start, (token).end, diag_id, __VA_ARGS__)
559
+ #define PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, ...) \
560
+ PM_PARSER_ERR_FORMAT(parser, (token).start, (token).end, diag_id, __VA_ARGS__)
561
+
562
+ /**
563
+ * Append an error to the list of errors on the parser using the location of the
564
+ * given token and a format string, and add on the content of the token.
565
+ */
566
+ #define PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, token, diag_id) \
567
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, token, diag_id, (int) ((token).end - (token).start), (const char *) (token).start)
550
568
 
551
569
  /**
552
570
  * Append a warning to the list of warnings on the parser.
@@ -2890,7 +2908,8 @@ pm_def_node_receiver_check(pm_parser_t *parser, const pm_node_t *node) {
2890
2908
  static pm_def_node_t *
2891
2909
  pm_def_node_create(
2892
2910
  pm_parser_t *parser,
2893
- const pm_token_t *name,
2911
+ pm_constant_id_t name,
2912
+ const pm_token_t *name_loc,
2894
2913
  pm_node_t *receiver,
2895
2914
  pm_parameters_node_t *parameters,
2896
2915
  pm_node_t *body,
@@ -2920,8 +2939,8 @@ pm_def_node_create(
2920
2939
  .type = PM_DEF_NODE,
2921
2940
  .location = { .start = def_keyword->start, .end = end },
2922
2941
  },
2923
- .name = pm_parser_constant_id_token(parser, name),
2924
- .name_loc = PM_LOCATION_TOKEN_VALUE(name),
2942
+ .name = name,
2943
+ .name_loc = PM_LOCATION_TOKEN_VALUE(name_loc),
2925
2944
  .receiver = receiver,
2926
2945
  .parameters = parameters,
2927
2946
  .body = body,
@@ -4642,13 +4661,20 @@ pm_multi_target_node_create(pm_parser_t *parser) {
4642
4661
  */
4643
4662
  static void
4644
4663
  pm_multi_target_node_targets_append(pm_parser_t *parser, pm_multi_target_node_t *node, pm_node_t *target) {
4645
- if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE) || PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
4664
+ if (PM_NODE_TYPE_P(target, PM_SPLAT_NODE)) {
4646
4665
  if (node->rest == NULL) {
4647
4666
  node->rest = target;
4648
4667
  } else {
4649
4668
  pm_parser_err_node(parser, target, PM_ERR_MULTI_ASSIGN_MULTI_SPLATS);
4650
4669
  pm_node_list_append(&node->rights, target);
4651
4670
  }
4671
+ } else if (PM_NODE_TYPE_P(target, PM_IMPLICIT_REST_NODE)) {
4672
+ if (node->rest == NULL) {
4673
+ node->rest = target;
4674
+ } else {
4675
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_MULTI_ASSIGN_UNEXPECTED_REST);
4676
+ pm_node_list_append(&node->rights, target);
4677
+ }
4652
4678
  } else if (node->rest == NULL) {
4653
4679
  pm_node_list_append(&node->lefts, target);
4654
4680
  } else {
@@ -7172,7 +7198,7 @@ lex_numeric(pm_parser_t *parser) {
7172
7198
  static pm_token_type_t
7173
7199
  lex_global_variable(pm_parser_t *parser) {
7174
7200
  if (parser->current.end >= parser->end) {
7175
- pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
7201
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
7176
7202
  return PM_TOKEN_GLOBAL_VARIABLE;
7177
7203
  }
7178
7204
 
@@ -7213,7 +7239,7 @@ lex_global_variable(pm_parser_t *parser) {
7213
7239
  } while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0);
7214
7240
 
7215
7241
  // $0 isn't allowed to be followed by anything.
7216
- pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
7242
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
7217
7243
  }
7218
7244
 
7219
7245
  return PM_TOKEN_GLOBAL_VARIABLE;
@@ -7244,7 +7270,7 @@ lex_global_variable(pm_parser_t *parser) {
7244
7270
  } else {
7245
7271
  // If we get here, then we have a $ followed by something that isn't
7246
7272
  // recognized as a global variable.
7247
- pm_parser_err_current(parser, PM_ERR_INVALID_VARIABLE_GLOBAL);
7273
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->current, PM_ERR_INVALID_VARIABLE_GLOBAL);
7248
7274
  }
7249
7275
 
7250
7276
  return PM_TOKEN_GLOBAL_VARIABLE;
@@ -8148,10 +8174,10 @@ lex_at_variable(pm_parser_t *parser) {
8148
8174
  while (parser->current.end < parser->end && (width = char_is_identifier(parser, parser->current.end)) > 0) {
8149
8175
  parser->current.end += width;
8150
8176
  }
8151
- } else if (type == PM_TOKEN_CLASS_VARIABLE) {
8152
- pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_CLASS);
8153
8177
  } else {
8154
- pm_parser_err_current(parser, PM_ERR_INCOMPLETE_VARIABLE_INSTANCE);
8178
+ pm_diagnostic_id_t diag_id = (type == PM_TOKEN_CLASS_VARIABLE) ? PM_ERR_INCOMPLETE_VARIABLE_CLASS : PM_ERR_INCOMPLETE_VARIABLE_INSTANCE;
8179
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
8180
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, diag_id, (int) ((parser->current.end + width) - parser->current.start), (const char *) parser->current.start);
8155
8181
  }
8156
8182
 
8157
8183
  // If we're lexing an embedded variable, then we need to pop back into the
@@ -9711,7 +9737,7 @@ parser_lex(pm_parser_t *parser) {
9711
9737
  // and then find the first one.
9712
9738
  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
9713
9739
  const uint8_t *breakpoints = lex_mode->as.list.breakpoints;
9714
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9740
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
9715
9741
 
9716
9742
  // If we haven't found an escape yet, then this buffer will be
9717
9743
  // unallocated since we can refer directly to the source string.
@@ -9720,7 +9746,7 @@ parser_lex(pm_parser_t *parser) {
9720
9746
  while (breakpoint != NULL) {
9721
9747
  // If we hit a null byte, skip directly past it.
9722
9748
  if (*breakpoint == '\0') {
9723
- breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1));
9749
+ breakpoint = pm_strpbrk(parser, breakpoint + 1, breakpoints, parser->end - (breakpoint + 1), true);
9724
9750
  continue;
9725
9751
  }
9726
9752
 
@@ -9739,7 +9765,7 @@ parser_lex(pm_parser_t *parser) {
9739
9765
  // we need to continue on past it.
9740
9766
  if (lex_mode->as.list.nesting > 0) {
9741
9767
  parser->current.end = breakpoint + 1;
9742
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9768
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
9743
9769
  lex_mode->as.list.nesting--;
9744
9770
  continue;
9745
9771
  }
@@ -9824,7 +9850,7 @@ parser_lex(pm_parser_t *parser) {
9824
9850
  }
9825
9851
 
9826
9852
  token_buffer.cursor = parser->current.end;
9827
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9853
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
9828
9854
  continue;
9829
9855
  }
9830
9856
 
@@ -9837,7 +9863,7 @@ parser_lex(pm_parser_t *parser) {
9837
9863
  // that looked like an interpolated class or instance variable
9838
9864
  // like "#@" but wasn't actually. In this case we'll just skip
9839
9865
  // to the next breakpoint.
9840
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9866
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
9841
9867
  continue;
9842
9868
  }
9843
9869
 
@@ -9852,7 +9878,7 @@ parser_lex(pm_parser_t *parser) {
9852
9878
  // and find the next breakpoint.
9853
9879
  assert(*breakpoint == lex_mode->as.list.incrementor);
9854
9880
  parser->current.end = breakpoint + 1;
9855
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9881
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
9856
9882
  lex_mode->as.list.nesting++;
9857
9883
  continue;
9858
9884
  }
@@ -9891,14 +9917,14 @@ parser_lex(pm_parser_t *parser) {
9891
9917
  // regular expression. We'll use strpbrk to find the first of these
9892
9918
  // characters.
9893
9919
  const uint8_t *breakpoints = lex_mode->as.regexp.breakpoints;
9894
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9920
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
9895
9921
  pm_token_buffer_t token_buffer = { { 0 }, 0 };
9896
9922
 
9897
9923
  while (breakpoint != NULL) {
9898
9924
  // If we hit a null byte, skip directly past it.
9899
9925
  if (*breakpoint == '\0') {
9900
9926
  parser->current.end = breakpoint + 1;
9901
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9927
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
9902
9928
  continue;
9903
9929
  }
9904
9930
 
@@ -9920,7 +9946,7 @@ parser_lex(pm_parser_t *parser) {
9920
9946
  // If the terminator is not a newline, then we can set
9921
9947
  // the next breakpoint and continue.
9922
9948
  parser->current.end = breakpoint + 1;
9923
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9949
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
9924
9950
  continue;
9925
9951
  }
9926
9952
  }
@@ -9930,7 +9956,7 @@ parser_lex(pm_parser_t *parser) {
9930
9956
  if (*breakpoint == lex_mode->as.regexp.terminator) {
9931
9957
  if (lex_mode->as.regexp.nesting > 0) {
9932
9958
  parser->current.end = breakpoint + 1;
9933
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
9959
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
9934
9960
  lex_mode->as.regexp.nesting--;
9935
9961
  continue;
9936
9962
  }
@@ -10029,7 +10055,7 @@ parser_lex(pm_parser_t *parser) {
10029
10055
  }
10030
10056
 
10031
10057
  token_buffer.cursor = parser->current.end;
10032
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10058
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
10033
10059
  continue;
10034
10060
  }
10035
10061
 
@@ -10042,7 +10068,7 @@ parser_lex(pm_parser_t *parser) {
10042
10068
  // something that looked like an interpolated class or
10043
10069
  // instance variable like "#@" but wasn't actually. In
10044
10070
  // this case we'll just skip to the next breakpoint.
10045
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10071
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
10046
10072
  continue;
10047
10073
  }
10048
10074
 
@@ -10057,7 +10083,7 @@ parser_lex(pm_parser_t *parser) {
10057
10083
  // and find the next breakpoint.
10058
10084
  assert(*breakpoint == lex_mode->as.regexp.incrementor);
10059
10085
  parser->current.end = breakpoint + 1;
10060
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10086
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
10061
10087
  lex_mode->as.regexp.nesting++;
10062
10088
  continue;
10063
10089
  }
@@ -10093,7 +10119,7 @@ parser_lex(pm_parser_t *parser) {
10093
10119
  // string. We'll use strpbrk to find the first of these characters.
10094
10120
  pm_lex_mode_t *lex_mode = parser->lex_modes.current;
10095
10121
  const uint8_t *breakpoints = lex_mode->as.string.breakpoints;
10096
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10122
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10097
10123
 
10098
10124
  // If we haven't found an escape yet, then this buffer will be
10099
10125
  // unallocated since we can refer directly to the source string.
@@ -10105,7 +10131,7 @@ parser_lex(pm_parser_t *parser) {
10105
10131
  if (lex_mode->as.string.incrementor != '\0' && *breakpoint == lex_mode->as.string.incrementor) {
10106
10132
  lex_mode->as.string.nesting++;
10107
10133
  parser->current.end = breakpoint + 1;
10108
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10134
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10109
10135
  continue;
10110
10136
  }
10111
10137
 
@@ -10117,7 +10143,7 @@ parser_lex(pm_parser_t *parser) {
10117
10143
  // to continue on past it.
10118
10144
  if (lex_mode->as.string.nesting > 0) {
10119
10145
  parser->current.end = breakpoint + 1;
10120
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10146
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10121
10147
  lex_mode->as.string.nesting--;
10122
10148
  continue;
10123
10149
  }
@@ -10159,7 +10185,7 @@ parser_lex(pm_parser_t *parser) {
10159
10185
  if (parser->heredoc_end == NULL) {
10160
10186
  pm_newline_list_append(&parser->newline_list, breakpoint);
10161
10187
  parser->current.end = breakpoint + 1;
10162
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10188
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10163
10189
  continue;
10164
10190
  } else {
10165
10191
  parser->current.end = breakpoint + 1;
@@ -10173,7 +10199,7 @@ parser_lex(pm_parser_t *parser) {
10173
10199
  case '\0':
10174
10200
  // Skip directly past the null character.
10175
10201
  parser->current.end = breakpoint + 1;
10176
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10202
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10177
10203
  break;
10178
10204
  case '\\': {
10179
10205
  // Here we hit escapes.
@@ -10242,7 +10268,7 @@ parser_lex(pm_parser_t *parser) {
10242
10268
  }
10243
10269
 
10244
10270
  token_buffer.cursor = parser->current.end;
10245
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10271
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10246
10272
  break;
10247
10273
  }
10248
10274
  case '#': {
@@ -10253,7 +10279,7 @@ parser_lex(pm_parser_t *parser) {
10253
10279
  // looked like an interpolated class or instance variable like "#@"
10254
10280
  // but wasn't actually. In this case we'll just skip to the next
10255
10281
  // breakpoint.
10256
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10282
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10257
10283
  break;
10258
10284
  }
10259
10285
 
@@ -10381,7 +10407,7 @@ parser_lex(pm_parser_t *parser) {
10381
10407
  breakpoints[2] = '\0';
10382
10408
  }
10383
10409
 
10384
- const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10410
+ const uint8_t *breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10385
10411
  pm_token_buffer_t token_buffer = { { 0 }, 0 };
10386
10412
  bool was_escaped_newline = false;
10387
10413
 
@@ -10390,7 +10416,7 @@ parser_lex(pm_parser_t *parser) {
10390
10416
  case '\0':
10391
10417
  // Skip directly past the null character.
10392
10418
  parser->current.end = breakpoint + 1;
10393
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10419
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10394
10420
  break;
10395
10421
  case '\n': {
10396
10422
  if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
@@ -10465,7 +10491,7 @@ parser_lex(pm_parser_t *parser) {
10465
10491
  // Otherwise we hit a newline and it wasn't followed by
10466
10492
  // a terminator, so we can continue parsing.
10467
10493
  parser->current.end = breakpoint + 1;
10468
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10494
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10469
10495
  break;
10470
10496
  }
10471
10497
  case '\\': {
@@ -10529,7 +10555,7 @@ parser_lex(pm_parser_t *parser) {
10529
10555
  }
10530
10556
 
10531
10557
  token_buffer.cursor = parser->current.end;
10532
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10558
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10533
10559
  break;
10534
10560
  }
10535
10561
  case '#': {
@@ -10541,7 +10567,7 @@ parser_lex(pm_parser_t *parser) {
10541
10567
  // or instance variable like "#@" but wasn't
10542
10568
  // actually. In this case we'll just skip to the
10543
10569
  // next breakpoint.
10544
- breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end);
10570
+ breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, true);
10545
10571
  break;
10546
10572
  }
10547
10573
 
@@ -11054,7 +11080,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target) {
11054
11080
  return target;
11055
11081
  case PM_BACK_REFERENCE_READ_NODE:
11056
11082
  case PM_NUMBERED_REFERENCE_READ_NODE:
11057
- pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
11083
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
11058
11084
  return target;
11059
11085
  case PM_GLOBAL_VARIABLE_READ_NODE:
11060
11086
  assert(sizeof(pm_global_variable_target_node_t) == sizeof(pm_global_variable_read_node_t));
@@ -11192,7 +11218,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
11192
11218
  }
11193
11219
  case PM_BACK_REFERENCE_READ_NODE:
11194
11220
  case PM_NUMBERED_REFERENCE_READ_NODE:
11195
- pm_parser_err_node(parser, target, PM_ERR_WRITE_TARGET_READONLY);
11221
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
11196
11222
  /* fallthrough */
11197
11223
  case PM_GLOBAL_VARIABLE_READ_NODE: {
11198
11224
  pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
@@ -11367,7 +11393,7 @@ parse_targets(pm_parser_t *parser, pm_node_t *first_target, pm_binding_power_t b
11367
11393
  pm_multi_target_node_targets_append(parser, result, target);
11368
11394
  } else if (!match1(parser, PM_TOKEN_EOF)) {
11369
11395
  // If we get here, then we have a trailing , in a multi target node.
11370
- // We'll set the implicit rest flag to indicate this.
11396
+ // We'll add an implicit rest node to represent this.
11371
11397
  pm_node_t *rest = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
11372
11398
  pm_multi_target_node_targets_append(parser, result, rest);
11373
11399
  break;
@@ -11457,8 +11483,13 @@ parse_statements(pm_parser_t *parser, pm_context_t context) {
11457
11483
 
11458
11484
  while (accept2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON));
11459
11485
  if (context_terminator(context, &parser->current)) break;
11460
- } else {
11461
- expect1(parser, PM_TOKEN_NEWLINE, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
11486
+ } else if (!accept1(parser, PM_TOKEN_NEWLINE)) {
11487
+ // This is an inlined version of accept1 because the error that we
11488
+ // want to add has varargs. If this happens again, we should
11489
+ // probably extract a helper function.
11490
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
11491
+ parser->previous.start = parser->previous.end;
11492
+ parser->previous.type = PM_TOKEN_MISSING;
11462
11493
  }
11463
11494
  }
11464
11495
 
@@ -13852,7 +13883,7 @@ parse_pattern_primitive(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
13852
13883
  pm_constant_id_t name_id = pm_parser_constant_id_constant(parser, "0it", 3);
13853
13884
  variable = (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, 0);
13854
13885
  } else {
13855
- PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE, (int) (parser->previous.end - parser->previous.start), parser->previous.start);
13886
+ PM_PARSER_ERR_TOKEN_FORMAT_CONTENT(parser, parser->previous, PM_ERR_NO_LOCAL_VARIABLE);
13856
13887
  variable = (pm_node_t *) pm_local_variable_read_node_create(parser, &parser->previous, 0);
13857
13888
  }
13858
13889
  }
@@ -14161,7 +14192,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
14161
14192
  parser_lex(parser);
14162
14193
 
14163
14194
  if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
14164
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
14195
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
14165
14196
  // If we get here, then we have an end immediately after a
14166
14197
  // start. In that case we'll create an empty content token and
14167
14198
  // return an uninterpolated string.
@@ -14218,15 +14249,19 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
14218
14249
  parser_lex(parser);
14219
14250
  } while (match1(parser, PM_TOKEN_STRING_CONTENT));
14220
14251
 
14221
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
14252
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
14222
14253
  node = (pm_node_t *) pm_interpolated_string_node_create(parser, &opening, &parts, &parser->previous);
14223
14254
  } else if (accept1(parser, PM_TOKEN_LABEL_END) && !state_is_arg_labeled) {
14224
14255
  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
14225
14256
  } else if (match1(parser, PM_TOKEN_EOF)) {
14226
- pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_TERM);
14257
+ pm_parser_err_token(parser, &opening, PM_ERR_STRING_LITERAL_EOF);
14227
14258
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
14259
+ } else if (accept1(parser, PM_TOKEN_STRING_END)) {
14260
+ node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
14228
14261
  } else {
14229
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
14262
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, PM_ERR_STRING_LITERAL_TERM, pm_token_type_human(parser->previous.type));
14263
+ parser->previous.start = parser->previous.end;
14264
+ parser->previous.type = PM_TOKEN_MISSING;
14230
14265
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped);
14231
14266
  }
14232
14267
  } else if (match1(parser, PM_TOKEN_STRING_CONTENT)) {
@@ -14241,7 +14276,7 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
14241
14276
  if (match2(parser, PM_TOKEN_STRING_END, PM_TOKEN_EOF)) {
14242
14277
  node = (pm_node_t *) pm_string_node_create_unescaped(parser, &opening, &content, &parser->current, &unescaped);
14243
14278
  pm_node_flag_set(node, parse_unescaped_encoding(parser));
14244
- expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_TERM);
14279
+ expect1(parser, PM_TOKEN_STRING_END, PM_ERR_STRING_LITERAL_EOF);
14245
14280
  } else if (accept1(parser, PM_TOKEN_LABEL_END)) {
14246
14281
  node = (pm_node_t *) pm_symbol_node_create_unescaped(parser, &opening, &content, &parser->previous, &unescaped, parse_symbol_encoding(parser, &unescaped));
14247
14282
  } else {
@@ -14332,6 +14367,29 @@ parse_strings(pm_parser_t *parser, pm_node_t *current) {
14332
14367
  return current;
14333
14368
  }
14334
14369
 
14370
+ /**
14371
+ * Append an error to the error list on the parser using the given diagnostic
14372
+ * ID. This function is a specialization that handles formatting the specific
14373
+ * kind of error that is being appended.
14374
+ */
14375
+ static void
14376
+ pm_parser_err_prefix(pm_parser_t *parser, pm_diagnostic_id_t diag_id) {
14377
+ switch (diag_id) {
14378
+ case PM_ERR_HASH_KEY: {
14379
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, pm_token_type_human(parser->previous.type));
14380
+ break;
14381
+ }
14382
+ case PM_ERR_UNARY_RECEIVER: {
14383
+ const char *human = (parser->current.type == PM_TOKEN_EOF ? "end-of-input" : pm_token_type_human(parser->current.type));
14384
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->previous, diag_id, human, parser->previous.start[0]);
14385
+ break;
14386
+ }
14387
+ default:
14388
+ pm_parser_err_previous(parser, diag_id);
14389
+ break;
14390
+ }
14391
+ }
14392
+
14335
14393
  /**
14336
14394
  * Parse an expression that begins with the previous node that we just lexed.
14337
14395
  */
@@ -14516,7 +14574,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14516
14574
  // If we didn't find a terminator and we didn't find a right
14517
14575
  // parenthesis, then this is a syntax error.
14518
14576
  if (!terminator_found) {
14519
- pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
14577
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
14520
14578
  }
14521
14579
 
14522
14580
  // Parse each statement within the parentheses.
@@ -14545,7 +14603,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
14545
14603
  } else if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
14546
14604
  break;
14547
14605
  } else {
14548
- pm_parser_err(parser, parser->current.start, parser->current.start, PM_ERR_EXPECT_EOL_AFTER_STATEMENT);
14606
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(parser->current.type));
14549
14607
  }
14550
14608
  }
14551
14609
 
@@ -15626,10 +15684,11 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
15626
15684
  * methods to override the unary operators, we should ignore
15627
15685
  * the @ in the same way we do for symbols.
15628
15686
  */
15629
- name.end = parse_operator_symbol_name(&name);
15687
+ pm_constant_id_t name_id = pm_parser_constant_id_location(parser, name.start, parse_operator_symbol_name(&name));
15630
15688
 
15631
15689
  return (pm_node_t *) pm_def_node_create(
15632
15690
  parser,
15691
+ name_id,
15633
15692
  &name,
15634
15693
  receiver,
15635
15694
  params,
@@ -16458,7 +16517,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16458
16517
  // context of a multiple assignment. We enforce that here. We'll
16459
16518
  // still lex past it though and create a missing node place.
16460
16519
  if (binding_power != PM_BINDING_POWER_STATEMENT) {
16461
- pm_parser_err_previous(parser, diag_id);
16520
+ pm_parser_err_prefix(parser, diag_id);
16462
16521
  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
16463
16522
  }
16464
16523
 
@@ -16481,7 +16540,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16481
16540
  parser_lex(parser);
16482
16541
 
16483
16542
  pm_token_t operator = parser->previous;
16484
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER_BANG);
16543
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, binding_power < PM_BINDING_POWER_MATCH, PM_ERR_UNARY_RECEIVER);
16485
16544
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "!");
16486
16545
 
16487
16546
  pm_conditional_predicate(receiver);
@@ -16491,7 +16550,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16491
16550
  parser_lex(parser);
16492
16551
 
16493
16552
  pm_token_t operator = parser->previous;
16494
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_TILDE);
16553
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
16495
16554
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "~");
16496
16555
 
16497
16556
  return (pm_node_t *) node;
@@ -16500,7 +16559,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16500
16559
  parser_lex(parser);
16501
16560
 
16502
16561
  pm_token_t operator = parser->previous;
16503
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_MINUS);
16562
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
16504
16563
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "-@");
16505
16564
 
16506
16565
  return (pm_node_t *) node;
@@ -16509,7 +16568,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16509
16568
  parser_lex(parser);
16510
16569
 
16511
16570
  pm_token_t operator = parser->previous;
16512
- pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_MINUS);
16571
+ pm_node_t *node = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
16513
16572
 
16514
16573
  if (accept1(parser, PM_TOKEN_STAR_STAR)) {
16515
16574
  pm_token_t exponent_operator = parser->previous;
@@ -16625,7 +16684,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16625
16684
  parser_lex(parser);
16626
16685
 
16627
16686
  pm_token_t operator = parser->previous;
16628
- pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER_PLUS);
16687
+ pm_node_t *receiver = parse_expression(parser, pm_binding_powers[parser->previous.type].right, false, PM_ERR_UNARY_RECEIVER);
16629
16688
  pm_call_node_t *node = pm_call_node_unary_create(parser, &operator, receiver, "+@");
16630
16689
 
16631
16690
  return (pm_node_t *) node;
@@ -16648,7 +16707,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16648
16707
  // here because it will provide more context in addition to the
16649
16708
  // recoverable error that we will also add.
16650
16709
  if (diag_id != PM_ERR_CANNOT_PARSE_EXPRESSION) {
16651
- pm_parser_err_previous(parser, diag_id);
16710
+ pm_parser_err_prefix(parser, diag_id);
16652
16711
  }
16653
16712
 
16654
16713
  // If we get here, then we are assuming this token is closing a
@@ -16661,7 +16720,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
16661
16720
  // have an unexpected token.
16662
16721
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, pm_token_type_human(parser->current.type));
16663
16722
  } else {
16664
- pm_parser_err_previous(parser, diag_id);
16723
+ pm_parser_err_prefix(parser, diag_id);
16665
16724
  }
16666
16725
 
16667
16726
  return (pm_node_t *) pm_missing_node_create(parser, parser->previous.start, parser->previous.end);
@@ -16895,7 +16954,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
16895
16954
  switch (PM_NODE_TYPE(node)) {
16896
16955
  case PM_BACK_REFERENCE_READ_NODE:
16897
16956
  case PM_NUMBERED_REFERENCE_READ_NODE:
16898
- pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
16957
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
16899
16958
  /* fallthrough */
16900
16959
  case PM_GLOBAL_VARIABLE_READ_NODE: {
16901
16960
  parser_lex(parser);
@@ -17006,7 +17065,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
17006
17065
  switch (PM_NODE_TYPE(node)) {
17007
17066
  case PM_BACK_REFERENCE_READ_NODE:
17008
17067
  case PM_NUMBERED_REFERENCE_READ_NODE:
17009
- pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
17068
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
17010
17069
  /* fallthrough */
17011
17070
  case PM_GLOBAL_VARIABLE_READ_NODE: {
17012
17071
  parser_lex(parser);
@@ -17127,7 +17186,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
17127
17186
  switch (PM_NODE_TYPE(node)) {
17128
17187
  case PM_BACK_REFERENCE_READ_NODE:
17129
17188
  case PM_NUMBERED_REFERENCE_READ_NODE:
17130
- pm_parser_err_node(parser, node, PM_ERR_WRITE_TARGET_READONLY);
17189
+ PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
17131
17190
  /* fallthrough */
17132
17191
  case PM_GLOBAL_VARIABLE_READ_NODE: {
17133
17192
  parser_lex(parser);
@@ -17791,6 +17850,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
17791
17850
  .current = { .type = PM_TOKEN_EOF, .start = source, .end = source },
17792
17851
  .next_start = NULL,
17793
17852
  .heredoc_end = NULL,
17853
+ .data_loc = { .start = NULL, .end = NULL },
17794
17854
  .comment_list = { 0 },
17795
17855
  .magic_comment_list = { 0 },
17796
17856
  .warning_list = { 0 },