prism 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/src/prism.c CHANGED
@@ -4142,7 +4142,7 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4142
4142
 
4143
4143
  // If errno is set, then it should only be ERANGE. At this point we need to
4144
4144
  // check if it's infinity (it should be).
4145
- if (errno == ERANGE && isinf(value)) {
4145
+ if (errno == ERANGE && PRISM_ISINF(value)) {
4146
4146
  int warn_width;
4147
4147
  const char *ellipsis;
4148
4148
 
@@ -7684,7 +7684,7 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen
7684
7684
  * Allocate a new UntilNode node.
7685
7685
  */
7686
7686
  static pm_until_node_t *
7687
- pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7687
+ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7688
7688
  pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7689
7689
  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7690
7690
 
@@ -7699,6 +7699,7 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
7699
7699
  },
7700
7700
  },
7701
7701
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7702
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7702
7703
  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7703
7704
  .predicate = predicate,
7704
7705
  .statements = statements
@@ -7727,6 +7728,7 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
7727
7728
  },
7728
7729
  },
7729
7730
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7731
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7730
7732
  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7731
7733
  .predicate = predicate,
7732
7734
  .statements = statements
@@ -7794,7 +7796,7 @@ pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statemen
7794
7796
  * Allocate a new WhileNode node.
7795
7797
  */
7796
7798
  static pm_while_node_t *
7797
- pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7799
+ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7798
7800
  pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7799
7801
  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7800
7802
 
@@ -7809,6 +7811,7 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
7809
7811
  },
7810
7812
  },
7811
7813
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7814
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7812
7815
  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7813
7816
  .predicate = predicate,
7814
7817
  .statements = statements
@@ -7837,6 +7840,7 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
7837
7840
  },
7838
7841
  },
7839
7842
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7843
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7840
7844
  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7841
7845
  .predicate = predicate,
7842
7846
  .statements = statements
@@ -7859,6 +7863,7 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
7859
7863
  .location = PM_LOCATION_NULL_VALUE(parser)
7860
7864
  },
7861
7865
  .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7866
+ .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7862
7867
  .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7863
7868
  .predicate = predicate,
7864
7869
  .statements = statements
@@ -9105,7 +9110,7 @@ lex_global_variable(pm_parser_t *parser) {
9105
9110
  case '-':
9106
9111
  parser->current.end++;
9107
9112
  allow_multiple = false;
9108
- /* fallthrough */
9113
+ PRISM_FALLTHROUGH
9109
9114
  default: {
9110
9115
  size_t width;
9111
9116
 
@@ -10041,8 +10046,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
10041
10046
  escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10042
10047
  return;
10043
10048
  }
10049
+ PRISM_FALLTHROUGH
10044
10050
  }
10045
- /* fallthrough */
10046
10051
  default: {
10047
10052
  if (parser->current.end < parser->end) {
10048
10053
  escape_write_escape_encoded(parser, buffer);
@@ -10496,6 +10501,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10496
10501
  }
10497
10502
 
10498
10503
  const uint8_t *end = parser->current.end - 1;
10504
+ assert(end >= start);
10499
10505
  pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10500
10506
 
10501
10507
  token_buffer->cursor = end;
@@ -10576,9 +10582,15 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
10576
10582
  pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10577
10583
  }
10578
10584
 
10579
- const uint8_t delimiter = *parser->current.end;
10580
- parser->current.end += eol_length;
10585
+ uint8_t delimiter = *parser->current.end;
10586
+
10587
+ // If our delimiter is \r\n, we want to treat it as if it's \n.
10588
+ // For example, %\r\nfoo\r\n should be "foo"
10589
+ if (eol_length == 2) {
10590
+ delimiter = *(parser->current.end + 1);
10591
+ }
10581
10592
 
10593
+ parser->current.end += eol_length;
10582
10594
  return delimiter;
10583
10595
  }
10584
10596
 
@@ -10688,6 +10700,14 @@ parser_lex(pm_parser_t *parser) {
10688
10700
  // We'll check if we're at the end of the file. If we are, then we
10689
10701
  // need to return the EOF token.
10690
10702
  if (parser->current.end >= parser->end) {
10703
+ // If we hit EOF, but the EOF came immediately after a newline,
10704
+ // set the start of the token to the newline. This way any EOF
10705
+ // errors will be reported as happening on that line rather than
10706
+ // a line after. For example "foo(\n" should report an error
10707
+ // on line 1 even though EOF technically occurs on line 2.
10708
+ if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10709
+ parser->current.start -= 1;
10710
+ }
10691
10711
  LEX(PM_TOKEN_EOF);
10692
10712
  }
10693
10713
 
@@ -10730,7 +10750,7 @@ parser_lex(pm_parser_t *parser) {
10730
10750
 
10731
10751
  lexed_comment = true;
10732
10752
  }
10733
- /* fallthrough */
10753
+ PRISM_FALLTHROUGH
10734
10754
  case '\r':
10735
10755
  case '\n': {
10736
10756
  parser->semantic_token_seen = semantic_token_seen & 0x1;
@@ -10772,7 +10792,7 @@ parser_lex(pm_parser_t *parser) {
10772
10792
  parser->current.type = PM_TOKEN_NEWLINE;
10773
10793
  return;
10774
10794
  }
10775
- /* fallthrough */
10795
+ PRISM_FALLTHROUGH
10776
10796
  case PM_IGNORED_NEWLINE_ALL:
10777
10797
  if (!lexed_comment) parser_lex_ignored_newline(parser);
10778
10798
  lexed_comment = false;
@@ -10869,6 +10889,10 @@ parser_lex(pm_parser_t *parser) {
10869
10889
 
10870
10890
  // ,
10871
10891
  case ',':
10892
+ if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10893
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10894
+ }
10895
+
10872
10896
  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10873
10897
  LEX(PM_TOKEN_COMMA);
10874
10898
 
@@ -11783,7 +11807,7 @@ parser_lex(pm_parser_t *parser) {
11783
11807
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11784
11808
  break;
11785
11809
  }
11786
- /* fallthrough */
11810
+ PRISM_FALLTHROUGH
11787
11811
  default:
11788
11812
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11789
11813
  break;
@@ -11980,7 +12004,7 @@ parser_lex(pm_parser_t *parser) {
11980
12004
  pm_token_buffer_push_byte(&token_buffer, '\r');
11981
12005
  break;
11982
12006
  }
11983
- /* fallthrough */
12007
+ PRISM_FALLTHROUGH
11984
12008
  case '\n':
11985
12009
  pm_token_buffer_push_byte(&token_buffer, '\n');
11986
12010
 
@@ -12084,9 +12108,28 @@ parser_lex(pm_parser_t *parser) {
12084
12108
  pm_regexp_token_buffer_t token_buffer = { 0 };
12085
12109
 
12086
12110
  while (breakpoint != NULL) {
12111
+ uint8_t term = lex_mode->as.regexp.terminator;
12112
+ bool is_terminator = (*breakpoint == term);
12113
+
12114
+ // If the terminator is newline, we need to consider \r\n _also_ a newline
12115
+ // For example: `%r\nfoo\r\n`
12116
+ // The regexp should be /foo/, not /foo\r/
12117
+ if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12118
+ if (term == '\n') {
12119
+ is_terminator = true;
12120
+ }
12121
+
12122
+ // If the terminator is a CR, but we see a CRLF, we need to
12123
+ // treat the CRLF as a newline, meaning this is _not_ the
12124
+ // terminator
12125
+ if (term == '\r') {
12126
+ is_terminator = false;
12127
+ }
12128
+ }
12129
+
12087
12130
  // If we hit the terminator, we need to determine what kind of
12088
12131
  // token to return.
12089
- if (*breakpoint == lex_mode->as.regexp.terminator) {
12132
+ if (is_terminator) {
12090
12133
  if (lex_mode->as.regexp.nesting > 0) {
12091
12134
  parser->current.end = breakpoint + 1;
12092
12135
  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
@@ -12148,7 +12191,7 @@ parser_lex(pm_parser_t *parser) {
12148
12191
  pm_regexp_token_buffer_escape(parser, &token_buffer);
12149
12192
  token_buffer.base.cursor = breakpoint;
12150
12193
 
12151
- /* fallthrough */
12194
+ PRISM_FALLTHROUGH
12152
12195
  case '\n':
12153
12196
  // If we've hit a newline, then we need to track that in
12154
12197
  // the list of newlines.
@@ -12190,7 +12233,7 @@ parser_lex(pm_parser_t *parser) {
12190
12233
  pm_token_buffer_push_byte(&token_buffer.base, '\r');
12191
12234
  break;
12192
12235
  }
12193
- /* fallthrough */
12236
+ PRISM_FALLTHROUGH
12194
12237
  case '\n':
12195
12238
  if (parser->heredoc_end) {
12196
12239
  // ... if we are on the same line as a heredoc,
@@ -12316,10 +12359,29 @@ parser_lex(pm_parser_t *parser) {
12316
12359
  continue;
12317
12360
  }
12318
12361
 
12362
+ uint8_t term = lex_mode->as.string.terminator;
12363
+ bool is_terminator = (*breakpoint == term);
12364
+
12365
+ // If the terminator is newline, we need to consider \r\n _also_ a newline
12366
+ // For example: `%\nfoo\r\n`
12367
+ // The string should be "foo", not "foo\r"
12368
+ if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12369
+ if (term == '\n') {
12370
+ is_terminator = true;
12371
+ }
12372
+
12373
+ // If the terminator is a CR, but we see a CRLF, we need to
12374
+ // treat the CRLF as a newline, meaning this is _not_ the
12375
+ // terminator
12376
+ if (term == '\r') {
12377
+ is_terminator = false;
12378
+ }
12379
+ }
12380
+
12319
12381
  // Note that we have to check the terminator here first because we could
12320
12382
  // potentially be parsing a % string that has a # character as the
12321
12383
  // terminator.
12322
- if (*breakpoint == lex_mode->as.string.terminator) {
12384
+ if (is_terminator) {
12323
12385
  // If this terminator doesn't actually close the string, then we need
12324
12386
  // to continue on past it.
12325
12387
  if (lex_mode->as.string.nesting > 0) {
@@ -12379,7 +12441,7 @@ parser_lex(pm_parser_t *parser) {
12379
12441
  pm_token_buffer_escape(parser, &token_buffer);
12380
12442
  token_buffer.cursor = breakpoint;
12381
12443
 
12382
- /* fallthrough */
12444
+ PRISM_FALLTHROUGH
12383
12445
  case '\n':
12384
12446
  // When we hit a newline, we need to flush any potential
12385
12447
  // heredocs. Note that this has to happen after we check
@@ -12424,7 +12486,7 @@ parser_lex(pm_parser_t *parser) {
12424
12486
  pm_token_buffer_push_byte(&token_buffer, '\r');
12425
12487
  break;
12426
12488
  }
12427
- /* fallthrough */
12489
+ PRISM_FALLTHROUGH
12428
12490
  case '\n':
12429
12491
  if (!lex_mode->as.string.interpolation) {
12430
12492
  pm_token_buffer_push_byte(&token_buffer, '\\');
@@ -12632,7 +12694,7 @@ parser_lex(pm_parser_t *parser) {
12632
12694
  pm_token_buffer_escape(parser, &token_buffer);
12633
12695
  token_buffer.cursor = breakpoint;
12634
12696
 
12635
- /* fallthrough */
12697
+ PRISM_FALLTHROUGH
12636
12698
  case '\n': {
12637
12699
  if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12638
12700
  parser_flush_heredoc_end(parser);
@@ -12732,7 +12794,7 @@ parser_lex(pm_parser_t *parser) {
12732
12794
  pm_token_buffer_push_byte(&token_buffer, '\r');
12733
12795
  break;
12734
12796
  }
12735
- /* fallthrough */
12797
+ PRISM_FALLTHROUGH
12736
12798
  case '\n':
12737
12799
  pm_token_buffer_push_byte(&token_buffer, '\\');
12738
12800
  pm_token_buffer_push_byte(&token_buffer, '\n');
@@ -12752,7 +12814,7 @@ parser_lex(pm_parser_t *parser) {
12752
12814
  pm_token_buffer_push_byte(&token_buffer, '\r');
12753
12815
  break;
12754
12816
  }
12755
- /* fallthrough */
12817
+ PRISM_FALLTHROUGH
12756
12818
  case '\n':
12757
12819
  // If we are in a tilde here, we should
12758
12820
  // break out of the loop and return the
@@ -13044,14 +13106,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
13044
13106
  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13045
13107
  }
13046
13108
 
13047
- /**
13048
- * Returns true if the current token is any of the six given types.
13049
- */
13050
- static inline bool
13051
- match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
13052
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
13053
- }
13054
-
13055
13109
  /**
13056
13110
  * Returns true if the current token is any of the seven given types.
13057
13111
  */
@@ -13068,6 +13122,14 @@ match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
13068
13122
  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13069
13123
  }
13070
13124
 
13125
+ /**
13126
+ * Returns true if the current token is any of the nine given types.
13127
+ */
13128
+ static inline bool
13129
+ match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
13130
+ return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
13131
+ }
13132
+
13071
13133
  /**
13072
13134
  * If the current token is of the specified type, lex forward by one token and
13073
13135
  * return true. Otherwise, return false. For example:
@@ -13096,19 +13158,6 @@ accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13096
13158
  return false;
13097
13159
  }
13098
13160
 
13099
- /**
13100
- * If the current token is any of the three given types, lex forward by one
13101
- * token and return true. Otherwise return false.
13102
- */
13103
- static inline bool
13104
- accept3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13105
- if (match3(parser, type1, type2, type3)) {
13106
- parser_lex(parser);
13107
- return true;
13108
- }
13109
- return false;
13110
- }
13111
-
13112
13161
  /**
13113
13162
  * This function indicates that the parser expects a token in a specific
13114
13163
  * position. For example, if you're parsing a BEGIN block, you know that a { is
@@ -13146,20 +13195,6 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
13146
13195
  parser->previous.type = PM_TOKEN_MISSING;
13147
13196
  }
13148
13197
 
13149
- /**
13150
- * This function is the same as expect2, but it expects one of three token types.
13151
- */
13152
- static void
13153
- expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_diagnostic_id_t diag_id) {
13154
- if (accept3(parser, type1, type2, type3)) return;
13155
-
13156
- const uint8_t *location = parser->previous.end;
13157
- pm_parser_err(parser, location, location, diag_id);
13158
-
13159
- parser->previous.start = location;
13160
- parser->previous.type = PM_TOKEN_MISSING;
13161
- }
13162
-
13163
13198
  /**
13164
13199
  * A special expect1 that expects a heredoc terminator and handles popping the
13165
13200
  * lex mode accordingly.
@@ -13501,7 +13536,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
13501
13536
  return (pm_node_t *) pm_index_target_node_create(parser, call);
13502
13537
  }
13503
13538
  }
13504
- /* fallthrough */
13539
+ PRISM_FALLTHROUGH
13505
13540
  default:
13506
13541
  // In this case we have a node that we don't know how to convert
13507
13542
  // into a target. We need to treat it as an error. For now, we'll
@@ -13583,7 +13618,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13583
13618
  case PM_BACK_REFERENCE_READ_NODE:
13584
13619
  case PM_NUMBERED_REFERENCE_READ_NODE:
13585
13620
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13586
- /* fallthrough */
13621
+ PRISM_FALLTHROUGH
13587
13622
  case PM_GLOBAL_VARIABLE_READ_NODE: {
13588
13623
  pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13589
13624
  pm_node_destroy(parser, target);
@@ -13710,6 +13745,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13710
13745
 
13711
13746
  // Replace the name with "[]=".
13712
13747
  call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13748
+
13749
+ // Ensure that the arguments for []= don't contain keywords
13750
+ pm_index_arguments_check(parser, call->arguments, call->block);
13713
13751
  pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13714
13752
 
13715
13753
  return target;
@@ -13722,7 +13760,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13722
13760
  // is no way for us to attach it to the tree at this point.
13723
13761
  pm_node_destroy(parser, value);
13724
13762
  }
13725
- /* fallthrough */
13763
+ PRISM_FALLTHROUGH
13726
13764
  default:
13727
13765
  // In this case we have a node that we don't know how to convert into a
13728
13766
  // target. We need to treat it as an error. For now, we'll mark it as an
@@ -14191,6 +14229,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
14191
14229
  if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
14192
14230
  pm_parser_scope_forwarding_positionals_check(parser, &operator);
14193
14231
  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14232
+ if (parsed_bare_hash) {
14233
+ pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14234
+ }
14194
14235
  } else {
14195
14236
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14196
14237
 
@@ -14239,7 +14280,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
14239
14280
  }
14240
14281
  }
14241
14282
  }
14242
- /* fallthrough */
14283
+ PRISM_FALLTHROUGH
14243
14284
  default: {
14244
14285
  if (argument == NULL) {
14245
14286
  argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
@@ -14482,6 +14523,7 @@ parse_parameters(
14482
14523
  bool allows_trailing_comma,
14483
14524
  bool allows_forwarding_parameters,
14484
14525
  bool accepts_blocks_in_defaults,
14526
+ bool in_block,
14485
14527
  uint16_t depth
14486
14528
  ) {
14487
14529
  pm_do_loop_stack_push(parser, false);
@@ -14646,7 +14688,7 @@ parse_parameters(
14646
14688
  break;
14647
14689
  }
14648
14690
  case PM_TOKEN_LABEL: {
14649
- if (!uses_parentheses) parser->in_keyword_arg = true;
14691
+ if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14650
14692
  update_parameter_state(parser, &parser->current, &order);
14651
14693
 
14652
14694
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
@@ -15115,7 +15157,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
15115
15157
  case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15116
15158
  case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15117
15159
  case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15118
- default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15160
+ default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
15119
15161
  }
15120
15162
 
15121
15163
  else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
@@ -15210,6 +15252,7 @@ parse_block_parameters(
15210
15252
  allows_trailing_comma,
15211
15253
  false,
15212
15254
  accepts_blocks_in_defaults,
15255
+ true,
15213
15256
  (uint16_t) (depth + 1)
15214
15257
  );
15215
15258
  }
@@ -16125,7 +16168,7 @@ parse_operator_symbol_name(const pm_token_t *name) {
16125
16168
  case PM_TOKEN_TILDE:
16126
16169
  case PM_TOKEN_BANG:
16127
16170
  if (name->end[-1] == '@') return name->end - 1;
16128
- /* fallthrough */
16171
+ PRISM_FALLTHROUGH
16129
16172
  default:
16130
16173
  return name->end;
16131
16174
  }
@@ -16381,14 +16424,15 @@ static pm_node_t *
16381
16424
  parse_variable(pm_parser_t *parser) {
16382
16425
  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16383
16426
  int depth;
16427
+ bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
16384
16428
 
16385
- if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
16429
+ if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16386
16430
  return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16387
16431
  }
16388
16432
 
16389
16433
  pm_scope_t *current_scope = parser->current_scope;
16390
16434
  if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16391
- if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
16435
+ if (is_numbered_param) {
16392
16436
  // When you use a numbered parameter, it implies the existence of
16393
16437
  // all of the locals that exist before it. For example, referencing
16394
16438
  // _2 means that _1 must exist. Therefore here we loop through all
@@ -17096,7 +17140,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
17096
17140
  break;
17097
17141
  }
17098
17142
  }
17099
- /* fallthrough */
17143
+ PRISM_FALLTHROUGH
17100
17144
  default: {
17101
17145
  // If we get anything else, then this is an error. For this we'll
17102
17146
  // create a missing node for the value and create an assoc node for
@@ -17592,7 +17636,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
17592
17636
  break;
17593
17637
  }
17594
17638
  }
17595
- /* fallthrough */
17639
+ PRISM_FALLTHROUGH
17596
17640
  default:
17597
17641
  node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17598
17642
  break;
@@ -17614,7 +17658,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
17614
17658
  // Gather up all of the patterns into the list.
17615
17659
  while (accept1(parser, PM_TOKEN_COMMA)) {
17616
17660
  // Break early here in case we have a trailing comma.
17617
- if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
17661
+ if (match9(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF,PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17618
17662
  node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17619
17663
  pm_node_list_append(&nodes, node);
17620
17664
  trailing_rest = true;
@@ -18722,7 +18766,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18722
18766
  pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18723
18767
  }
18724
18768
  }
18725
- /* fallthrough */
18769
+ PRISM_FALLTHROUGH
18726
18770
  default:
18727
18771
  return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18728
18772
  }
@@ -19213,6 +19257,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19213
19257
  context_push(parser, PM_CONTEXT_DEF_PARAMS);
19214
19258
  parser_lex(parser);
19215
19259
 
19260
+ // This will be false if the method name is not a valid identifier
19261
+ // but could be followed by an operator.
19262
+ bool valid_name = true;
19263
+
19216
19264
  switch (parser->current.type) {
19217
19265
  case PM_CASE_OPERATOR:
19218
19266
  pm_parser_scope_push(parser, true);
@@ -19242,10 +19290,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19242
19290
 
19243
19291
  break;
19244
19292
  }
19245
- case PM_TOKEN_CONSTANT:
19246
19293
  case PM_TOKEN_INSTANCE_VARIABLE:
19247
19294
  case PM_TOKEN_CLASS_VARIABLE:
19248
19295
  case PM_TOKEN_GLOBAL_VARIABLE:
19296
+ valid_name = false;
19297
+ PRISM_FALLTHROUGH
19298
+ case PM_TOKEN_CONSTANT:
19249
19299
  case PM_TOKEN_KEYWORD_NIL:
19250
19300
  case PM_TOKEN_KEYWORD_SELF:
19251
19301
  case PM_TOKEN_KEYWORD_TRUE:
@@ -19303,6 +19353,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19303
19353
 
19304
19354
  name = parse_method_definition_name(parser);
19305
19355
  } else {
19356
+ if (!valid_name) {
19357
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19358
+ }
19359
+
19306
19360
  name = identifier;
19307
19361
  }
19308
19362
  break;
@@ -19353,7 +19407,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19353
19407
  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19354
19408
  params = NULL;
19355
19409
  } else {
19356
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, (uint16_t) (depth + 1));
19410
+ params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
19357
19411
  }
19358
19412
 
19359
19413
  lex_state_set(parser, PM_LEX_STATE_BEG);
@@ -19378,7 +19432,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19378
19432
 
19379
19433
  lparen = not_provided(parser);
19380
19434
  rparen = not_provided(parser);
19381
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, (uint16_t) (depth + 1));
19435
+ params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
19382
19436
 
19383
19437
  context_pop(parser);
19384
19438
  break;
@@ -19787,9 +19841,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19787
19841
  pm_do_loop_stack_pop(parser);
19788
19842
  context_pop(parser);
19789
19843
 
19790
- expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19791
- pm_statements_node_t *statements = NULL;
19844
+ pm_token_t do_keyword;
19845
+ if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19846
+ do_keyword = parser->previous;
19847
+ } else {
19848
+ do_keyword = not_provided(parser);
19849
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19850
+ }
19792
19851
 
19852
+ pm_statements_node_t *statements = NULL;
19793
19853
  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19794
19854
  pm_accepts_block_stack_push(parser, true);
19795
19855
  statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
@@ -19800,7 +19860,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19800
19860
  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19801
19861
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19802
19862
 
19803
- return (pm_node_t *) pm_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19863
+ return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19804
19864
  }
19805
19865
  case PM_TOKEN_KEYWORD_WHILE: {
19806
19866
  size_t opening_newline_index = token_newline_index(parser);
@@ -19815,9 +19875,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19815
19875
  pm_do_loop_stack_pop(parser);
19816
19876
  context_pop(parser);
19817
19877
 
19818
- expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19819
- pm_statements_node_t *statements = NULL;
19878
+ pm_token_t do_keyword;
19879
+ if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19880
+ do_keyword = parser->previous;
19881
+ } else {
19882
+ do_keyword = not_provided(parser);
19883
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19884
+ }
19820
19885
 
19886
+ pm_statements_node_t *statements = NULL;
19821
19887
  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19822
19888
  pm_accepts_block_stack_push(parser, true);
19823
19889
  statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
@@ -19828,7 +19894,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19828
19894
  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19829
19895
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19830
19896
 
19831
- return (pm_node_t *) pm_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19897
+ return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19832
19898
  }
19833
19899
  case PM_TOKEN_PERCENT_LOWER_I: {
19834
19900
  parser_lex(parser);
@@ -20898,7 +20964,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20898
20964
  pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
20899
20965
  }
20900
20966
  }
20901
- /* fallthrough */
20967
+ PRISM_FALLTHROUGH
20902
20968
  case PM_CASE_WRITABLE: {
20903
20969
  parser_lex(parser);
20904
20970
  pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
@@ -20944,7 +21010,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20944
21010
  case PM_BACK_REFERENCE_READ_NODE:
20945
21011
  case PM_NUMBERED_REFERENCE_READ_NODE:
20946
21012
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20947
- /* fallthrough */
21013
+ PRISM_FALLTHROUGH
20948
21014
  case PM_GLOBAL_VARIABLE_READ_NODE: {
20949
21015
  parser_lex(parser);
20950
21016
 
@@ -21062,7 +21128,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21062
21128
  case PM_BACK_REFERENCE_READ_NODE:
21063
21129
  case PM_NUMBERED_REFERENCE_READ_NODE:
21064
21130
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21065
- /* fallthrough */
21131
+ PRISM_FALLTHROUGH
21066
21132
  case PM_GLOBAL_VARIABLE_READ_NODE: {
21067
21133
  parser_lex(parser);
21068
21134
 
@@ -21190,7 +21256,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21190
21256
  case PM_BACK_REFERENCE_READ_NODE:
21191
21257
  case PM_NUMBERED_REFERENCE_READ_NODE:
21192
21258
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21193
- /* fallthrough */
21259
+ PRISM_FALLTHROUGH
21194
21260
  case PM_GLOBAL_VARIABLE_READ_NODE: {
21195
21261
  parser_lex(parser);
21196
21262
 
@@ -21400,6 +21466,33 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21400
21466
  case PM_TOKEN_STAR:
21401
21467
  case PM_TOKEN_STAR_STAR: {
21402
21468
  parser_lex(parser);
21469
+ pm_token_t operator = parser->previous;
21470
+ switch (PM_NODE_TYPE(node)) {
21471
+ case PM_RESCUE_MODIFIER_NODE: {
21472
+ pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
21473
+ if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21474
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21475
+ }
21476
+ break;
21477
+ }
21478
+ case PM_AND_NODE: {
21479
+ pm_and_node_t *cast = (pm_and_node_t *) node;
21480
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21481
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21482
+ }
21483
+ break;
21484
+ }
21485
+ case PM_OR_NODE: {
21486
+ pm_or_node_t *cast = (pm_or_node_t *) node;
21487
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21488
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21489
+ }
21490
+ break;
21491
+ }
21492
+ default:
21493
+ break;
21494
+ }
21495
+
21403
21496
  pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21404
21497
  return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21405
21498
  }
@@ -21427,6 +21520,32 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21427
21520
  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21428
21521
  }
21429
21522
 
21523
+ switch (PM_NODE_TYPE(node)) {
21524
+ case PM_RESCUE_MODIFIER_NODE: {
21525
+ pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
21526
+ if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21527
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21528
+ }
21529
+ break;
21530
+ }
21531
+ case PM_AND_NODE: {
21532
+ pm_and_node_t *cast = (pm_and_node_t *) node;
21533
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21534
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21535
+ }
21536
+ break;
21537
+ }
21538
+ case PM_OR_NODE: {
21539
+ pm_or_node_t *cast = (pm_or_node_t *) node;
21540
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21541
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21542
+ }
21543
+ break;
21544
+ }
21545
+ default:
21546
+ break;
21547
+ }
21548
+
21430
21549
  pm_token_t message;
21431
21550
 
21432
21551
  switch (parser->current.type) {
@@ -21774,6 +21893,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
21774
21893
  if (pm_symbol_node_label_p(node)) {
21775
21894
  return node;
21776
21895
  }
21896
+ break;
21777
21897
  default:
21778
21898
  break;
21779
21899
  }
@@ -22643,3 +22763,166 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
22643
22763
  }
22644
22764
 
22645
22765
  #endif
22766
+
22767
+ /******************************************************************************/
22768
+ /* Slice queries for the Ruby API */
22769
+ /******************************************************************************/
22770
+
22771
/**
 * Classification that pm_slice_type assigns to a slice of source.
 */
typedef enum {
    /** The supplied encoding name could not be resolved. */
    PM_SLICE_TYPE_ERROR = -1,

    /** The slice does not fit any of the categories below. */
    PM_SLICE_TYPE_NONE = 0,

    /** The slice is a valid local variable name. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** The slice is a valid constant name. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** The slice is a valid method name. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
22788
+
22789
+ /**
22790
+ * Check that the slice is a valid local variable name or constant.
22791
+ */
22792
+ pm_slice_type_t
22793
+ pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22794
+ // first, get the right encoding object
22795
+ const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22796
+ if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22797
+
22798
+ // check that there is at least one character
22799
+ if (length == 0) return PM_SLICE_TYPE_NONE;
22800
+
22801
+ size_t width;
22802
+ if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22803
+ // valid because alphabetical
22804
+ } else if (*source == '_') {
22805
+ // valid because underscore
22806
+ width = 1;
22807
+ } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22808
+ // valid because multibyte
22809
+ } else {
22810
+ // invalid because no match
22811
+ return PM_SLICE_TYPE_NONE;
22812
+ }
22813
+
22814
+ // determine the type of the slice based on the first character
22815
+ const uint8_t *end = source + length;
22816
+ pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22817
+
22818
+ // next, iterate through all of the bytes of the string to ensure that they
22819
+ // are all valid identifier characters
22820
+ source += width;
22821
+
22822
+ while (source < end) {
22823
+ if ((width = encoding->alnum_char(source, end - source)) != 0) {
22824
+ // valid because alphanumeric
22825
+ source += width;
22826
+ } else if (*source == '_') {
22827
+ // valid because underscore
22828
+ source++;
22829
+ } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22830
+ // valid because multibyte
22831
+ source += width;
22832
+ } else {
22833
+ // invalid because no match
22834
+ break;
22835
+ }
22836
+ }
22837
+
22838
+ // accept a ! or ? at the end of the slice as a method name
22839
+ if (*source == '!' || *source == '?' || *source == '=') {
22840
+ source++;
22841
+ result = PM_SLICE_TYPE_METHOD_NAME;
22842
+ }
22843
+
22844
+ // valid if we are at the end of the slice
22845
+ return source == end ? result : PM_SLICE_TYPE_NONE;
22846
+ }
22847
+
22848
+ /**
22849
+ * Check that the slice is a valid local variable name.
22850
+ */
22851
+ PRISM_EXPORTED_FUNCTION pm_string_query_t
22852
+ pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22853
+ switch (pm_slice_type(source, length, encoding_name)) {
22854
+ case PM_SLICE_TYPE_ERROR:
22855
+ return PM_STRING_QUERY_ERROR;
22856
+ case PM_SLICE_TYPE_NONE:
22857
+ case PM_SLICE_TYPE_CONSTANT:
22858
+ case PM_SLICE_TYPE_METHOD_NAME:
22859
+ return PM_STRING_QUERY_FALSE;
22860
+ case PM_SLICE_TYPE_LOCAL:
22861
+ return PM_STRING_QUERY_TRUE;
22862
+ }
22863
+
22864
+ assert(false && "unreachable");
22865
+ return PM_STRING_QUERY_FALSE;
22866
+ }
22867
+
22868
+ /**
22869
+ * Check that the slice is a valid constant name.
22870
+ */
22871
+ PRISM_EXPORTED_FUNCTION pm_string_query_t
22872
+ pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22873
+ switch (pm_slice_type(source, length, encoding_name)) {
22874
+ case PM_SLICE_TYPE_ERROR:
22875
+ return PM_STRING_QUERY_ERROR;
22876
+ case PM_SLICE_TYPE_NONE:
22877
+ case PM_SLICE_TYPE_LOCAL:
22878
+ case PM_SLICE_TYPE_METHOD_NAME:
22879
+ return PM_STRING_QUERY_FALSE;
22880
+ case PM_SLICE_TYPE_CONSTANT:
22881
+ return PM_STRING_QUERY_TRUE;
22882
+ }
22883
+
22884
+ assert(false && "unreachable");
22885
+ return PM_STRING_QUERY_FALSE;
22886
+ }
22887
+
22888
+ /**
22889
+ * Check that the slice is a valid method name.
22890
+ */
22891
+ PRISM_EXPORTED_FUNCTION pm_string_query_t
22892
+ pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
22893
+ #define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22894
+ #define C1(c) (*source == c)
22895
+ #define C2(s) (memcmp(source, s, 2) == 0)
22896
+ #define C3(s) (memcmp(source, s, 3) == 0)
22897
+
22898
+ switch (pm_slice_type(source, length, encoding_name)) {
22899
+ case PM_SLICE_TYPE_ERROR:
22900
+ return PM_STRING_QUERY_ERROR;
22901
+ case PM_SLICE_TYPE_NONE:
22902
+ break;
22903
+ case PM_SLICE_TYPE_LOCAL:
22904
+ // numbered parameters are not valid method names
22905
+ return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22906
+ case PM_SLICE_TYPE_CONSTANT:
22907
+ // all constants are valid method names
22908
+ case PM_SLICE_TYPE_METHOD_NAME:
22909
+ // all method names are valid method names
22910
+ return PM_STRING_QUERY_TRUE;
22911
+ }
22912
+
22913
+ switch (length) {
22914
+ case 1:
22915
+ return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22916
+ case 2:
22917
+ return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22918
+ case 3:
22919
+ return B(C3("===") || C3("<=>") || C3("[]="));
22920
+ default:
22921
+ return PM_STRING_QUERY_FALSE;
22922
+ }
22923
+
22924
+ #undef B
22925
+ #undef C1
22926
+ #undef C2
22927
+ #undef C3
22928
+ }