prism 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/src/prism.c CHANGED
@@ -4142,7 +4142,7 @@ pm_double_parse(pm_parser_t *parser, const pm_token_t *token) {
4142
4142
 
4143
4143
  // If errno is set, then it should only be ERANGE. At this point we need to
4144
4144
  // check if it's infinity (it should be).
4145
- if (errno == ERANGE && isinf(value)) {
4145
+ if (errno == ERANGE && PRISM_ISINF(value)) {
4146
4146
  int warn_width;
4147
4147
  const char *ellipsis;
4148
4148
 
@@ -7684,7 +7684,7 @@ pm_loop_modifier_block_exits(pm_parser_t *parser, pm_statements_node_t *statemen
7684
7684
  * Allocate a new UntilNode node.
7685
7685
  */
7686
7686
  static pm_until_node_t *
7687
- pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7687
+ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7688
7688
  pm_until_node_t *node = PM_NODE_ALLOC(parser, pm_until_node_t);
7689
7689
  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7690
7690
 
@@ -7699,6 +7699,7 @@ pm_until_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
7699
7699
  },
7700
7700
  },
7701
7701
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7702
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7702
7703
  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7703
7704
  .predicate = predicate,
7704
7705
  .statements = statements
@@ -7727,6 +7728,7 @@ pm_until_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
7727
7728
  },
7728
7729
  },
7729
7730
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7731
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7730
7732
  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7731
7733
  .predicate = predicate,
7732
7734
  .statements = statements
@@ -7794,7 +7796,7 @@ pm_when_node_statements_set(pm_when_node_t *node, pm_statements_node_t *statemen
7794
7796
  * Allocate a new WhileNode node.
7795
7797
  */
7796
7798
  static pm_while_node_t *
7797
- pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7799
+ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_token_t *do_keyword, const pm_token_t *closing, pm_node_t *predicate, pm_statements_node_t *statements, pm_node_flags_t flags) {
7798
7800
  pm_while_node_t *node = PM_NODE_ALLOC(parser, pm_while_node_t);
7799
7801
  pm_conditional_predicate(parser, predicate, PM_CONDITIONAL_PREDICATE_TYPE_CONDITIONAL);
7800
7802
 
@@ -7809,6 +7811,7 @@ pm_while_node_create(pm_parser_t *parser, const pm_token_t *keyword, const pm_to
7809
7811
  },
7810
7812
  },
7811
7813
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7814
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(do_keyword),
7812
7815
  .closing_loc = PM_OPTIONAL_LOCATION_TOKEN_VALUE(closing),
7813
7816
  .predicate = predicate,
7814
7817
  .statements = statements
@@ -7837,6 +7840,7 @@ pm_while_node_modifier_create(pm_parser_t *parser, const pm_token_t *keyword, pm
7837
7840
  },
7838
7841
  },
7839
7842
  .keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword),
7843
+ .do_keyword_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7840
7844
  .closing_loc = PM_OPTIONAL_LOCATION_NOT_PROVIDED_VALUE,
7841
7845
  .predicate = predicate,
7842
7846
  .statements = statements
@@ -7859,6 +7863,7 @@ pm_while_node_synthesized_create(pm_parser_t *parser, pm_node_t *predicate, pm_s
7859
7863
  .location = PM_LOCATION_NULL_VALUE(parser)
7860
7864
  },
7861
7865
  .keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7866
+ .do_keyword_loc = PM_LOCATION_NULL_VALUE(parser),
7862
7867
  .closing_loc = PM_LOCATION_NULL_VALUE(parser),
7863
7868
  .predicate = predicate,
7864
7869
  .statements = statements
@@ -9105,7 +9110,7 @@ lex_global_variable(pm_parser_t *parser) {
9105
9110
  case '-':
9106
9111
  parser->current.end++;
9107
9112
  allow_multiple = false;
9108
- /* fallthrough */
9113
+ PRISM_FALLTHROUGH
9109
9114
  default: {
9110
9115
  size_t width;
9111
9116
 
@@ -10041,8 +10046,8 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
10041
10046
  escape_write_byte_encoded(parser, buffer, escape_byte('\n', flags));
10042
10047
  return;
10043
10048
  }
10049
+ PRISM_FALLTHROUGH
10044
10050
  }
10045
- /* fallthrough */
10046
10051
  default: {
10047
10052
  if (parser->current.end < parser->end) {
10048
10053
  escape_write_escape_encoded(parser, buffer);
@@ -10496,6 +10501,7 @@ pm_token_buffer_escape(pm_parser_t *parser, pm_token_buffer_t *token_buffer) {
10496
10501
  }
10497
10502
 
10498
10503
  const uint8_t *end = parser->current.end - 1;
10504
+ assert(end >= start);
10499
10505
  pm_buffer_append_bytes(&token_buffer->buffer, start, (size_t) (end - start));
10500
10506
 
10501
10507
  token_buffer->cursor = end;
@@ -10576,9 +10582,15 @@ pm_lex_percent_delimiter(pm_parser_t *parser) {
10576
10582
  pm_newline_list_append(&parser->newline_list, parser->current.end + eol_length - 1);
10577
10583
  }
10578
10584
 
10579
- const uint8_t delimiter = *parser->current.end;
10580
- parser->current.end += eol_length;
10585
+ uint8_t delimiter = *parser->current.end;
10586
+
10587
+ // If our delimiter is \r\n, we want to treat it as if it's \n.
10588
+ // For example, %\r\nfoo\r\n should be "foo"
10589
+ if (eol_length == 2) {
10590
+ delimiter = *(parser->current.end + 1);
10591
+ }
10581
10592
 
10593
+ parser->current.end += eol_length;
10582
10594
  return delimiter;
10583
10595
  }
10584
10596
 
@@ -10688,6 +10700,14 @@ parser_lex(pm_parser_t *parser) {
10688
10700
  // We'll check if we're at the end of the file. If we are, then we
10689
10701
  // need to return the EOF token.
10690
10702
  if (parser->current.end >= parser->end) {
10703
+ // If we hit EOF, but the EOF came immediately after a newline,
10704
+ // set the start of the token to the newline. This way any EOF
10705
+ // errors will be reported as happening on that line rather than
10706
+ // a line after. For example "foo(\n" should report an error
10707
+ // on line 1 even though EOF technically occurs on line 2.
10708
+ if (parser->current.start > parser->start && (*(parser->current.start - 1) == '\n')) {
10709
+ parser->current.start -= 1;
10710
+ }
10691
10711
  LEX(PM_TOKEN_EOF);
10692
10712
  }
10693
10713
 
@@ -10730,7 +10750,7 @@ parser_lex(pm_parser_t *parser) {
10730
10750
 
10731
10751
  lexed_comment = true;
10732
10752
  }
10733
- /* fallthrough */
10753
+ PRISM_FALLTHROUGH
10734
10754
  case '\r':
10735
10755
  case '\n': {
10736
10756
  parser->semantic_token_seen = semantic_token_seen & 0x1;
@@ -10772,7 +10792,7 @@ parser_lex(pm_parser_t *parser) {
10772
10792
  parser->current.type = PM_TOKEN_NEWLINE;
10773
10793
  return;
10774
10794
  }
10775
- /* fallthrough */
10795
+ PRISM_FALLTHROUGH
10776
10796
  case PM_IGNORED_NEWLINE_ALL:
10777
10797
  if (!lexed_comment) parser_lex_ignored_newline(parser);
10778
10798
  lexed_comment = false;
@@ -10869,6 +10889,10 @@ parser_lex(pm_parser_t *parser) {
10869
10889
 
10870
10890
  // ,
10871
10891
  case ',':
10892
+ if ((parser->previous.type == PM_TOKEN_COMMA) && (parser->enclosure_nesting > 0)) {
10893
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_ARRAY_TERM, pm_token_type_human(parser->current.type));
10894
+ }
10895
+
10872
10896
  lex_state_set(parser, PM_LEX_STATE_BEG | PM_LEX_STATE_LABEL);
10873
10897
  LEX(PM_TOKEN_COMMA);
10874
10898
 
@@ -11783,7 +11807,7 @@ parser_lex(pm_parser_t *parser) {
11783
11807
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "escaped carriage return");
11784
11808
  break;
11785
11809
  }
11786
- /* fallthrough */
11810
+ PRISM_FALLTHROUGH
11787
11811
  default:
11788
11812
  PM_PARSER_ERR_TOKEN_FORMAT(parser, parser->current, PM_ERR_UNEXPECTED_TOKEN_IGNORE, "backslash");
11789
11813
  break;
@@ -11980,7 +12004,7 @@ parser_lex(pm_parser_t *parser) {
11980
12004
  pm_token_buffer_push_byte(&token_buffer, '\r');
11981
12005
  break;
11982
12006
  }
11983
- /* fallthrough */
12007
+ PRISM_FALLTHROUGH
11984
12008
  case '\n':
11985
12009
  pm_token_buffer_push_byte(&token_buffer, '\n');
11986
12010
 
@@ -12084,9 +12108,28 @@ parser_lex(pm_parser_t *parser) {
12084
12108
  pm_regexp_token_buffer_t token_buffer = { 0 };
12085
12109
 
12086
12110
  while (breakpoint != NULL) {
12111
+ uint8_t term = lex_mode->as.regexp.terminator;
12112
+ bool is_terminator = (*breakpoint == term);
12113
+
12114
+ // If the terminator is newline, we need to consider \r\n _also_ a newline
12115
+ // For example: `%r\nfoo\r\n`
12116
+ // The regexp should be /foo/, not /foo\r/
12117
+ if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12118
+ if (term == '\n') {
12119
+ is_terminator = true;
12120
+ }
12121
+
12122
+ // If the terminator is a CR, but we see a CRLF, we need to
12123
+ // treat the CRLF as a newline, meaning this is _not_ the
12124
+ // terminator
12125
+ if (term == '\r') {
12126
+ is_terminator = false;
12127
+ }
12128
+ }
12129
+
12087
12130
  // If we hit the terminator, we need to determine what kind of
12088
12131
  // token to return.
12089
- if (*breakpoint == lex_mode->as.regexp.terminator) {
12132
+ if (is_terminator) {
12090
12133
  if (lex_mode->as.regexp.nesting > 0) {
12091
12134
  parser->current.end = breakpoint + 1;
12092
12135
  breakpoint = pm_strpbrk(parser, parser->current.end, breakpoints, parser->end - parser->current.end, false);
@@ -12148,7 +12191,7 @@ parser_lex(pm_parser_t *parser) {
12148
12191
  pm_regexp_token_buffer_escape(parser, &token_buffer);
12149
12192
  token_buffer.base.cursor = breakpoint;
12150
12193
 
12151
- /* fallthrough */
12194
+ PRISM_FALLTHROUGH
12152
12195
  case '\n':
12153
12196
  // If we've hit a newline, then we need to track that in
12154
12197
  // the list of newlines.
@@ -12190,7 +12233,7 @@ parser_lex(pm_parser_t *parser) {
12190
12233
  pm_token_buffer_push_byte(&token_buffer.base, '\r');
12191
12234
  break;
12192
12235
  }
12193
- /* fallthrough */
12236
+ PRISM_FALLTHROUGH
12194
12237
  case '\n':
12195
12238
  if (parser->heredoc_end) {
12196
12239
  // ... if we are on the same line as a heredoc,
@@ -12316,10 +12359,29 @@ parser_lex(pm_parser_t *parser) {
12316
12359
  continue;
12317
12360
  }
12318
12361
 
12362
+ uint8_t term = lex_mode->as.string.terminator;
12363
+ bool is_terminator = (*breakpoint == term);
12364
+
12365
+ // If the terminator is newline, we need to consider \r\n _also_ a newline
12366
+ // For example: `%\nfoo\r\n`
12367
+ // The string should be "foo", not "foo\r"
12368
+ if (*breakpoint == '\r' && peek_at(parser, breakpoint + 1) == '\n') {
12369
+ if (term == '\n') {
12370
+ is_terminator = true;
12371
+ }
12372
+
12373
+ // If the terminator is a CR, but we see a CRLF, we need to
12374
+ // treat the CRLF as a newline, meaning this is _not_ the
12375
+ // terminator
12376
+ if (term == '\r') {
12377
+ is_terminator = false;
12378
+ }
12379
+ }
12380
+
12319
12381
  // Note that we have to check the terminator here first because we could
12320
12382
  // potentially be parsing a % string that has a # character as the
12321
12383
  // terminator.
12322
- if (*breakpoint == lex_mode->as.string.terminator) {
12384
+ if (is_terminator) {
12323
12385
  // If this terminator doesn't actually close the string, then we need
12324
12386
  // to continue on past it.
12325
12387
  if (lex_mode->as.string.nesting > 0) {
@@ -12379,7 +12441,7 @@ parser_lex(pm_parser_t *parser) {
12379
12441
  pm_token_buffer_escape(parser, &token_buffer);
12380
12442
  token_buffer.cursor = breakpoint;
12381
12443
 
12382
- /* fallthrough */
12444
+ PRISM_FALLTHROUGH
12383
12445
  case '\n':
12384
12446
  // When we hit a newline, we need to flush any potential
12385
12447
  // heredocs. Note that this has to happen after we check
@@ -12424,7 +12486,7 @@ parser_lex(pm_parser_t *parser) {
12424
12486
  pm_token_buffer_push_byte(&token_buffer, '\r');
12425
12487
  break;
12426
12488
  }
12427
- /* fallthrough */
12489
+ PRISM_FALLTHROUGH
12428
12490
  case '\n':
12429
12491
  if (!lex_mode->as.string.interpolation) {
12430
12492
  pm_token_buffer_push_byte(&token_buffer, '\\');
@@ -12632,7 +12694,7 @@ parser_lex(pm_parser_t *parser) {
12632
12694
  pm_token_buffer_escape(parser, &token_buffer);
12633
12695
  token_buffer.cursor = breakpoint;
12634
12696
 
12635
- /* fallthrough */
12697
+ PRISM_FALLTHROUGH
12636
12698
  case '\n': {
12637
12699
  if (parser->heredoc_end != NULL && (parser->heredoc_end > breakpoint)) {
12638
12700
  parser_flush_heredoc_end(parser);
@@ -12732,7 +12794,7 @@ parser_lex(pm_parser_t *parser) {
12732
12794
  pm_token_buffer_push_byte(&token_buffer, '\r');
12733
12795
  break;
12734
12796
  }
12735
- /* fallthrough */
12797
+ PRISM_FALLTHROUGH
12736
12798
  case '\n':
12737
12799
  pm_token_buffer_push_byte(&token_buffer, '\\');
12738
12800
  pm_token_buffer_push_byte(&token_buffer, '\n');
@@ -12752,7 +12814,7 @@ parser_lex(pm_parser_t *parser) {
12752
12814
  pm_token_buffer_push_byte(&token_buffer, '\r');
12753
12815
  break;
12754
12816
  }
12755
- /* fallthrough */
12817
+ PRISM_FALLTHROUGH
12756
12818
  case '\n':
12757
12819
  // If we are in a tilde here, we should
12758
12820
  // break out of the loop and return the
@@ -13044,14 +13106,6 @@ match4(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
13044
13106
  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4);
13045
13107
  }
13046
13108
 
13047
- /**
13048
- * Returns true if the current token is any of the six given types.
13049
- */
13050
- static inline bool
13051
- match6(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6) {
13052
- return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6);
13053
- }
13054
-
13055
13109
  /**
13056
13110
  * Returns true if the current token is any of the seven given types.
13057
13111
  */
@@ -13068,6 +13122,14 @@ match8(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2,
13068
13122
  return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8);
13069
13123
  }
13070
13124
 
13125
+ /**
13126
+ * Returns true if the current token is any of the nine given types.
13127
+ */
13128
+ static inline bool
13129
+ match9(const pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_token_type_t type4, pm_token_type_t type5, pm_token_type_t type6, pm_token_type_t type7, pm_token_type_t type8, pm_token_type_t type9) {
13130
+ return match1(parser, type1) || match1(parser, type2) || match1(parser, type3) || match1(parser, type4) || match1(parser, type5) || match1(parser, type6) || match1(parser, type7) || match1(parser, type8) || match1(parser, type9);
13131
+ }
13132
+
13071
13133
  /**
13072
13134
  * If the current token is of the specified type, lex forward by one token and
13073
13135
  * return true. Otherwise, return false. For example:
@@ -13096,19 +13158,6 @@ accept2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2) {
13096
13158
  return false;
13097
13159
  }
13098
13160
 
13099
- /**
13100
- * If the current token is any of the three given types, lex forward by one
13101
- * token and return true. Otherwise return false.
13102
- */
13103
- static inline bool
13104
- accept3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3) {
13105
- if (match3(parser, type1, type2, type3)) {
13106
- parser_lex(parser);
13107
- return true;
13108
- }
13109
- return false;
13110
- }
13111
-
13112
13161
  /**
13113
13162
  * This function indicates that the parser expects a token in a specific
13114
13163
  * position. For example, if you're parsing a BEGIN block, you know that a { is
@@ -13146,20 +13195,6 @@ expect2(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_di
13146
13195
  parser->previous.type = PM_TOKEN_MISSING;
13147
13196
  }
13148
13197
 
13149
- /**
13150
- * This function is the same as expect2, but it expects one of three token types.
13151
- */
13152
- static void
13153
- expect3(pm_parser_t *parser, pm_token_type_t type1, pm_token_type_t type2, pm_token_type_t type3, pm_diagnostic_id_t diag_id) {
13154
- if (accept3(parser, type1, type2, type3)) return;
13155
-
13156
- const uint8_t *location = parser->previous.end;
13157
- pm_parser_err(parser, location, location, diag_id);
13158
-
13159
- parser->previous.start = location;
13160
- parser->previous.type = PM_TOKEN_MISSING;
13161
- }
13162
-
13163
13198
  /**
13164
13199
  * A special expect1 that expects a heredoc terminator and handles popping the
13165
13200
  * lex mode accordingly.
@@ -13501,7 +13536,7 @@ parse_target(pm_parser_t *parser, pm_node_t *target, bool multiple, bool splat_p
13501
13536
  return (pm_node_t *) pm_index_target_node_create(parser, call);
13502
13537
  }
13503
13538
  }
13504
- /* fallthrough */
13539
+ PRISM_FALLTHROUGH
13505
13540
  default:
13506
13541
  // In this case we have a node that we don't know how to convert
13507
13542
  // into a target. We need to treat it as an error. For now, we'll
@@ -13583,7 +13618,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13583
13618
  case PM_BACK_REFERENCE_READ_NODE:
13584
13619
  case PM_NUMBERED_REFERENCE_READ_NODE:
13585
13620
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, target, PM_ERR_WRITE_TARGET_READONLY);
13586
- /* fallthrough */
13621
+ PRISM_FALLTHROUGH
13587
13622
  case PM_GLOBAL_VARIABLE_READ_NODE: {
13588
13623
  pm_global_variable_write_node_t *node = pm_global_variable_write_node_create(parser, target, operator, value);
13589
13624
  pm_node_destroy(parser, target);
@@ -13710,6 +13745,9 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13710
13745
 
13711
13746
  // Replace the name with "[]=".
13712
13747
  call->name = pm_parser_constant_id_constant(parser, "[]=", 3);
13748
+
13749
+ // Ensure that the arguments for []= don't contain keywords
13750
+ pm_index_arguments_check(parser, call->arguments, call->block);
13713
13751
  pm_node_flag_set((pm_node_t *) call, PM_CALL_NODE_FLAGS_ATTRIBUTE_WRITE | pm_implicit_array_write_flags(value, PM_CALL_NODE_FLAGS_IMPLICIT_ARRAY));
13714
13752
 
13715
13753
  return target;
@@ -13722,7 +13760,7 @@ parse_write(pm_parser_t *parser, pm_node_t *target, pm_token_t *operator, pm_nod
13722
13760
  // is no way for us to attach it to the tree at this point.
13723
13761
  pm_node_destroy(parser, value);
13724
13762
  }
13725
- /* fallthrough */
13763
+ PRISM_FALLTHROUGH
13726
13764
  default:
13727
13765
  // In this case we have a node that we don't know how to convert into a
13728
13766
  // target. We need to treat it as an error. For now, we'll mark it as an
@@ -14191,6 +14229,9 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
14191
14229
  if (match4(parser, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_COMMA, PM_TOKEN_SEMICOLON, PM_TOKEN_BRACKET_RIGHT)) {
14192
14230
  pm_parser_scope_forwarding_positionals_check(parser, &operator);
14193
14231
  argument = (pm_node_t *) pm_splat_node_create(parser, &operator, NULL);
14232
+ if (parsed_bare_hash) {
14233
+ pm_parser_err_previous(parser, PM_ERR_ARGUMENT_SPLAT_AFTER_ASSOC_SPLAT);
14234
+ }
14194
14235
  } else {
14195
14236
  pm_node_t *expression = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_SPLAT, (uint16_t) (depth + 1));
14196
14237
 
@@ -14239,7 +14280,7 @@ parse_arguments(pm_parser_t *parser, pm_arguments_t *arguments, bool accepts_for
14239
14280
  }
14240
14281
  }
14241
14282
  }
14242
- /* fallthrough */
14283
+ PRISM_FALLTHROUGH
14243
14284
  default: {
14244
14285
  if (argument == NULL) {
14245
14286
  argument = parse_value_expression(parser, PM_BINDING_POWER_DEFINED, !parsed_first_argument, true, PM_ERR_EXPECT_ARGUMENT, (uint16_t) (depth + 1));
@@ -14482,6 +14523,7 @@ parse_parameters(
14482
14523
  bool allows_trailing_comma,
14483
14524
  bool allows_forwarding_parameters,
14484
14525
  bool accepts_blocks_in_defaults,
14526
+ bool in_block,
14485
14527
  uint16_t depth
14486
14528
  ) {
14487
14529
  pm_do_loop_stack_push(parser, false);
@@ -14646,7 +14688,7 @@ parse_parameters(
14646
14688
  break;
14647
14689
  }
14648
14690
  case PM_TOKEN_LABEL: {
14649
- if (!uses_parentheses) parser->in_keyword_arg = true;
14691
+ if (!uses_parentheses && !in_block) parser->in_keyword_arg = true;
14650
14692
  update_parameter_state(parser, &parser->current, &order);
14651
14693
 
14652
14694
  context_push(parser, PM_CONTEXT_DEFAULT_PARAMS);
@@ -15115,7 +15157,7 @@ parse_rescues(pm_parser_t *parser, size_t opening_newline_index, const pm_token_
15115
15157
  case PM_RESCUES_LAMBDA: context = PM_CONTEXT_LAMBDA_ELSE; break;
15116
15158
  case PM_RESCUES_MODULE: context = PM_CONTEXT_MODULE_ELSE; break;
15117
15159
  case PM_RESCUES_SCLASS: context = PM_CONTEXT_SCLASS_ELSE; break;
15118
- default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_RESCUE; break;
15160
+ default: assert(false && "unreachable"); context = PM_CONTEXT_BEGIN_ELSE; break;
15119
15161
  }
15120
15162
 
15121
15163
  else_statements = parse_statements(parser, context, (uint16_t) (depth + 1));
@@ -15210,6 +15252,7 @@ parse_block_parameters(
15210
15252
  allows_trailing_comma,
15211
15253
  false,
15212
15254
  accepts_blocks_in_defaults,
15255
+ true,
15213
15256
  (uint16_t) (depth + 1)
15214
15257
  );
15215
15258
  }
@@ -16125,7 +16168,7 @@ parse_operator_symbol_name(const pm_token_t *name) {
16125
16168
  case PM_TOKEN_TILDE:
16126
16169
  case PM_TOKEN_BANG:
16127
16170
  if (name->end[-1] == '@') return name->end - 1;
16128
- /* fallthrough */
16171
+ PRISM_FALLTHROUGH
16129
16172
  default:
16130
16173
  return name->end;
16131
16174
  }
@@ -16381,14 +16424,15 @@ static pm_node_t *
16381
16424
  parse_variable(pm_parser_t *parser) {
16382
16425
  pm_constant_id_t name_id = pm_parser_constant_id_token(parser, &parser->previous);
16383
16426
  int depth;
16427
+ bool is_numbered_param = pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end);
16384
16428
 
16385
- if ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1) {
16429
+ if (!is_numbered_param && ((depth = pm_parser_local_depth_constant_id(parser, name_id)) != -1)) {
16386
16430
  return (pm_node_t *) pm_local_variable_read_node_create_constant_id(parser, &parser->previous, name_id, (uint32_t) depth, false);
16387
16431
  }
16388
16432
 
16389
16433
  pm_scope_t *current_scope = parser->current_scope;
16390
16434
  if (!current_scope->closed && !(current_scope->parameters & PM_SCOPE_PARAMETERS_IMPLICIT_DISALLOWED)) {
16391
- if (pm_token_is_numbered_parameter(parser->previous.start, parser->previous.end)) {
16435
+ if (is_numbered_param) {
16392
16436
  // When you use a numbered parameter, it implies the existence of
16393
16437
  // all of the locals that exist before it. For example, referencing
16394
16438
  // _2 means that _1 must exist. Therefore here we loop through all
@@ -17096,7 +17140,7 @@ parse_pattern_hash(pm_parser_t *parser, pm_constant_id_list_t *captures, pm_node
17096
17140
  break;
17097
17141
  }
17098
17142
  }
17099
- /* fallthrough */
17143
+ PRISM_FALLTHROUGH
17100
17144
  default: {
17101
17145
  // If we get anything else, then this is an error. For this we'll
17102
17146
  // create a missing node for the value and create an assoc node for
@@ -17592,7 +17636,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
17592
17636
  break;
17593
17637
  }
17594
17638
  }
17595
- /* fallthrough */
17639
+ PRISM_FALLTHROUGH
17596
17640
  default:
17597
17641
  node = parse_pattern_primitives(parser, captures, NULL, diag_id, (uint16_t) (depth + 1));
17598
17642
  break;
@@ -17614,7 +17658,7 @@ parse_pattern(pm_parser_t *parser, pm_constant_id_list_t *captures, uint8_t flag
17614
17658
  // Gather up all of the patterns into the list.
17615
17659
  while (accept1(parser, PM_TOKEN_COMMA)) {
17616
17660
  // Break early here in case we have a trailing comma.
17617
- if (match6(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF)) {
17661
+ if (match9(parser, PM_TOKEN_KEYWORD_THEN, PM_TOKEN_BRACE_RIGHT, PM_TOKEN_BRACKET_RIGHT, PM_TOKEN_PARENTHESIS_RIGHT, PM_TOKEN_SEMICOLON, PM_TOKEN_NEWLINE, PM_TOKEN_EOF,PM_TOKEN_KEYWORD_AND, PM_TOKEN_KEYWORD_OR)) {
17618
17662
  node = (pm_node_t *) pm_implicit_rest_node_create(parser, &parser->previous);
17619
17663
  pm_node_list_append(&nodes, node);
17620
17664
  trailing_rest = true;
@@ -18722,7 +18766,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
18722
18766
  pm_parser_err_node(parser, old_name, PM_ERR_ALIAS_ARGUMENT);
18723
18767
  }
18724
18768
  }
18725
- /* fallthrough */
18769
+ PRISM_FALLTHROUGH
18726
18770
  default:
18727
18771
  return (pm_node_t *) pm_alias_method_node_create(parser, &keyword, new_name, old_name);
18728
18772
  }
@@ -19213,6 +19257,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19213
19257
  context_push(parser, PM_CONTEXT_DEF_PARAMS);
19214
19258
  parser_lex(parser);
19215
19259
 
19260
+ // This will be false if the method name is not a valid identifier
19261
+ // but could be followed by an operator.
19262
+ bool valid_name = true;
19263
+
19216
19264
  switch (parser->current.type) {
19217
19265
  case PM_CASE_OPERATOR:
19218
19266
  pm_parser_scope_push(parser, true);
@@ -19242,10 +19290,12 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19242
19290
 
19243
19291
  break;
19244
19292
  }
19245
- case PM_TOKEN_CONSTANT:
19246
19293
  case PM_TOKEN_INSTANCE_VARIABLE:
19247
19294
  case PM_TOKEN_CLASS_VARIABLE:
19248
19295
  case PM_TOKEN_GLOBAL_VARIABLE:
19296
+ valid_name = false;
19297
+ PRISM_FALLTHROUGH
19298
+ case PM_TOKEN_CONSTANT:
19249
19299
  case PM_TOKEN_KEYWORD_NIL:
19250
19300
  case PM_TOKEN_KEYWORD_SELF:
19251
19301
  case PM_TOKEN_KEYWORD_TRUE:
@@ -19303,6 +19353,10 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19303
19353
 
19304
19354
  name = parse_method_definition_name(parser);
19305
19355
  } else {
19356
+ if (!valid_name) {
19357
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, identifier, PM_ERR_DEF_NAME, pm_token_type_human(identifier.type));
19358
+ }
19359
+
19306
19360
  name = identifier;
19307
19361
  }
19308
19362
  break;
@@ -19353,7 +19407,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19353
19407
  if (match1(parser, PM_TOKEN_PARENTHESIS_RIGHT)) {
19354
19408
  params = NULL;
19355
19409
  } else {
19356
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, (uint16_t) (depth + 1));
19410
+ params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, true, false, true, true, false, (uint16_t) (depth + 1));
19357
19411
  }
19358
19412
 
19359
19413
  lex_state_set(parser, PM_LEX_STATE_BEG);
@@ -19378,7 +19432,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19378
19432
 
19379
19433
  lparen = not_provided(parser);
19380
19434
  rparen = not_provided(parser);
19381
- params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, (uint16_t) (depth + 1));
19435
+ params = parse_parameters(parser, PM_BINDING_POWER_DEFINED, false, false, true, true, false, (uint16_t) (depth + 1));
19382
19436
 
19383
19437
  context_pop(parser);
19384
19438
  break;
@@ -19787,9 +19841,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19787
19841
  pm_do_loop_stack_pop(parser);
19788
19842
  context_pop(parser);
19789
19843
 
19790
- expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19791
- pm_statements_node_t *statements = NULL;
19844
+ pm_token_t do_keyword;
19845
+ if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19846
+ do_keyword = parser->previous;
19847
+ } else {
19848
+ do_keyword = not_provided(parser);
19849
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_UNTIL_PREDICATE);
19850
+ }
19792
19851
 
19852
+ pm_statements_node_t *statements = NULL;
19793
19853
  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19794
19854
  pm_accepts_block_stack_push(parser, true);
19795
19855
  statements = parse_statements(parser, PM_CONTEXT_UNTIL, (uint16_t) (depth + 1));
@@ -19800,7 +19860,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19800
19860
  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19801
19861
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_UNTIL_TERM);
19802
19862
 
19803
- return (pm_node_t *) pm_until_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19863
+ return (pm_node_t *) pm_until_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19804
19864
  }
19805
19865
  case PM_TOKEN_KEYWORD_WHILE: {
19806
19866
  size_t opening_newline_index = token_newline_index(parser);
@@ -19815,9 +19875,15 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19815
19875
  pm_do_loop_stack_pop(parser);
19816
19876
  context_pop(parser);
19817
19877
 
19818
- expect3(parser, PM_TOKEN_KEYWORD_DO_LOOP, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19819
- pm_statements_node_t *statements = NULL;
19878
+ pm_token_t do_keyword;
19879
+ if (accept1(parser, PM_TOKEN_KEYWORD_DO_LOOP)) {
19880
+ do_keyword = parser->previous;
19881
+ } else {
19882
+ do_keyword = not_provided(parser);
19883
+ expect2(parser, PM_TOKEN_NEWLINE, PM_TOKEN_SEMICOLON, PM_ERR_CONDITIONAL_WHILE_PREDICATE);
19884
+ }
19820
19885
 
19886
+ pm_statements_node_t *statements = NULL;
19821
19887
  if (!match1(parser, PM_TOKEN_KEYWORD_END)) {
19822
19888
  pm_accepts_block_stack_push(parser, true);
19823
19889
  statements = parse_statements(parser, PM_CONTEXT_WHILE, (uint16_t) (depth + 1));
@@ -19828,7 +19894,7 @@ parse_expression_prefix(pm_parser_t *parser, pm_binding_power_t binding_power, b
19828
19894
  parser_warn_indentation_mismatch(parser, opening_newline_index, &keyword, false, false);
19829
19895
  expect1(parser, PM_TOKEN_KEYWORD_END, PM_ERR_WHILE_TERM);
19830
19896
 
19831
- return (pm_node_t *) pm_while_node_create(parser, &keyword, &parser->previous, predicate, statements, 0);
19897
+ return (pm_node_t *) pm_while_node_create(parser, &keyword, &do_keyword, &parser->previous, predicate, statements, 0);
19832
19898
  }
19833
19899
  case PM_TOKEN_PERCENT_LOWER_I: {
19834
19900
  parser_lex(parser);
@@ -20898,7 +20964,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20898
20964
  pm_parser_local_add_location(parser, call_node->message_loc.start, call_node->message_loc.end, 0);
20899
20965
  }
20900
20966
  }
20901
- /* fallthrough */
20967
+ PRISM_FALLTHROUGH
20902
20968
  case PM_CASE_WRITABLE: {
20903
20969
  parser_lex(parser);
20904
20970
  pm_node_t *value = parse_assignment_values(parser, previous_binding_power, PM_NODE_TYPE_P(node, PM_MULTI_TARGET_NODE) ? PM_BINDING_POWER_MULTI_ASSIGNMENT + 1 : binding_power, accepts_command_call, PM_ERR_EXPECT_EXPRESSION_AFTER_EQUAL, (uint16_t) (depth + 1));
@@ -20944,7 +21010,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
20944
21010
  case PM_BACK_REFERENCE_READ_NODE:
20945
21011
  case PM_NUMBERED_REFERENCE_READ_NODE:
20946
21012
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
20947
- /* fallthrough */
21013
+ PRISM_FALLTHROUGH
20948
21014
  case PM_GLOBAL_VARIABLE_READ_NODE: {
20949
21015
  parser_lex(parser);
20950
21016
 
@@ -21062,7 +21128,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21062
21128
  case PM_BACK_REFERENCE_READ_NODE:
21063
21129
  case PM_NUMBERED_REFERENCE_READ_NODE:
21064
21130
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21065
- /* fallthrough */
21131
+ PRISM_FALLTHROUGH
21066
21132
  case PM_GLOBAL_VARIABLE_READ_NODE: {
21067
21133
  parser_lex(parser);
21068
21134
 
@@ -21190,7 +21256,7 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21190
21256
  case PM_BACK_REFERENCE_READ_NODE:
21191
21257
  case PM_NUMBERED_REFERENCE_READ_NODE:
21192
21258
  PM_PARSER_ERR_NODE_FORMAT_CONTENT(parser, node, PM_ERR_WRITE_TARGET_READONLY);
21193
- /* fallthrough */
21259
+ PRISM_FALLTHROUGH
21194
21260
  case PM_GLOBAL_VARIABLE_READ_NODE: {
21195
21261
  parser_lex(parser);
21196
21262
 
@@ -21400,6 +21466,33 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21400
21466
  case PM_TOKEN_STAR:
21401
21467
  case PM_TOKEN_STAR_STAR: {
21402
21468
  parser_lex(parser);
21469
+ pm_token_t operator = parser->previous;
21470
+ switch (PM_NODE_TYPE(node)) {
21471
+ case PM_RESCUE_MODIFIER_NODE: {
21472
+ pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
21473
+ if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21474
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21475
+ }
21476
+ break;
21477
+ }
21478
+ case PM_AND_NODE: {
21479
+ pm_and_node_t *cast = (pm_and_node_t *) node;
21480
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21481
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21482
+ }
21483
+ break;
21484
+ }
21485
+ case PM_OR_NODE: {
21486
+ pm_or_node_t *cast = (pm_or_node_t *) node;
21487
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21488
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21489
+ }
21490
+ break;
21491
+ }
21492
+ default:
21493
+ break;
21494
+ }
21495
+
21403
21496
  pm_node_t *argument = parse_expression(parser, binding_power, false, false, PM_ERR_EXPECT_EXPRESSION_AFTER_OPERATOR, (uint16_t) (depth + 1));
21404
21497
  return (pm_node_t *) pm_call_node_binary_create(parser, node, &token, argument, 0);
21405
21498
  }
@@ -21427,6 +21520,32 @@ parse_expression_infix(pm_parser_t *parser, pm_node_t *node, pm_binding_power_t
21427
21520
  return (pm_node_t *) pm_call_node_shorthand_create(parser, node, &operator, &arguments);
21428
21521
  }
21429
21522
 
21523
+ switch (PM_NODE_TYPE(node)) {
21524
+ case PM_RESCUE_MODIFIER_NODE: {
21525
+ pm_rescue_modifier_node_t *cast = (pm_rescue_modifier_node_t *) node;
21526
+ if (PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->rescue_expression, PM_MATCH_REQUIRED_NODE)) {
21527
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21528
+ }
21529
+ break;
21530
+ }
21531
+ case PM_AND_NODE: {
21532
+ pm_and_node_t *cast = (pm_and_node_t *) node;
21533
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21534
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21535
+ }
21536
+ break;
21537
+ }
21538
+ case PM_OR_NODE: {
21539
+ pm_or_node_t *cast = (pm_or_node_t *) node;
21540
+ if (PM_NODE_TYPE_P(cast->right, PM_MATCH_PREDICATE_NODE) || PM_NODE_TYPE_P(cast->right, PM_MATCH_REQUIRED_NODE)) {
21541
+ PM_PARSER_ERR_TOKEN_FORMAT(parser, operator, PM_ERR_EXPECT_EOL_AFTER_STATEMENT, pm_token_type_human(operator.type));
21542
+ }
21543
+ break;
21544
+ }
21545
+ default:
21546
+ break;
21547
+ }
21548
+
21430
21549
  pm_token_t message;
21431
21550
 
21432
21551
  switch (parser->current.type) {
@@ -21774,6 +21893,7 @@ parse_expression(pm_parser_t *parser, pm_binding_power_t binding_power, bool acc
21774
21893
  if (pm_symbol_node_label_p(node)) {
21775
21894
  return node;
21776
21895
  }
21896
+ break;
21777
21897
  default:
21778
21898
  break;
21779
21899
  }
@@ -22643,3 +22763,166 @@ pm_serialize_parse_comments(pm_buffer_t *buffer, const uint8_t *source, size_t s
22643
22763
  }
22644
22764
 
22645
22765
  #endif
22766
+
22767
+ /******************************************************************************/
22768
+ /* Slice queries for the Ruby API */
22769
+ /******************************************************************************/
22770
+
22771
/**
 * Classification of a source slice, as produced by pm_slice_type.
 */
typedef enum {
    /** The given encoding name is invalid. */
    PM_SLICE_TYPE_ERROR = -1,

    /** The slice does not match any of the other categories. */
    PM_SLICE_TYPE_NONE = 0,

    /** The slice is a valid local variable name. */
    PM_SLICE_TYPE_LOCAL = 1,

    /** The slice is a valid constant name. */
    PM_SLICE_TYPE_CONSTANT = 2,

    /** The slice is a valid method name. */
    PM_SLICE_TYPE_METHOD_NAME = 3
} pm_slice_type_t;
22788
+
22789
+ /**
22790
+ * Check that the slice is a valid local variable name or constant.
22791
+ */
22792
+ pm_slice_type_t
22793
+ pm_slice_type(const uint8_t *source, size_t length, const char *encoding_name) {
22794
+ // first, get the right encoding object
22795
+ const pm_encoding_t *encoding = pm_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name)));
22796
+ if (encoding == NULL) return PM_SLICE_TYPE_ERROR;
22797
+
22798
+ // check that there is at least one character
22799
+ if (length == 0) return PM_SLICE_TYPE_NONE;
22800
+
22801
+ size_t width;
22802
+ if ((width = encoding->alpha_char(source, (ptrdiff_t) length)) != 0) {
22803
+ // valid because alphabetical
22804
+ } else if (*source == '_') {
22805
+ // valid because underscore
22806
+ width = 1;
22807
+ } else if ((*source >= 0x80) && ((width = encoding->char_width(source, (ptrdiff_t) length)) > 0)) {
22808
+ // valid because multibyte
22809
+ } else {
22810
+ // invalid because no match
22811
+ return PM_SLICE_TYPE_NONE;
22812
+ }
22813
+
22814
+ // determine the type of the slice based on the first character
22815
+ const uint8_t *end = source + length;
22816
+ pm_slice_type_t result = encoding->isupper_char(source, end - source) ? PM_SLICE_TYPE_CONSTANT : PM_SLICE_TYPE_LOCAL;
22817
+
22818
+ // next, iterate through all of the bytes of the string to ensure that they
22819
+ // are all valid identifier characters
22820
+ source += width;
22821
+
22822
+ while (source < end) {
22823
+ if ((width = encoding->alnum_char(source, end - source)) != 0) {
22824
+ // valid because alphanumeric
22825
+ source += width;
22826
+ } else if (*source == '_') {
22827
+ // valid because underscore
22828
+ source++;
22829
+ } else if ((*source >= 0x80) && ((width = encoding->char_width(source, end - source)) > 0)) {
22830
+ // valid because multibyte
22831
+ source += width;
22832
+ } else {
22833
+ // invalid because no match
22834
+ break;
22835
+ }
22836
+ }
22837
+
22838
+ // accept a ! or ? at the end of the slice as a method name
22839
+ if (*source == '!' || *source == '?' || *source == '=') {
22840
+ source++;
22841
+ result = PM_SLICE_TYPE_METHOD_NAME;
22842
+ }
22843
+
22844
+ // valid if we are at the end of the slice
22845
+ return source == end ? result : PM_SLICE_TYPE_NONE;
22846
+ }
22847
+
22848
+ /**
22849
+ * Check that the slice is a valid local variable name.
22850
+ */
22851
+ PRISM_EXPORTED_FUNCTION pm_string_query_t
22852
+ pm_string_query_local(const uint8_t *source, size_t length, const char *encoding_name) {
22853
+ switch (pm_slice_type(source, length, encoding_name)) {
22854
+ case PM_SLICE_TYPE_ERROR:
22855
+ return PM_STRING_QUERY_ERROR;
22856
+ case PM_SLICE_TYPE_NONE:
22857
+ case PM_SLICE_TYPE_CONSTANT:
22858
+ case PM_SLICE_TYPE_METHOD_NAME:
22859
+ return PM_STRING_QUERY_FALSE;
22860
+ case PM_SLICE_TYPE_LOCAL:
22861
+ return PM_STRING_QUERY_TRUE;
22862
+ }
22863
+
22864
+ assert(false && "unreachable");
22865
+ return PM_STRING_QUERY_FALSE;
22866
+ }
22867
+
22868
+ /**
22869
+ * Check that the slice is a valid constant name.
22870
+ */
22871
+ PRISM_EXPORTED_FUNCTION pm_string_query_t
22872
+ pm_string_query_constant(const uint8_t *source, size_t length, const char *encoding_name) {
22873
+ switch (pm_slice_type(source, length, encoding_name)) {
22874
+ case PM_SLICE_TYPE_ERROR:
22875
+ return PM_STRING_QUERY_ERROR;
22876
+ case PM_SLICE_TYPE_NONE:
22877
+ case PM_SLICE_TYPE_LOCAL:
22878
+ case PM_SLICE_TYPE_METHOD_NAME:
22879
+ return PM_STRING_QUERY_FALSE;
22880
+ case PM_SLICE_TYPE_CONSTANT:
22881
+ return PM_STRING_QUERY_TRUE;
22882
+ }
22883
+
22884
+ assert(false && "unreachable");
22885
+ return PM_STRING_QUERY_FALSE;
22886
+ }
22887
+
22888
+ /**
22889
+ * Check that the slice is a valid method name.
22890
+ */
22891
+ PRISM_EXPORTED_FUNCTION pm_string_query_t
22892
+ pm_string_query_method_name(const uint8_t *source, size_t length, const char *encoding_name) {
22893
+ #define B(p) ((p) ? PM_STRING_QUERY_TRUE : PM_STRING_QUERY_FALSE)
22894
+ #define C1(c) (*source == c)
22895
+ #define C2(s) (memcmp(source, s, 2) == 0)
22896
+ #define C3(s) (memcmp(source, s, 3) == 0)
22897
+
22898
+ switch (pm_slice_type(source, length, encoding_name)) {
22899
+ case PM_SLICE_TYPE_ERROR:
22900
+ return PM_STRING_QUERY_ERROR;
22901
+ case PM_SLICE_TYPE_NONE:
22902
+ break;
22903
+ case PM_SLICE_TYPE_LOCAL:
22904
+ // numbered parameters are not valid method names
22905
+ return B((length != 2) || (source[0] != '_') || (source[1] == '0') || !pm_char_is_decimal_digit(source[1]));
22906
+ case PM_SLICE_TYPE_CONSTANT:
22907
+ // all constants are valid method names
22908
+ case PM_SLICE_TYPE_METHOD_NAME:
22909
+ // all method names are valid method names
22910
+ return PM_STRING_QUERY_TRUE;
22911
+ }
22912
+
22913
+ switch (length) {
22914
+ case 1:
22915
+ return B(C1('&') || C1('`') || C1('!') || C1('^') || C1('>') || C1('<') || C1('-') || C1('%') || C1('|') || C1('+') || C1('/') || C1('*') || C1('~'));
22916
+ case 2:
22917
+ return B(C2("!=") || C2("!~") || C2("[]") || C2("==") || C2("=~") || C2(">=") || C2(">>") || C2("<=") || C2("<<") || C2("**"));
22918
+ case 3:
22919
+ return B(C3("===") || C3("<=>") || C3("[]="));
22920
+ default:
22921
+ return PM_STRING_QUERY_FALSE;
22922
+ }
22923
+
22924
+ #undef B
22925
+ #undef C1
22926
+ #undef C2
22927
+ #undef C3
22928
+ }