yarp 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +29 -8
  3. data/CONTRIBUTING.md +2 -2
  4. data/Makefile +5 -5
  5. data/README.md +11 -12
  6. data/config.yml +6 -2
  7. data/docs/build_system.md +21 -21
  8. data/docs/building.md +4 -4
  9. data/docs/configuration.md +25 -21
  10. data/docs/design.md +2 -2
  11. data/docs/encoding.md +17 -17
  12. data/docs/fuzzing.md +4 -4
  13. data/docs/heredocs.md +3 -3
  14. data/docs/mapping.md +94 -94
  15. data/docs/ripper.md +4 -4
  16. data/docs/ruby_api.md +11 -11
  17. data/docs/serialization.md +17 -16
  18. data/docs/testing.md +6 -6
  19. data/ext/prism/api_node.c +4725 -0
  20. data/ext/{yarp → prism}/api_pack.c +82 -82
  21. data/ext/{yarp → prism}/extconf.rb +13 -13
  22. data/ext/{yarp → prism}/extension.c +175 -168
  23. data/ext/prism/extension.h +18 -0
  24. data/include/prism/ast.h +1932 -0
  25. data/include/prism/defines.h +45 -0
  26. data/include/prism/diagnostic.h +231 -0
  27. data/include/{yarp/enc/yp_encoding.h → prism/enc/pm_encoding.h} +40 -40
  28. data/include/prism/node.h +41 -0
  29. data/include/prism/pack.h +141 -0
  30. data/include/{yarp → prism}/parser.h +143 -142
  31. data/include/prism/regexp.h +19 -0
  32. data/include/prism/unescape.h +48 -0
  33. data/include/prism/util/pm_buffer.h +51 -0
  34. data/include/{yarp/util/yp_char.h → prism/util/pm_char.h} +20 -20
  35. data/include/{yarp/util/yp_constant_pool.h → prism/util/pm_constant_pool.h} +26 -22
  36. data/include/{yarp/util/yp_list.h → prism/util/pm_list.h} +21 -21
  37. data/include/prism/util/pm_memchr.h +14 -0
  38. data/include/{yarp/util/yp_newline_list.h → prism/util/pm_newline_list.h} +11 -11
  39. data/include/prism/util/pm_state_stack.h +24 -0
  40. data/include/{yarp/util/yp_string.h → prism/util/pm_string.h} +20 -20
  41. data/include/prism/util/pm_string_list.h +25 -0
  42. data/include/{yarp/util/yp_strpbrk.h → prism/util/pm_strpbrk.h} +7 -7
  43. data/include/prism/version.h +4 -0
  44. data/include/prism.h +82 -0
  45. data/lib/prism/compiler.rb +465 -0
  46. data/lib/prism/debug.rb +157 -0
  47. data/lib/{yarp/desugar_visitor.rb → prism/desugar_compiler.rb} +4 -2
  48. data/lib/prism/dispatcher.rb +2051 -0
  49. data/lib/prism/dsl.rb +750 -0
  50. data/lib/{yarp → prism}/ffi.rb +66 -67
  51. data/lib/{yarp → prism}/lex_compat.rb +40 -43
  52. data/lib/{yarp/mutation_visitor.rb → prism/mutation_compiler.rb} +3 -3
  53. data/lib/{yarp → prism}/node.rb +2012 -2593
  54. data/lib/prism/node_ext.rb +55 -0
  55. data/lib/prism/node_inspector.rb +68 -0
  56. data/lib/{yarp → prism}/pack.rb +1 -1
  57. data/lib/{yarp → prism}/parse_result/comments.rb +1 -1
  58. data/lib/{yarp → prism}/parse_result/newlines.rb +1 -1
  59. data/lib/prism/parse_result.rb +266 -0
  60. data/lib/{yarp → prism}/pattern.rb +14 -14
  61. data/lib/{yarp → prism}/ripper_compat.rb +5 -5
  62. data/lib/{yarp → prism}/serialize.rb +12 -7
  63. data/lib/prism/visitor.rb +470 -0
  64. data/lib/prism.rb +64 -0
  65. data/lib/yarp.rb +2 -614
  66. data/src/diagnostic.c +213 -208
  67. data/src/enc/pm_big5.c +52 -0
  68. data/src/enc/pm_euc_jp.c +58 -0
  69. data/src/enc/{yp_gbk.c → pm_gbk.c} +16 -16
  70. data/src/enc/pm_shift_jis.c +56 -0
  71. data/src/enc/{yp_tables.c → pm_tables.c} +69 -69
  72. data/src/enc/{yp_unicode.c → pm_unicode.c} +40 -40
  73. data/src/enc/pm_windows_31j.c +56 -0
  74. data/src/node.c +1293 -1233
  75. data/src/pack.c +247 -247
  76. data/src/prettyprint.c +1479 -1479
  77. data/src/{yarp.c → prism.c} +5205 -5083
  78. data/src/regexp.c +132 -132
  79. data/src/serialize.c +1121 -1121
  80. data/src/token_type.c +169 -167
  81. data/src/unescape.c +106 -87
  82. data/src/util/pm_buffer.c +103 -0
  83. data/src/util/{yp_char.c → pm_char.c} +72 -72
  84. data/src/util/{yp_constant_pool.c → pm_constant_pool.c} +85 -64
  85. data/src/util/{yp_list.c → pm_list.c} +10 -10
  86. data/src/util/{yp_memchr.c → pm_memchr.c} +6 -4
  87. data/src/util/{yp_newline_list.c → pm_newline_list.c} +21 -21
  88. data/src/util/{yp_state_stack.c → pm_state_stack.c} +4 -4
  89. data/src/util/{yp_string.c → pm_string.c} +38 -38
  90. data/src/util/pm_string_list.c +29 -0
  91. data/src/util/{yp_strncasecmp.c → pm_strncasecmp.c} +1 -1
  92. data/src/util/{yp_strpbrk.c → pm_strpbrk.c} +8 -8
  93. data/yarp.gemspec +68 -59
  94. metadata +70 -61
  95. data/ext/yarp/api_node.c +0 -4728
  96. data/ext/yarp/extension.h +0 -18
  97. data/include/yarp/ast.h +0 -1929
  98. data/include/yarp/defines.h +0 -45
  99. data/include/yarp/diagnostic.h +0 -226
  100. data/include/yarp/node.h +0 -42
  101. data/include/yarp/pack.h +0 -141
  102. data/include/yarp/regexp.h +0 -19
  103. data/include/yarp/unescape.h +0 -44
  104. data/include/yarp/util/yp_buffer.h +0 -51
  105. data/include/yarp/util/yp_memchr.h +0 -14
  106. data/include/yarp/util/yp_state_stack.h +0 -24
  107. data/include/yarp/util/yp_string_list.h +0 -25
  108. data/include/yarp/version.h +0 -4
  109. data/include/yarp.h +0 -82
  110. data/src/enc/yp_big5.c +0 -52
  111. data/src/enc/yp_euc_jp.c +0 -58
  112. data/src/enc/yp_shift_jis.c +0 -56
  113. data/src/enc/yp_windows_31j.c +0 -56
  114. data/src/util/yp_buffer.c +0 -101
  115. data/src/util/yp_string_list.c +0 -29
data/src/regexp.c CHANGED
@@ -1,19 +1,19 @@
1
- #include "yarp/regexp.h"
1
+ #include "prism/regexp.h"
2
2
 
3
3
  // This is the parser that is going to handle parsing regular expressions.
4
4
  typedef struct {
5
5
  const uint8_t *start;
6
6
  const uint8_t *cursor;
7
7
  const uint8_t *end;
8
- yp_string_list_t *named_captures;
8
+ pm_string_list_t *named_captures;
9
9
  bool encoding_changed;
10
- yp_encoding_t *encoding;
11
- } yp_regexp_parser_t;
10
+ pm_encoding_t *encoding;
11
+ } pm_regexp_parser_t;
12
12
 
13
13
  // This initializes a new parser with the given source.
14
14
  static void
15
- yp_regexp_parser_init(yp_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
16
- *parser = (yp_regexp_parser_t) {
15
+ pm_regexp_parser_init(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
16
+ *parser = (pm_regexp_parser_t) {
17
17
  .start = start,
18
18
  .cursor = start,
19
19
  .end = end,
@@ -25,23 +25,23 @@ yp_regexp_parser_init(yp_regexp_parser_t *parser, const uint8_t *start, const ui
25
25
 
26
26
  // This appends a new string to the list of named captures.
27
27
  static void
28
- yp_regexp_parser_named_capture(yp_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
29
- yp_string_t string;
30
- yp_string_shared_init(&string, start, end);
31
- yp_string_list_append(parser->named_captures, &string);
32
- yp_string_free(&string);
28
+ pm_regexp_parser_named_capture(pm_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
29
+ pm_string_t string;
30
+ pm_string_shared_init(&string, start, end);
31
+ pm_string_list_append(parser->named_captures, &string);
32
+ pm_string_free(&string);
33
33
  }
34
34
 
35
35
  // Returns true if the next character is the end of the source.
36
36
  static inline bool
37
- yp_regexp_char_is_eof(yp_regexp_parser_t *parser) {
37
+ pm_regexp_char_is_eof(pm_regexp_parser_t *parser) {
38
38
  return parser->cursor >= parser->end;
39
39
  }
40
40
 
41
41
  // Optionally accept a char and consume it if it exists.
42
42
  static inline bool
43
- yp_regexp_char_accept(yp_regexp_parser_t *parser, uint8_t value) {
44
- if (!yp_regexp_char_is_eof(parser) && *parser->cursor == value) {
43
+ pm_regexp_char_accept(pm_regexp_parser_t *parser, uint8_t value) {
44
+ if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
45
45
  parser->cursor++;
46
46
  return true;
47
47
  }
@@ -50,8 +50,8 @@ yp_regexp_char_accept(yp_regexp_parser_t *parser, uint8_t value) {
50
50
 
51
51
  // Expect a character to be present and consume it.
52
52
  static inline bool
53
- yp_regexp_char_expect(yp_regexp_parser_t *parser, uint8_t value) {
54
- if (!yp_regexp_char_is_eof(parser) && *parser->cursor == value) {
53
+ pm_regexp_char_expect(pm_regexp_parser_t *parser, uint8_t value) {
54
+ if (!pm_regexp_char_is_eof(parser) && *parser->cursor == value) {
55
55
  parser->cursor++;
56
56
  return true;
57
57
  }
@@ -60,12 +60,12 @@ yp_regexp_char_expect(yp_regexp_parser_t *parser, uint8_t value) {
60
60
 
61
61
  // This advances the current token to the next instance of the given character.
62
62
  static bool
63
- yp_regexp_char_find(yp_regexp_parser_t *parser, uint8_t value) {
64
- if (yp_regexp_char_is_eof(parser)) {
63
+ pm_regexp_char_find(pm_regexp_parser_t *parser, uint8_t value) {
64
+ if (pm_regexp_char_is_eof(parser)) {
65
65
  return false;
66
66
  }
67
67
 
68
- const uint8_t *end = (const uint8_t *) yp_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
68
+ const uint8_t *end = (const uint8_t *) pm_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
69
69
  if (end == NULL) {
70
70
  return false;
71
71
  }
@@ -106,41 +106,41 @@ yp_regexp_char_find(yp_regexp_parser_t *parser, uint8_t value) {
106
106
  // Note that by the time we've hit this function, the lbrace has already been
107
107
  // consumed so we're in the start state.
108
108
  static bool
109
- yp_regexp_parse_range_quantifier(yp_regexp_parser_t *parser) {
109
+ pm_regexp_parse_range_quantifier(pm_regexp_parser_t *parser) {
110
110
  const uint8_t *savepoint = parser->cursor;
111
111
 
112
112
  enum {
113
- YP_REGEXP_RANGE_QUANTIFIER_STATE_START,
114
- YP_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM,
115
- YP_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM,
116
- YP_REGEXP_RANGE_QUANTIFIER_STATE_COMMA
117
- } state = YP_REGEXP_RANGE_QUANTIFIER_STATE_START;
113
+ PM_REGEXP_RANGE_QUANTIFIER_STATE_START,
114
+ PM_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM,
115
+ PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM,
116
+ PM_REGEXP_RANGE_QUANTIFIER_STATE_COMMA
117
+ } state = PM_REGEXP_RANGE_QUANTIFIER_STATE_START;
118
118
 
119
119
  while (1) {
120
120
  switch (state) {
121
- case YP_REGEXP_RANGE_QUANTIFIER_STATE_START:
121
+ case PM_REGEXP_RANGE_QUANTIFIER_STATE_START:
122
122
  switch (*parser->cursor) {
123
123
  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
124
124
  parser->cursor++;
125
- state = YP_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM;
125
+ state = PM_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM;
126
126
  break;
127
127
  case ',':
128
128
  parser->cursor++;
129
- state = YP_REGEXP_RANGE_QUANTIFIER_STATE_COMMA;
129
+ state = PM_REGEXP_RANGE_QUANTIFIER_STATE_COMMA;
130
130
  break;
131
131
  default:
132
132
  parser->cursor = savepoint;
133
133
  return true;
134
134
  }
135
135
  break;
136
- case YP_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM:
136
+ case PM_REGEXP_RANGE_QUANTIFIER_STATE_MINIMUM:
137
137
  switch (*parser->cursor) {
138
138
  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
139
139
  parser->cursor++;
140
140
  break;
141
141
  case ',':
142
142
  parser->cursor++;
143
- state = YP_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM;
143
+ state = PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM;
144
144
  break;
145
145
  case '}':
146
146
  parser->cursor++;
@@ -150,18 +150,18 @@ yp_regexp_parse_range_quantifier(yp_regexp_parser_t *parser) {
150
150
  return true;
151
151
  }
152
152
  break;
153
- case YP_REGEXP_RANGE_QUANTIFIER_STATE_COMMA:
153
+ case PM_REGEXP_RANGE_QUANTIFIER_STATE_COMMA:
154
154
  switch (*parser->cursor) {
155
155
  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
156
156
  parser->cursor++;
157
- state = YP_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM;
157
+ state = PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM;
158
158
  break;
159
159
  default:
160
160
  parser->cursor = savepoint;
161
161
  return true;
162
162
  }
163
163
  break;
164
- case YP_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM:
164
+ case PM_REGEXP_RANGE_QUANTIFIER_STATE_MAXIMUM:
165
165
  switch (*parser->cursor) {
166
166
  case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
167
167
  parser->cursor++;
@@ -187,7 +187,7 @@ yp_regexp_parse_range_quantifier(yp_regexp_parser_t *parser) {
187
187
  // | <empty>
188
188
  // ;
189
189
  static bool
190
- yp_regexp_parse_quantifier(yp_regexp_parser_t *parser) {
190
+ pm_regexp_parse_quantifier(pm_regexp_parser_t *parser) {
191
191
  switch (*parser->cursor) {
192
192
  case '*':
193
193
  case '+':
@@ -196,7 +196,7 @@ yp_regexp_parse_quantifier(yp_regexp_parser_t *parser) {
196
196
  return true;
197
197
  case '{':
198
198
  parser->cursor++;
199
- return yp_regexp_parse_range_quantifier(parser);
199
+ return pm_regexp_parse_range_quantifier(parser);
200
200
  default:
201
201
  // In this case there is no quantifier.
202
202
  return true;
@@ -206,37 +206,37 @@ yp_regexp_parse_quantifier(yp_regexp_parser_t *parser) {
206
206
  // match-posix-class : '[' '[' ':' '^'? CHAR+ ':' ']' ']'
207
207
  // ;
208
208
  static bool
209
- yp_regexp_parse_posix_class(yp_regexp_parser_t *parser) {
210
- if (!yp_regexp_char_expect(parser, ':')) {
209
+ pm_regexp_parse_posix_class(pm_regexp_parser_t *parser) {
210
+ if (!pm_regexp_char_expect(parser, ':')) {
211
211
  return false;
212
212
  }
213
213
 
214
- yp_regexp_char_accept(parser, '^');
214
+ pm_regexp_char_accept(parser, '^');
215
215
 
216
216
  return (
217
- yp_regexp_char_find(parser, ':') &&
218
- yp_regexp_char_expect(parser, ']') &&
219
- yp_regexp_char_expect(parser, ']')
217
+ pm_regexp_char_find(parser, ':') &&
218
+ pm_regexp_char_expect(parser, ']') &&
219
+ pm_regexp_char_expect(parser, ']')
220
220
  );
221
221
  }
222
222
 
223
223
  // Forward declaration because character sets can be nested.
224
224
  static bool
225
- yp_regexp_parse_lbracket(yp_regexp_parser_t *parser);
225
+ pm_regexp_parse_lbracket(pm_regexp_parser_t *parser);
226
226
 
227
227
  // match-char-set : '[' '^'? (match-range | match-char)* ']'
228
228
  // ;
229
229
  static bool
230
- yp_regexp_parse_character_set(yp_regexp_parser_t *parser) {
231
- yp_regexp_char_accept(parser, '^');
230
+ pm_regexp_parse_character_set(pm_regexp_parser_t *parser) {
231
+ pm_regexp_char_accept(parser, '^');
232
232
 
233
- while (!yp_regexp_char_is_eof(parser) && *parser->cursor != ']') {
233
+ while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ']') {
234
234
  switch (*parser->cursor++) {
235
235
  case '[':
236
- yp_regexp_parse_lbracket(parser);
236
+ pm_regexp_parse_lbracket(parser);
237
237
  break;
238
238
  case '\\':
239
- if (!yp_regexp_char_is_eof(parser)) {
239
+ if (!pm_regexp_char_is_eof(parser)) {
240
240
  parser->cursor++;
241
241
  }
242
242
  break;
@@ -246,78 +246,78 @@ yp_regexp_parse_character_set(yp_regexp_parser_t *parser) {
246
246
  }
247
247
  }
248
248
 
249
- return yp_regexp_char_expect(parser, ']');
249
+ return pm_regexp_char_expect(parser, ']');
250
250
  }
251
251
 
252
252
  // A left bracket can either mean a POSIX class or a character set.
253
253
  static bool
254
- yp_regexp_parse_lbracket(yp_regexp_parser_t *parser) {
254
+ pm_regexp_parse_lbracket(pm_regexp_parser_t *parser) {
255
255
  const uint8_t *reset = parser->cursor;
256
256
 
257
257
  if ((parser->cursor + 2 < parser->end) && parser->cursor[0] == '[' && parser->cursor[1] == ':') {
258
258
  parser->cursor++;
259
- if (yp_regexp_parse_posix_class(parser)) return true;
259
+ if (pm_regexp_parse_posix_class(parser)) return true;
260
260
 
261
261
  parser->cursor = reset;
262
262
  }
263
263
 
264
- return yp_regexp_parse_character_set(parser);
264
+ return pm_regexp_parse_character_set(parser);
265
265
  }
266
266
 
267
267
  // Forward declaration here since parsing groups needs to go back up the grammar
268
268
  // to parse expressions within them.
269
269
  static bool
270
- yp_regexp_parse_expression(yp_regexp_parser_t *parser);
270
+ pm_regexp_parse_expression(pm_regexp_parser_t *parser);
271
271
 
272
272
  // These are the states of the options that are configurable on the regular
273
273
  // expression (or from within a group).
274
274
  typedef enum {
275
- YP_REGEXP_OPTION_STATE_INVALID,
276
- YP_REGEXP_OPTION_STATE_TOGGLEABLE,
277
- YP_REGEXP_OPTION_STATE_ADDABLE,
278
- YP_REGEXP_OPTION_STATE_ADDED,
279
- YP_REGEXP_OPTION_STATE_REMOVED
280
- } yp_regexp_option_state_t;
275
+ PM_REGEXP_OPTION_STATE_INVALID,
276
+ PM_REGEXP_OPTION_STATE_TOGGLEABLE,
277
+ PM_REGEXP_OPTION_STATE_ADDABLE,
278
+ PM_REGEXP_OPTION_STATE_ADDED,
279
+ PM_REGEXP_OPTION_STATE_REMOVED
280
+ } pm_regexp_option_state_t;
281
281
 
282
282
  // These are the options that are configurable on the regular expression (or
283
283
  // from within a group).
284
- #define YP_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a'
285
- #define YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x'
286
- #define YP_REGEXP_OPTION_STATE_SLOTS (YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1)
284
+ #define PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM 'a'
285
+ #define PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM 'x'
286
+ #define PRISM_REGEXP_OPTION_STATE_SLOTS (PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM + 1)
287
287
 
288
288
  // This is the set of options that are configurable on the regular expression.
289
289
  typedef struct {
290
- uint8_t values[YP_REGEXP_OPTION_STATE_SLOTS];
291
- } yp_regexp_options_t;
290
+ uint8_t values[PRISM_REGEXP_OPTION_STATE_SLOTS];
291
+ } pm_regexp_options_t;
292
292
 
293
293
  // Initialize a new set of options to their default values.
294
294
  static void
295
- yp_regexp_options_init(yp_regexp_options_t *options) {
296
- memset(options, YP_REGEXP_OPTION_STATE_INVALID, sizeof(uint8_t) * YP_REGEXP_OPTION_STATE_SLOTS);
297
- options->values['i' - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM] = YP_REGEXP_OPTION_STATE_TOGGLEABLE;
298
- options->values['m' - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM] = YP_REGEXP_OPTION_STATE_TOGGLEABLE;
299
- options->values['x' - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM] = YP_REGEXP_OPTION_STATE_TOGGLEABLE;
300
- options->values['d' - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM] = YP_REGEXP_OPTION_STATE_ADDABLE;
301
- options->values['a' - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM] = YP_REGEXP_OPTION_STATE_ADDABLE;
302
- options->values['u' - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM] = YP_REGEXP_OPTION_STATE_ADDABLE;
295
+ pm_regexp_options_init(pm_regexp_options_t *options) {
296
+ memset(options, PM_REGEXP_OPTION_STATE_INVALID, sizeof(uint8_t) * PRISM_REGEXP_OPTION_STATE_SLOTS);
297
+ options->values['i' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_TOGGLEABLE;
298
+ options->values['m' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_TOGGLEABLE;
299
+ options->values['x' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_TOGGLEABLE;
300
+ options->values['d' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
301
+ options->values['a' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
302
+ options->values['u' - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM] = PM_REGEXP_OPTION_STATE_ADDABLE;
303
303
  }
304
304
 
305
305
  // Attempt to add the given option to the set of options. Returns true if it was
306
306
  // added, false if it was already present.
307
307
  static bool
308
- yp_regexp_options_add(yp_regexp_options_t *options, uint8_t key) {
309
- if (key >= YP_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
310
- key = (uint8_t) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
308
+ pm_regexp_options_add(pm_regexp_options_t *options, uint8_t key) {
309
+ if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
310
+ key = (uint8_t) (key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM);
311
311
 
312
312
  switch (options->values[key]) {
313
- case YP_REGEXP_OPTION_STATE_INVALID:
314
- case YP_REGEXP_OPTION_STATE_REMOVED:
313
+ case PM_REGEXP_OPTION_STATE_INVALID:
314
+ case PM_REGEXP_OPTION_STATE_REMOVED:
315
315
  return false;
316
- case YP_REGEXP_OPTION_STATE_TOGGLEABLE:
317
- case YP_REGEXP_OPTION_STATE_ADDABLE:
318
- options->values[key] = YP_REGEXP_OPTION_STATE_ADDED;
316
+ case PM_REGEXP_OPTION_STATE_TOGGLEABLE:
317
+ case PM_REGEXP_OPTION_STATE_ADDABLE:
318
+ options->values[key] = PM_REGEXP_OPTION_STATE_ADDED;
319
319
  return true;
320
- case YP_REGEXP_OPTION_STATE_ADDED:
320
+ case PM_REGEXP_OPTION_STATE_ADDED:
321
321
  return true;
322
322
  }
323
323
  }
@@ -328,18 +328,18 @@ yp_regexp_options_add(yp_regexp_options_t *options, uint8_t key) {
328
328
  // Attempt to remove the given option from the set of options. Returns true if
329
329
  // it was removed, false if it was already absent.
330
330
  static bool
331
- yp_regexp_options_remove(yp_regexp_options_t *options, uint8_t key) {
332
- if (key >= YP_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
333
- key = (uint8_t) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
331
+ pm_regexp_options_remove(pm_regexp_options_t *options, uint8_t key) {
332
+ if (key >= PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= PRISM_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
333
+ key = (uint8_t) (key - PRISM_REGEXP_OPTION_STATE_SLOT_MINIMUM);
334
334
 
335
335
  switch (options->values[key]) {
336
- case YP_REGEXP_OPTION_STATE_INVALID:
337
- case YP_REGEXP_OPTION_STATE_ADDABLE:
336
+ case PM_REGEXP_OPTION_STATE_INVALID:
337
+ case PM_REGEXP_OPTION_STATE_ADDABLE:
338
338
  return false;
339
- case YP_REGEXP_OPTION_STATE_TOGGLEABLE:
340
- case YP_REGEXP_OPTION_STATE_ADDED:
341
- case YP_REGEXP_OPTION_STATE_REMOVED:
342
- options->values[key] = YP_REGEXP_OPTION_STATE_REMOVED;
339
+ case PM_REGEXP_OPTION_STATE_TOGGLEABLE:
340
+ case PM_REGEXP_OPTION_STATE_ADDED:
341
+ case PM_REGEXP_OPTION_STATE_REMOVED:
342
+ options->values[key] = PM_REGEXP_OPTION_STATE_REMOVED;
343
343
  return true;
344
344
  }
345
345
  }
@@ -368,14 +368,14 @@ yp_regexp_options_remove(yp_regexp_options_t *options, uint8_t key) {
368
368
  // * (?imxdau-imx:subexp) - turn on and off configuration for an expression
369
369
  //
370
370
  static bool
371
- yp_regexp_parse_group(yp_regexp_parser_t *parser) {
371
+ pm_regexp_parse_group(pm_regexp_parser_t *parser) {
372
372
  // First, parse any options for the group.
373
- if (yp_regexp_char_accept(parser, '?')) {
374
- if (yp_regexp_char_is_eof(parser)) {
373
+ if (pm_regexp_char_accept(parser, '?')) {
374
+ if (pm_regexp_char_is_eof(parser)) {
375
375
  return false;
376
376
  }
377
- yp_regexp_options_t options;
378
- yp_regexp_options_init(&options);
377
+ pm_regexp_options_t options;
378
+ pm_regexp_options_init(&options);
379
379
 
380
380
  switch (*parser->cursor) {
381
381
  case '#': { // inline comments
@@ -403,10 +403,10 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
403
403
  // Here we can take the fast path and use memchr to find the
404
404
  // next ) because we are safe checking backward for \ since
405
405
  // it cannot be a trailing character.
406
- bool found = yp_regexp_char_find(parser, ')');
406
+ bool found = pm_regexp_char_find(parser, ')');
407
407
 
408
408
  while (found && (parser->start <= parser->cursor - 2) && (*(parser->cursor - 2) == '\\')) {
409
- found = yp_regexp_char_find(parser, ')');
409
+ found = pm_regexp_char_find(parser, ')');
410
410
  }
411
411
 
412
412
  return found;
@@ -421,7 +421,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
421
421
  break;
422
422
  case '<':
423
423
  parser->cursor++;
424
- if (yp_regexp_char_is_eof(parser)) {
424
+ if (pm_regexp_char_is_eof(parser)) {
425
425
  return false;
426
426
  }
427
427
 
@@ -432,37 +432,37 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
432
432
  break;
433
433
  default: { // named capture group
434
434
  const uint8_t *start = parser->cursor;
435
- if (!yp_regexp_char_find(parser, '>')) {
435
+ if (!pm_regexp_char_find(parser, '>')) {
436
436
  return false;
437
437
  }
438
- yp_regexp_parser_named_capture(parser, start, parser->cursor - 1);
438
+ pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
439
439
  break;
440
440
  }
441
441
  }
442
442
  break;
443
443
  case '\'': { // named capture group
444
444
  const uint8_t *start = ++parser->cursor;
445
- if (!yp_regexp_char_find(parser, '\'')) {
445
+ if (!pm_regexp_char_find(parser, '\'')) {
446
446
  return false;
447
447
  }
448
448
 
449
- yp_regexp_parser_named_capture(parser, start, parser->cursor - 1);
449
+ pm_regexp_parser_named_capture(parser, start, parser->cursor - 1);
450
450
  break;
451
451
  }
452
452
  case '(': // conditional expression
453
- if (!yp_regexp_char_find(parser, ')')) {
453
+ if (!pm_regexp_char_find(parser, ')')) {
454
454
  return false;
455
455
  }
456
456
  break;
457
457
  case 'i': case 'm': case 'x': case 'd': case 'a': case 'u': // options
458
- while (!yp_regexp_char_is_eof(parser) && *parser->cursor != '-' && *parser->cursor != ':' && *parser->cursor != ')') {
459
- if (!yp_regexp_options_add(&options, *parser->cursor)) {
458
+ while (!pm_regexp_char_is_eof(parser) && *parser->cursor != '-' && *parser->cursor != ':' && *parser->cursor != ')') {
459
+ if (!pm_regexp_options_add(&options, *parser->cursor)) {
460
460
  return false;
461
461
  }
462
462
  parser->cursor++;
463
463
  }
464
464
 
465
- if (yp_regexp_char_is_eof(parser)) {
465
+ if (pm_regexp_char_is_eof(parser)) {
466
466
  return false;
467
467
  }
468
468
 
@@ -473,14 +473,14 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
473
473
  /* fallthrough */
474
474
  case '-':
475
475
  parser->cursor++;
476
- while (!yp_regexp_char_is_eof(parser) && *parser->cursor != ':' && *parser->cursor != ')') {
477
- if (!yp_regexp_options_remove(&options, *parser->cursor)) {
476
+ while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ':' && *parser->cursor != ')') {
477
+ if (!pm_regexp_options_remove(&options, *parser->cursor)) {
478
478
  return false;
479
479
  }
480
480
  parser->cursor++;
481
481
  }
482
482
 
483
- if (yp_regexp_char_is_eof(parser)) {
483
+ if (pm_regexp_char_is_eof(parser)) {
484
484
  return false;
485
485
  }
486
486
  break;
@@ -490,15 +490,15 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
490
490
  }
491
491
 
492
492
  // Now, parse the expressions within this group.
493
- while (!yp_regexp_char_is_eof(parser) && *parser->cursor != ')') {
494
- if (!yp_regexp_parse_expression(parser)) {
493
+ while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')') {
494
+ if (!pm_regexp_parse_expression(parser)) {
495
495
  return false;
496
496
  }
497
- yp_regexp_char_accept(parser, '|');
497
+ pm_regexp_char_accept(parser, '|');
498
498
  }
499
499
 
500
500
  // Finally, make sure we have a closing parenthesis.
501
- return yp_regexp_char_expect(parser, ')');
501
+ return pm_regexp_char_expect(parser, ')');
502
502
  }
503
503
 
504
504
  // item : anchor
@@ -512,35 +512,35 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
512
512
  // | quantified
513
513
  // ;
514
514
  static bool
515
- yp_regexp_parse_item(yp_regexp_parser_t *parser) {
515
+ pm_regexp_parse_item(pm_regexp_parser_t *parser) {
516
516
  switch (*parser->cursor++) {
517
517
  case '^':
518
518
  case '$':
519
519
  return true;
520
520
  case '\\':
521
- if (!yp_regexp_char_is_eof(parser)) {
521
+ if (!pm_regexp_char_is_eof(parser)) {
522
522
  parser->cursor++;
523
523
  }
524
- return yp_regexp_parse_quantifier(parser);
524
+ return pm_regexp_parse_quantifier(parser);
525
525
  case '(':
526
- return yp_regexp_parse_group(parser) && yp_regexp_parse_quantifier(parser);
526
+ return pm_regexp_parse_group(parser) && pm_regexp_parse_quantifier(parser);
527
527
  case '[':
528
- return yp_regexp_parse_lbracket(parser) && yp_regexp_parse_quantifier(parser);
528
+ return pm_regexp_parse_lbracket(parser) && pm_regexp_parse_quantifier(parser);
529
529
  default:
530
- return yp_regexp_parse_quantifier(parser);
530
+ return pm_regexp_parse_quantifier(parser);
531
531
  }
532
532
  }
533
533
 
534
534
  // expression : item+
535
535
  // ;
536
536
  static bool
537
- yp_regexp_parse_expression(yp_regexp_parser_t *parser) {
538
- if (!yp_regexp_parse_item(parser)) {
537
+ pm_regexp_parse_expression(pm_regexp_parser_t *parser) {
538
+ if (!pm_regexp_parse_item(parser)) {
539
539
  return false;
540
540
  }
541
541
 
542
- while (!yp_regexp_char_is_eof(parser) && *parser->cursor != ')' && *parser->cursor != '|') {
543
- if (!yp_regexp_parse_item(parser)) {
542
+ while (!pm_regexp_char_is_eof(parser) && *parser->cursor != ')' && *parser->cursor != '|') {
543
+ if (!pm_regexp_parse_item(parser)) {
544
544
  return false;
545
545
  }
546
546
  }
@@ -553,28 +553,28 @@ yp_regexp_parse_expression(yp_regexp_parser_t *parser) {
553
553
  // | expression '|' pattern
554
554
  // ;
555
555
  static bool
556
- yp_regexp_parse_pattern(yp_regexp_parser_t *parser) {
556
+ pm_regexp_parse_pattern(pm_regexp_parser_t *parser) {
557
557
  return (
558
558
  (
559
559
  // Exit early if the pattern is empty.
560
- yp_regexp_char_is_eof(parser) ||
560
+ pm_regexp_char_is_eof(parser) ||
561
561
  // Parse the first expression in the pattern.
562
- yp_regexp_parse_expression(parser)
562
+ pm_regexp_parse_expression(parser)
563
563
  ) &&
564
564
  (
565
565
  // Return now if we've parsed the entire pattern.
566
- yp_regexp_char_is_eof(parser) ||
566
+ pm_regexp_char_is_eof(parser) ||
567
567
  // Otherwise, we should have a pipe character.
568
- (yp_regexp_char_expect(parser, '|') && yp_regexp_parse_pattern(parser))
568
+ (pm_regexp_char_expect(parser, '|') && pm_regexp_parse_pattern(parser))
569
569
  )
570
570
  );
571
571
  }
572
572
 
573
573
  // Parse a regular expression and extract the names of all of the named capture
574
574
  // groups.
575
- YP_EXPORTED_FUNCTION bool
576
- yp_regexp_named_capture_group_names(const uint8_t *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
577
- yp_regexp_parser_t parser;
578
- yp_regexp_parser_init(&parser, source, source + size, named_captures, encoding_changed, encoding);
579
- return yp_regexp_parse_pattern(&parser);
575
+ PRISM_EXPORTED_FUNCTION bool
576
+ pm_regexp_named_capture_group_names(const uint8_t *source, size_t size, pm_string_list_t *named_captures, bool encoding_changed, pm_encoding_t *encoding) {
577
+ pm_regexp_parser_t parser;
578
+ pm_regexp_parser_init(&parser, source, source + size, named_captures, encoding_changed, encoding);
579
+ return pm_regexp_parse_pattern(&parser);
580
580
  }