yarp 0.8.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +48 -1
  3. data/Makefile +5 -1
  4. data/README.md +4 -3
  5. data/config.yml +461 -150
  6. data/docs/configuration.md +1 -0
  7. data/docs/encoding.md +5 -5
  8. data/docs/ruby_api.md +2 -0
  9. data/docs/serialization.md +3 -3
  10. data/docs/testing.md +2 -2
  11. data/ext/yarp/api_node.c +810 -199
  12. data/ext/yarp/extension.c +94 -31
  13. data/ext/yarp/extension.h +2 -2
  14. data/include/yarp/ast.h +653 -150
  15. data/include/yarp/defines.h +2 -1
  16. data/include/yarp/diagnostic.h +3 -3
  17. data/include/yarp/enc/yp_encoding.h +10 -10
  18. data/include/yarp/node.h +10 -0
  19. data/include/yarp/parser.h +19 -19
  20. data/include/yarp/regexp.h +1 -1
  21. data/include/yarp/unescape.h +7 -5
  22. data/include/yarp/util/yp_buffer.h +3 -0
  23. data/include/yarp/util/yp_char.h +16 -16
  24. data/include/yarp/util/yp_constant_pool.h +2 -2
  25. data/include/yarp/util/yp_newline_list.h +7 -4
  26. data/include/yarp/util/yp_string.h +4 -4
  27. data/include/yarp/util/yp_string_list.h +0 -3
  28. data/include/yarp/util/yp_strpbrk.h +1 -1
  29. data/include/yarp/version.h +2 -2
  30. data/include/yarp.h +14 -3
  31. data/lib/yarp/desugar_visitor.rb +204 -0
  32. data/lib/yarp/ffi.rb +27 -1
  33. data/lib/yarp/lex_compat.rb +93 -25
  34. data/lib/yarp/mutation_visitor.rb +683 -0
  35. data/lib/yarp/node.rb +3121 -597
  36. data/lib/yarp/serialize.rb +198 -126
  37. data/lib/yarp.rb +53 -7
  38. data/src/diagnostic.c +1 -1
  39. data/src/enc/yp_big5.c +15 -42
  40. data/src/enc/yp_euc_jp.c +16 -43
  41. data/src/enc/yp_gbk.c +19 -46
  42. data/src/enc/yp_shift_jis.c +16 -43
  43. data/src/enc/yp_tables.c +36 -38
  44. data/src/enc/yp_unicode.c +20 -25
  45. data/src/enc/yp_windows_31j.c +16 -43
  46. data/src/node.c +1444 -836
  47. data/src/prettyprint.c +324 -103
  48. data/src/regexp.c +21 -21
  49. data/src/serialize.c +429 -276
  50. data/src/token_type.c +2 -2
  51. data/src/unescape.c +184 -136
  52. data/src/util/yp_buffer.c +7 -2
  53. data/src/util/yp_char.c +34 -34
  54. data/src/util/yp_constant_pool.c +4 -4
  55. data/src/util/yp_memchr.c +1 -1
  56. data/src/util/yp_newline_list.c +14 -3
  57. data/src/util/yp_string.c +22 -20
  58. data/src/util/yp_string_list.c +0 -6
  59. data/src/util/yp_strncasecmp.c +3 -6
  60. data/src/util/yp_strpbrk.c +8 -8
  61. data/src/yarp.c +1504 -615
  62. data/yarp.gemspec +3 -1
  63. metadata +4 -2
data/src/regexp.c CHANGED
@@ -2,9 +2,9 @@
2
2
 
3
3
  // This is the parser that is going to handle parsing regular expressions.
4
4
  typedef struct {
5
- const char *start;
6
- const char *cursor;
7
- const char *end;
5
+ const uint8_t *start;
6
+ const uint8_t *cursor;
7
+ const uint8_t *end;
8
8
  yp_string_list_t *named_captures;
9
9
  bool encoding_changed;
10
10
  yp_encoding_t *encoding;
@@ -12,7 +12,7 @@ typedef struct {
12
12
 
13
13
  // This initializes a new parser with the given source.
14
14
  static void
15
- yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char *end, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
15
+ yp_regexp_parser_init(yp_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
16
16
  *parser = (yp_regexp_parser_t) {
17
17
  .start = start,
18
18
  .cursor = start,
@@ -25,7 +25,7 @@ yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char
25
25
 
26
26
  // This appends a new string to the list of named captures.
27
27
  static void
28
- yp_regexp_parser_named_capture(yp_regexp_parser_t *parser, const char *start, const char *end) {
28
+ yp_regexp_parser_named_capture(yp_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
29
29
  yp_string_t string;
30
30
  yp_string_shared_init(&string, start, end);
31
31
  yp_string_list_append(parser->named_captures, &string);
@@ -40,7 +40,7 @@ yp_regexp_char_is_eof(yp_regexp_parser_t *parser) {
40
40
 
41
41
  // Optionally accept a char and consume it if it exists.
42
42
  static inline bool
43
- yp_regexp_char_accept(yp_regexp_parser_t *parser, char value) {
43
+ yp_regexp_char_accept(yp_regexp_parser_t *parser, uint8_t value) {
44
44
  if (!yp_regexp_char_is_eof(parser) && *parser->cursor == value) {
45
45
  parser->cursor++;
46
46
  return true;
@@ -50,7 +50,7 @@ yp_regexp_char_accept(yp_regexp_parser_t *parser, char value) {
50
50
 
51
51
  // Expect a character to be present and consume it.
52
52
  static inline bool
53
- yp_regexp_char_expect(yp_regexp_parser_t *parser, char value) {
53
+ yp_regexp_char_expect(yp_regexp_parser_t *parser, uint8_t value) {
54
54
  if (!yp_regexp_char_is_eof(parser) && *parser->cursor == value) {
55
55
  parser->cursor++;
56
56
  return true;
@@ -60,12 +60,12 @@ yp_regexp_char_expect(yp_regexp_parser_t *parser, char value) {
60
60
 
61
61
  // This advances the current token to the next instance of the given character.
62
62
  static bool
63
- yp_regexp_char_find(yp_regexp_parser_t *parser, char value) {
63
+ yp_regexp_char_find(yp_regexp_parser_t *parser, uint8_t value) {
64
64
  if (yp_regexp_char_is_eof(parser)) {
65
65
  return false;
66
66
  }
67
67
 
68
- const char *end = (const char *) yp_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
68
+ const uint8_t *end = (const uint8_t *) yp_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
69
69
  if (end == NULL) {
70
70
  return false;
71
71
  }
@@ -107,7 +107,7 @@ yp_regexp_char_find(yp_regexp_parser_t *parser, char value) {
107
107
  // consumed so we're in the start state.
108
108
  static bool
109
109
  yp_regexp_parse_range_quantifier(yp_regexp_parser_t *parser) {
110
- const char *savepoint = parser->cursor;
110
+ const uint8_t *savepoint = parser->cursor;
111
111
 
112
112
  enum {
113
113
  YP_REGEXP_RANGE_QUANTIFIER_STATE_START,
@@ -252,7 +252,7 @@ yp_regexp_parse_character_set(yp_regexp_parser_t *parser) {
252
252
  // A left bracket can either mean a POSIX class or a character set.
253
253
  static bool
254
254
  yp_regexp_parse_lbracket(yp_regexp_parser_t *parser) {
255
- const char *reset = parser->cursor;
255
+ const uint8_t *reset = parser->cursor;
256
256
 
257
257
  if ((parser->cursor + 2 < parser->end) && parser->cursor[0] == '[' && parser->cursor[1] == ':') {
258
258
  parser->cursor++;
@@ -287,7 +287,7 @@ typedef enum {
287
287
 
288
288
  // This is the set of options that are configurable on the regular expression.
289
289
  typedef struct {
290
- unsigned char values[YP_REGEXP_OPTION_STATE_SLOTS];
290
+ uint8_t values[YP_REGEXP_OPTION_STATE_SLOTS];
291
291
  } yp_regexp_options_t;
292
292
 
293
293
  // Initialize a new set of options to their default values.
@@ -305,9 +305,9 @@ yp_regexp_options_init(yp_regexp_options_t *options) {
305
305
  // Attempt to add the given option to the set of options. Returns true if it was
306
306
  // added, false if it was already present.
307
307
  static bool
308
- yp_regexp_options_add(yp_regexp_options_t *options, unsigned char key) {
308
+ yp_regexp_options_add(yp_regexp_options_t *options, uint8_t key) {
309
309
  if (key >= YP_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
310
- key = (unsigned char) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
310
+ key = (uint8_t) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
311
311
 
312
312
  switch (options->values[key]) {
313
313
  case YP_REGEXP_OPTION_STATE_INVALID:
@@ -328,9 +328,9 @@ yp_regexp_options_add(yp_regexp_options_t *options, unsigned char key) {
328
328
  // Attempt to remove the given option from the set of options. Returns true if
329
329
  // it was removed, false if it was already absent.
330
330
  static bool
331
- yp_regexp_options_remove(yp_regexp_options_t *options, unsigned char key) {
331
+ yp_regexp_options_remove(yp_regexp_options_t *options, uint8_t key) {
332
332
  if (key >= YP_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
333
- key = (unsigned char) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
333
+ key = (uint8_t) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
334
334
 
335
335
  switch (options->values[key]) {
336
336
  case YP_REGEXP_OPTION_STATE_INVALID:
@@ -431,7 +431,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
431
431
  parser->cursor++;
432
432
  break;
433
433
  default: { // named capture group
434
- const char *start = parser->cursor;
434
+ const uint8_t *start = parser->cursor;
435
435
  if (!yp_regexp_char_find(parser, '>')) {
436
436
  return false;
437
437
  }
@@ -441,7 +441,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
441
441
  }
442
442
  break;
443
443
  case '\'': { // named capture group
444
- const char *start = ++parser->cursor;
444
+ const uint8_t *start = ++parser->cursor;
445
445
  if (!yp_regexp_char_find(parser, '\'')) {
446
446
  return false;
447
447
  }
@@ -456,7 +456,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
456
456
  break;
457
457
  case 'i': case 'm': case 'x': case 'd': case 'a': case 'u': // options
458
458
  while (!yp_regexp_char_is_eof(parser) && *parser->cursor != '-' && *parser->cursor != ':' && *parser->cursor != ')') {
459
- if (!yp_regexp_options_add(&options, (unsigned char) *parser->cursor)) {
459
+ if (!yp_regexp_options_add(&options, *parser->cursor)) {
460
460
  return false;
461
461
  }
462
462
  parser->cursor++;
@@ -474,7 +474,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
474
474
  case '-':
475
475
  parser->cursor++;
476
476
  while (!yp_regexp_char_is_eof(parser) && *parser->cursor != ':' && *parser->cursor != ')') {
477
- if (!yp_regexp_options_remove(&options, (unsigned char) *parser->cursor)) {
477
+ if (!yp_regexp_options_remove(&options, *parser->cursor)) {
478
478
  return false;
479
479
  }
480
480
  parser->cursor++;
@@ -573,7 +573,7 @@ yp_regexp_parse_pattern(yp_regexp_parser_t *parser) {
573
573
  // Parse a regular expression and extract the names of all of the named capture
574
574
  // groups.
575
575
  YP_EXPORTED_FUNCTION bool
576
- yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
576
+ yp_regexp_named_capture_group_names(const uint8_t *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
577
577
  yp_regexp_parser_t parser;
578
578
  yp_regexp_parser_init(&parser, source, source + size, named_captures, encoding_changed, encoding);
579
579
  return yp_regexp_parse_pattern(&parser);