yarp 0.8.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +48 -1
  3. data/Makefile +5 -1
  4. data/README.md +4 -3
  5. data/config.yml +461 -150
  6. data/docs/configuration.md +1 -0
  7. data/docs/encoding.md +5 -5
  8. data/docs/ruby_api.md +2 -0
  9. data/docs/serialization.md +3 -3
  10. data/docs/testing.md +2 -2
  11. data/ext/yarp/api_node.c +810 -199
  12. data/ext/yarp/extension.c +94 -31
  13. data/ext/yarp/extension.h +2 -2
  14. data/include/yarp/ast.h +653 -150
  15. data/include/yarp/defines.h +2 -1
  16. data/include/yarp/diagnostic.h +3 -3
  17. data/include/yarp/enc/yp_encoding.h +10 -10
  18. data/include/yarp/node.h +10 -0
  19. data/include/yarp/parser.h +19 -19
  20. data/include/yarp/regexp.h +1 -1
  21. data/include/yarp/unescape.h +7 -5
  22. data/include/yarp/util/yp_buffer.h +3 -0
  23. data/include/yarp/util/yp_char.h +16 -16
  24. data/include/yarp/util/yp_constant_pool.h +2 -2
  25. data/include/yarp/util/yp_newline_list.h +7 -4
  26. data/include/yarp/util/yp_string.h +4 -4
  27. data/include/yarp/util/yp_string_list.h +0 -3
  28. data/include/yarp/util/yp_strpbrk.h +1 -1
  29. data/include/yarp/version.h +2 -2
  30. data/include/yarp.h +14 -3
  31. data/lib/yarp/desugar_visitor.rb +204 -0
  32. data/lib/yarp/ffi.rb +27 -1
  33. data/lib/yarp/lex_compat.rb +93 -25
  34. data/lib/yarp/mutation_visitor.rb +683 -0
  35. data/lib/yarp/node.rb +3121 -597
  36. data/lib/yarp/serialize.rb +198 -126
  37. data/lib/yarp.rb +53 -7
  38. data/src/diagnostic.c +1 -1
  39. data/src/enc/yp_big5.c +15 -42
  40. data/src/enc/yp_euc_jp.c +16 -43
  41. data/src/enc/yp_gbk.c +19 -46
  42. data/src/enc/yp_shift_jis.c +16 -43
  43. data/src/enc/yp_tables.c +36 -38
  44. data/src/enc/yp_unicode.c +20 -25
  45. data/src/enc/yp_windows_31j.c +16 -43
  46. data/src/node.c +1444 -836
  47. data/src/prettyprint.c +324 -103
  48. data/src/regexp.c +21 -21
  49. data/src/serialize.c +429 -276
  50. data/src/token_type.c +2 -2
  51. data/src/unescape.c +184 -136
  52. data/src/util/yp_buffer.c +7 -2
  53. data/src/util/yp_char.c +34 -34
  54. data/src/util/yp_constant_pool.c +4 -4
  55. data/src/util/yp_memchr.c +1 -1
  56. data/src/util/yp_newline_list.c +14 -3
  57. data/src/util/yp_string.c +22 -20
  58. data/src/util/yp_string_list.c +0 -6
  59. data/src/util/yp_strncasecmp.c +3 -6
  60. data/src/util/yp_strpbrk.c +8 -8
  61. data/src/yarp.c +1504 -615
  62. data/yarp.gemspec +3 -1
  63. metadata +4 -2
data/src/regexp.c CHANGED
@@ -2,9 +2,9 @@
2
2
 
3
3
  // This is the parser that is going to handle parsing regular expressions.
4
4
  typedef struct {
5
- const char *start;
6
- const char *cursor;
7
- const char *end;
5
+ const uint8_t *start;
6
+ const uint8_t *cursor;
7
+ const uint8_t *end;
8
8
  yp_string_list_t *named_captures;
9
9
  bool encoding_changed;
10
10
  yp_encoding_t *encoding;
@@ -12,7 +12,7 @@ typedef struct {
12
12
 
13
13
  // This initializes a new parser with the given source.
14
14
  static void
15
- yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char *end, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
15
+ yp_regexp_parser_init(yp_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
16
16
  *parser = (yp_regexp_parser_t) {
17
17
  .start = start,
18
18
  .cursor = start,
@@ -25,7 +25,7 @@ yp_regexp_parser_init(yp_regexp_parser_t *parser, const char *start, const char
25
25
 
26
26
  // This appends a new string to the list of named captures.
27
27
  static void
28
- yp_regexp_parser_named_capture(yp_regexp_parser_t *parser, const char *start, const char *end) {
28
+ yp_regexp_parser_named_capture(yp_regexp_parser_t *parser, const uint8_t *start, const uint8_t *end) {
29
29
  yp_string_t string;
30
30
  yp_string_shared_init(&string, start, end);
31
31
  yp_string_list_append(parser->named_captures, &string);
@@ -40,7 +40,7 @@ yp_regexp_char_is_eof(yp_regexp_parser_t *parser) {
40
40
 
41
41
  // Optionally accept a char and consume it if it exists.
42
42
  static inline bool
43
- yp_regexp_char_accept(yp_regexp_parser_t *parser, char value) {
43
+ yp_regexp_char_accept(yp_regexp_parser_t *parser, uint8_t value) {
44
44
  if (!yp_regexp_char_is_eof(parser) && *parser->cursor == value) {
45
45
  parser->cursor++;
46
46
  return true;
@@ -50,7 +50,7 @@ yp_regexp_char_accept(yp_regexp_parser_t *parser, char value) {
50
50
 
51
51
  // Expect a character to be present and consume it.
52
52
  static inline bool
53
- yp_regexp_char_expect(yp_regexp_parser_t *parser, char value) {
53
+ yp_regexp_char_expect(yp_regexp_parser_t *parser, uint8_t value) {
54
54
  if (!yp_regexp_char_is_eof(parser) && *parser->cursor == value) {
55
55
  parser->cursor++;
56
56
  return true;
@@ -60,12 +60,12 @@ yp_regexp_char_expect(yp_regexp_parser_t *parser, char value) {
60
60
 
61
61
  // This advances the current token to the next instance of the given character.
62
62
  static bool
63
- yp_regexp_char_find(yp_regexp_parser_t *parser, char value) {
63
+ yp_regexp_char_find(yp_regexp_parser_t *parser, uint8_t value) {
64
64
  if (yp_regexp_char_is_eof(parser)) {
65
65
  return false;
66
66
  }
67
67
 
68
- const char *end = (const char *) yp_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
68
+ const uint8_t *end = (const uint8_t *) yp_memchr(parser->cursor, value, (size_t) (parser->end - parser->cursor), parser->encoding_changed, parser->encoding);
69
69
  if (end == NULL) {
70
70
  return false;
71
71
  }
@@ -107,7 +107,7 @@ yp_regexp_char_find(yp_regexp_parser_t *parser, char value) {
107
107
  // consumed so we're in the start state.
108
108
  static bool
109
109
  yp_regexp_parse_range_quantifier(yp_regexp_parser_t *parser) {
110
- const char *savepoint = parser->cursor;
110
+ const uint8_t *savepoint = parser->cursor;
111
111
 
112
112
  enum {
113
113
  YP_REGEXP_RANGE_QUANTIFIER_STATE_START,
@@ -252,7 +252,7 @@ yp_regexp_parse_character_set(yp_regexp_parser_t *parser) {
252
252
  // A left bracket can either mean a POSIX class or a character set.
253
253
  static bool
254
254
  yp_regexp_parse_lbracket(yp_regexp_parser_t *parser) {
255
- const char *reset = parser->cursor;
255
+ const uint8_t *reset = parser->cursor;
256
256
 
257
257
  if ((parser->cursor + 2 < parser->end) && parser->cursor[0] == '[' && parser->cursor[1] == ':') {
258
258
  parser->cursor++;
@@ -287,7 +287,7 @@ typedef enum {
287
287
 
288
288
  // This is the set of options that are configurable on the regular expression.
289
289
  typedef struct {
290
- unsigned char values[YP_REGEXP_OPTION_STATE_SLOTS];
290
+ uint8_t values[YP_REGEXP_OPTION_STATE_SLOTS];
291
291
  } yp_regexp_options_t;
292
292
 
293
293
  // Initialize a new set of options to their default values.
@@ -305,9 +305,9 @@ yp_regexp_options_init(yp_regexp_options_t *options) {
305
305
  // Attempt to add the given option to the set of options. Returns true if it was
306
306
  // added, false if it was already present.
307
307
  static bool
308
- yp_regexp_options_add(yp_regexp_options_t *options, unsigned char key) {
308
+ yp_regexp_options_add(yp_regexp_options_t *options, uint8_t key) {
309
309
  if (key >= YP_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
310
- key = (unsigned char) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
310
+ key = (uint8_t) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
311
311
 
312
312
  switch (options->values[key]) {
313
313
  case YP_REGEXP_OPTION_STATE_INVALID:
@@ -328,9 +328,9 @@ yp_regexp_options_add(yp_regexp_options_t *options, unsigned char key) {
328
328
  // Attempt to remove the given option from the set of options. Returns true if
329
329
  // it was removed, false if it was already absent.
330
330
  static bool
331
- yp_regexp_options_remove(yp_regexp_options_t *options, unsigned char key) {
331
+ yp_regexp_options_remove(yp_regexp_options_t *options, uint8_t key) {
332
332
  if (key >= YP_REGEXP_OPTION_STATE_SLOT_MINIMUM && key <= YP_REGEXP_OPTION_STATE_SLOT_MAXIMUM) {
333
- key = (unsigned char) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
333
+ key = (uint8_t) (key - YP_REGEXP_OPTION_STATE_SLOT_MINIMUM);
334
334
 
335
335
  switch (options->values[key]) {
336
336
  case YP_REGEXP_OPTION_STATE_INVALID:
@@ -431,7 +431,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
431
431
  parser->cursor++;
432
432
  break;
433
433
  default: { // named capture group
434
- const char *start = parser->cursor;
434
+ const uint8_t *start = parser->cursor;
435
435
  if (!yp_regexp_char_find(parser, '>')) {
436
436
  return false;
437
437
  }
@@ -441,7 +441,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
441
441
  }
442
442
  break;
443
443
  case '\'': { // named capture group
444
- const char *start = ++parser->cursor;
444
+ const uint8_t *start = ++parser->cursor;
445
445
  if (!yp_regexp_char_find(parser, '\'')) {
446
446
  return false;
447
447
  }
@@ -456,7 +456,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
456
456
  break;
457
457
  case 'i': case 'm': case 'x': case 'd': case 'a': case 'u': // options
458
458
  while (!yp_regexp_char_is_eof(parser) && *parser->cursor != '-' && *parser->cursor != ':' && *parser->cursor != ')') {
459
- if (!yp_regexp_options_add(&options, (unsigned char) *parser->cursor)) {
459
+ if (!yp_regexp_options_add(&options, *parser->cursor)) {
460
460
  return false;
461
461
  }
462
462
  parser->cursor++;
@@ -474,7 +474,7 @@ yp_regexp_parse_group(yp_regexp_parser_t *parser) {
474
474
  case '-':
475
475
  parser->cursor++;
476
476
  while (!yp_regexp_char_is_eof(parser) && *parser->cursor != ':' && *parser->cursor != ')') {
477
- if (!yp_regexp_options_remove(&options, (unsigned char) *parser->cursor)) {
477
+ if (!yp_regexp_options_remove(&options, *parser->cursor)) {
478
478
  return false;
479
479
  }
480
480
  parser->cursor++;
@@ -573,7 +573,7 @@ yp_regexp_parse_pattern(yp_regexp_parser_t *parser) {
573
573
  // Parse a regular expression and extract the names of all of the named capture
574
574
  // groups.
575
575
  YP_EXPORTED_FUNCTION bool
576
- yp_regexp_named_capture_group_names(const char *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
576
+ yp_regexp_named_capture_group_names(const uint8_t *source, size_t size, yp_string_list_t *named_captures, bool encoding_changed, yp_encoding_t *encoding) {
577
577
  yp_regexp_parser_t parser;
578
578
  yp_regexp_parser_init(&parser, source, source + size, named_captures, encoding_changed, encoding);
579
579
  return yp_regexp_parse_pattern(&parser);