yarp 0.8.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +48 -1
  3. data/Makefile +5 -1
  4. data/README.md +4 -3
  5. data/config.yml +461 -150
  6. data/docs/configuration.md +1 -0
  7. data/docs/encoding.md +5 -5
  8. data/docs/ruby_api.md +2 -0
  9. data/docs/serialization.md +3 -3
  10. data/docs/testing.md +2 -2
  11. data/ext/yarp/api_node.c +810 -199
  12. data/ext/yarp/extension.c +94 -31
  13. data/ext/yarp/extension.h +2 -2
  14. data/include/yarp/ast.h +653 -150
  15. data/include/yarp/defines.h +2 -1
  16. data/include/yarp/diagnostic.h +3 -3
  17. data/include/yarp/enc/yp_encoding.h +10 -10
  18. data/include/yarp/node.h +10 -0
  19. data/include/yarp/parser.h +19 -19
  20. data/include/yarp/regexp.h +1 -1
  21. data/include/yarp/unescape.h +7 -5
  22. data/include/yarp/util/yp_buffer.h +3 -0
  23. data/include/yarp/util/yp_char.h +16 -16
  24. data/include/yarp/util/yp_constant_pool.h +2 -2
  25. data/include/yarp/util/yp_newline_list.h +7 -4
  26. data/include/yarp/util/yp_string.h +4 -4
  27. data/include/yarp/util/yp_string_list.h +0 -3
  28. data/include/yarp/util/yp_strpbrk.h +1 -1
  29. data/include/yarp/version.h +2 -2
  30. data/include/yarp.h +14 -3
  31. data/lib/yarp/desugar_visitor.rb +204 -0
  32. data/lib/yarp/ffi.rb +27 -1
  33. data/lib/yarp/lex_compat.rb +93 -25
  34. data/lib/yarp/mutation_visitor.rb +683 -0
  35. data/lib/yarp/node.rb +3121 -597
  36. data/lib/yarp/serialize.rb +198 -126
  37. data/lib/yarp.rb +53 -7
  38. data/src/diagnostic.c +1 -1
  39. data/src/enc/yp_big5.c +15 -42
  40. data/src/enc/yp_euc_jp.c +16 -43
  41. data/src/enc/yp_gbk.c +19 -46
  42. data/src/enc/yp_shift_jis.c +16 -43
  43. data/src/enc/yp_tables.c +36 -38
  44. data/src/enc/yp_unicode.c +20 -25
  45. data/src/enc/yp_windows_31j.c +16 -43
  46. data/src/node.c +1444 -836
  47. data/src/prettyprint.c +324 -103
  48. data/src/regexp.c +21 -21
  49. data/src/serialize.c +429 -276
  50. data/src/token_type.c +2 -2
  51. data/src/unescape.c +184 -136
  52. data/src/util/yp_buffer.c +7 -2
  53. data/src/util/yp_char.c +34 -34
  54. data/src/util/yp_constant_pool.c +4 -4
  55. data/src/util/yp_memchr.c +1 -1
  56. data/src/util/yp_newline_list.c +14 -3
  57. data/src/util/yp_string.c +22 -20
  58. data/src/util/yp_string_list.c +0 -6
  59. data/src/util/yp_strncasecmp.c +3 -6
  60. data/src/util/yp_strpbrk.c +8 -8
  61. data/src/yarp.c +1504 -615
  62. data/yarp.gemspec +3 -1
  63. metadata +4 -2
data/src/enc/yp_unicode.c CHANGED
@@ -10,7 +10,7 @@ typedef uint32_t yp_unicode_codepoint_t;
10
10
  // this table is different from other encodings where we used a lookup table
11
11
  // because the indices of those tables are the byte representations, not the
12
12
  // codepoints themselves.
13
- unsigned char yp_encoding_unicode_table[256] = {
13
+ uint8_t yp_encoding_unicode_table[256] = {
14
14
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
15
15
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
16
16
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -2220,7 +2220,7 @@ static const uint8_t yp_utf_8_dfa[] = {
2220
2220
  };
2221
2221
 
2222
2222
  static yp_unicode_codepoint_t
2223
- yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
2223
+ yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2224
2224
  assert(n >= 1);
2225
2225
  size_t maximum = (size_t) n;
2226
2226
 
@@ -2228,7 +2228,7 @@ yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
2228
2228
  uint32_t state = 0;
2229
2229
 
2230
2230
  for (size_t index = 0; index < 4 && index < maximum; index++) {
2231
- uint32_t byte = c[index];
2231
+ uint32_t byte = b[index];
2232
2232
  uint32_t type = yp_utf_8_dfa[byte];
2233
2233
 
2234
2234
  codepoint = (state != 0) ?
@@ -2247,60 +2247,55 @@ yp_utf_8_codepoint(const unsigned char *c, ptrdiff_t n, size_t *width) {
2247
2247
  }
2248
2248
 
2249
2249
  static size_t
2250
- yp_encoding_utf_8_char_width(const char *c, ptrdiff_t n) {
2250
+ yp_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
2251
2251
  size_t width;
2252
- const unsigned char *v = (const unsigned char *) c;
2253
-
2254
- yp_utf_8_codepoint(v, n, &width);
2252
+ yp_utf_8_codepoint(b, n, &width);
2255
2253
  return width;
2256
2254
  }
2257
2255
 
2258
2256
  size_t
2259
- yp_encoding_utf_8_alpha_char(const char *c, ptrdiff_t n) {
2260
- const unsigned char *v = (const unsigned char *) c;
2261
- if (*v < 0x80) {
2262
- return (yp_encoding_unicode_table[*v] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
2257
+ yp_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
2258
+ if (*b < 0x80) {
2259
+ return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
2263
2260
  }
2264
2261
 
2265
2262
  size_t width;
2266
- yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(v, n, &width);
2263
+ yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
2267
2264
 
2268
2265
  if (codepoint <= 0xFF) {
2269
- return (yp_encoding_unicode_table[(unsigned char) codepoint] & YP_ENCODING_ALPHABETIC_BIT) ? width : 0;
2266
+ return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_ALPHABETIC_BIT) ? width : 0;
2270
2267
  } else {
2271
2268
  return yp_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
2272
2269
  }
2273
2270
  }
2274
2271
 
2275
2272
  size_t
2276
- yp_encoding_utf_8_alnum_char(const char *c, ptrdiff_t n) {
2277
- const unsigned char *v = (const unsigned char *) c;
2278
- if (*v < 0x80) {
2279
- return (yp_encoding_unicode_table[*v] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
2273
+ yp_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
2274
+ if (*b < 0x80) {
2275
+ return (yp_encoding_unicode_table[*b] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
2280
2276
  }
2281
2277
 
2282
2278
  size_t width;
2283
- yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(v, n, &width);
2279
+ yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
2284
2280
 
2285
2281
  if (codepoint <= 0xFF) {
2286
- return (yp_encoding_unicode_table[(unsigned char) codepoint] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
2282
+ return (yp_encoding_unicode_table[(uint8_t) codepoint] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
2287
2283
  } else {
2288
2284
  return yp_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
2289
2285
  }
2290
2286
  }
2291
2287
 
2292
2288
  static bool
2293
- yp_encoding_utf_8_isupper_char(const char *c, ptrdiff_t n) {
2294
- const unsigned char *v = (const unsigned char *) c;
2295
- if (*v < 0x80) {
2296
- return (yp_encoding_unicode_table[*v] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
2289
+ yp_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
2290
+ if (*b < 0x80) {
2291
+ return (yp_encoding_unicode_table[*b] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
2297
2292
  }
2298
2293
 
2299
2294
  size_t width;
2300
- yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(v, n, &width);
2295
+ yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
2301
2296
 
2302
2297
  if (codepoint <= 0xFF) {
2303
- return (yp_encoding_unicode_table[(unsigned char) codepoint] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
2298
+ return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
2304
2299
  } else {
2305
2300
  return yp_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
2306
2301
  }
@@ -1,73 +1,46 @@
1
1
  #include "yarp/enc/yp_encoding.h"
2
2
 
3
- typedef uint16_t yp_windows_31j_codepoint_t;
4
-
5
- static yp_windows_31j_codepoint_t
6
- yp_windows_31j_codepoint(const char *c, ptrdiff_t n, size_t *width) {
7
- const unsigned char *uc = (const unsigned char *) c;
8
-
3
+ static size_t
4
+ yp_encoding_windows_31j_char_width(const uint8_t *b, ptrdiff_t n) {
9
5
  // These are the single byte characters.
10
- if (*uc < 0x80 || (*uc >= 0xA1 && *uc <= 0xDF)) {
11
- *width = 1;
12
- return *uc;
6
+ if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
7
+ return 1;
13
8
  }
14
9
 
15
10
  // These are the double byte characters.
16
11
  if (
17
12
  (n > 1) &&
18
- ((uc[0] >= 0x81 && uc[0] <= 0x9F) || (uc[0] >= 0xE0 && uc[0] <= 0xFC)) &&
19
- (uc[1] >= 0x40 && uc[1] <= 0xFC)
13
+ ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
14
+ (b[1] >= 0x40 && b[1] <= 0xFC)
20
15
  ) {
21
- *width = 2;
22
- return (yp_windows_31j_codepoint_t) (uc[0] << 8 | uc[1]);
16
+ return 2;
23
17
  }
24
18
 
25
- *width = 0;
26
19
  return 0;
27
20
  }
28
21
 
29
22
  static size_t
30
- yp_encoding_windows_31j_char_width(const char *c, ptrdiff_t n) {
31
- size_t width;
32
- yp_windows_31j_codepoint(c, n, &width);
33
-
34
- return width;
35
- }
36
-
37
- static size_t
38
- yp_encoding_windows_31j_alpha_char(const char *c, ptrdiff_t n) {
39
- size_t width;
40
- yp_windows_31j_codepoint_t codepoint = yp_windows_31j_codepoint(c, n, &width);
41
-
42
- if (width == 1) {
43
- const char value = (const char) codepoint;
44
- return yp_encoding_ascii_alpha_char(&value, n);
23
+ yp_encoding_windows_31j_alpha_char(const uint8_t *b, ptrdiff_t n) {
24
+ if (yp_encoding_windows_31j_char_width(b, n) == 1) {
25
+ return yp_encoding_ascii_alpha_char(b, n);
45
26
  } else {
46
27
  return 0;
47
28
  }
48
29
  }
49
30
 
50
31
  static size_t
51
- yp_encoding_windows_31j_alnum_char(const char *c, ptrdiff_t n) {
52
- size_t width;
53
- yp_windows_31j_codepoint_t codepoint = yp_windows_31j_codepoint(c, n, &width);
54
-
55
- if (width == 1) {
56
- const char value = (const char) codepoint;
57
- return yp_encoding_ascii_alnum_char(&value, n);
32
+ yp_encoding_windows_31j_alnum_char(const uint8_t *b, ptrdiff_t n) {
33
+ if (yp_encoding_windows_31j_char_width(b, n) == 1) {
34
+ return yp_encoding_ascii_alnum_char(b, n);
58
35
  } else {
59
36
  return 0;
60
37
  }
61
38
  }
62
39
 
63
40
  static bool
64
- yp_encoding_windows_31j_isupper_char(const char *c, ptrdiff_t n) {
65
- size_t width;
66
- yp_windows_31j_codepoint_t codepoint = yp_windows_31j_codepoint(c, n, &width);
67
-
68
- if (width == 1) {
69
- const char value = (const char) codepoint;
70
- return yp_encoding_ascii_isupper_char(&value, n);
41
+ yp_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
42
+ if (yp_encoding_windows_31j_char_width(b, n) == 1) {
43
+ return yp_encoding_ascii_isupper_char(b, n);
71
44
  } else {
72
45
  return false;
73
46
  }