yarp 0.12.0 → 0.13.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +29 -8
  3. data/CONTRIBUTING.md +2 -2
  4. data/Makefile +5 -5
  5. data/README.md +11 -12
  6. data/config.yml +6 -2
  7. data/docs/build_system.md +21 -21
  8. data/docs/building.md +4 -4
  9. data/docs/configuration.md +25 -21
  10. data/docs/design.md +2 -2
  11. data/docs/encoding.md +17 -17
  12. data/docs/fuzzing.md +4 -4
  13. data/docs/heredocs.md +3 -3
  14. data/docs/mapping.md +94 -94
  15. data/docs/ripper.md +4 -4
  16. data/docs/ruby_api.md +11 -11
  17. data/docs/serialization.md +17 -16
  18. data/docs/testing.md +6 -6
  19. data/ext/prism/api_node.c +4725 -0
  20. data/ext/{yarp → prism}/api_pack.c +82 -82
  21. data/ext/{yarp → prism}/extconf.rb +13 -13
  22. data/ext/{yarp → prism}/extension.c +175 -168
  23. data/ext/prism/extension.h +18 -0
  24. data/include/prism/ast.h +1932 -0
  25. data/include/prism/defines.h +45 -0
  26. data/include/prism/diagnostic.h +231 -0
  27. data/include/{yarp/enc/yp_encoding.h → prism/enc/pm_encoding.h} +40 -40
  28. data/include/prism/node.h +41 -0
  29. data/include/prism/pack.h +141 -0
  30. data/include/{yarp → prism}/parser.h +143 -142
  31. data/include/prism/regexp.h +19 -0
  32. data/include/prism/unescape.h +48 -0
  33. data/include/prism/util/pm_buffer.h +51 -0
  34. data/include/{yarp/util/yp_char.h → prism/util/pm_char.h} +20 -20
  35. data/include/{yarp/util/yp_constant_pool.h → prism/util/pm_constant_pool.h} +26 -22
  36. data/include/{yarp/util/yp_list.h → prism/util/pm_list.h} +21 -21
  37. data/include/prism/util/pm_memchr.h +14 -0
  38. data/include/{yarp/util/yp_newline_list.h → prism/util/pm_newline_list.h} +11 -11
  39. data/include/prism/util/pm_state_stack.h +24 -0
  40. data/include/{yarp/util/yp_string.h → prism/util/pm_string.h} +20 -20
  41. data/include/prism/util/pm_string_list.h +25 -0
  42. data/include/{yarp/util/yp_strpbrk.h → prism/util/pm_strpbrk.h} +7 -7
  43. data/include/prism/version.h +4 -0
  44. data/include/prism.h +82 -0
  45. data/lib/prism/compiler.rb +465 -0
  46. data/lib/prism/debug.rb +157 -0
  47. data/lib/{yarp/desugar_visitor.rb → prism/desugar_compiler.rb} +4 -2
  48. data/lib/prism/dispatcher.rb +2051 -0
  49. data/lib/prism/dsl.rb +750 -0
  50. data/lib/{yarp → prism}/ffi.rb +66 -67
  51. data/lib/{yarp → prism}/lex_compat.rb +40 -43
  52. data/lib/{yarp/mutation_visitor.rb → prism/mutation_compiler.rb} +3 -3
  53. data/lib/{yarp → prism}/node.rb +2012 -2593
  54. data/lib/prism/node_ext.rb +55 -0
  55. data/lib/prism/node_inspector.rb +68 -0
  56. data/lib/{yarp → prism}/pack.rb +1 -1
  57. data/lib/{yarp → prism}/parse_result/comments.rb +1 -1
  58. data/lib/{yarp → prism}/parse_result/newlines.rb +1 -1
  59. data/lib/prism/parse_result.rb +266 -0
  60. data/lib/{yarp → prism}/pattern.rb +14 -14
  61. data/lib/{yarp → prism}/ripper_compat.rb +5 -5
  62. data/lib/{yarp → prism}/serialize.rb +12 -7
  63. data/lib/prism/visitor.rb +470 -0
  64. data/lib/prism.rb +64 -0
  65. data/lib/yarp.rb +2 -614
  66. data/src/diagnostic.c +213 -208
  67. data/src/enc/pm_big5.c +52 -0
  68. data/src/enc/pm_euc_jp.c +58 -0
  69. data/src/enc/{yp_gbk.c → pm_gbk.c} +16 -16
  70. data/src/enc/pm_shift_jis.c +56 -0
  71. data/src/enc/{yp_tables.c → pm_tables.c} +69 -69
  72. data/src/enc/{yp_unicode.c → pm_unicode.c} +40 -40
  73. data/src/enc/pm_windows_31j.c +56 -0
  74. data/src/node.c +1293 -1233
  75. data/src/pack.c +247 -247
  76. data/src/prettyprint.c +1479 -1479
  77. data/src/{yarp.c → prism.c} +5205 -5083
  78. data/src/regexp.c +132 -132
  79. data/src/serialize.c +1121 -1121
  80. data/src/token_type.c +169 -167
  81. data/src/unescape.c +106 -87
  82. data/src/util/pm_buffer.c +103 -0
  83. data/src/util/{yp_char.c → pm_char.c} +72 -72
  84. data/src/util/{yp_constant_pool.c → pm_constant_pool.c} +85 -64
  85. data/src/util/{yp_list.c → pm_list.c} +10 -10
  86. data/src/util/{yp_memchr.c → pm_memchr.c} +6 -4
  87. data/src/util/{yp_newline_list.c → pm_newline_list.c} +21 -21
  88. data/src/util/{yp_state_stack.c → pm_state_stack.c} +4 -4
  89. data/src/util/{yp_string.c → pm_string.c} +38 -38
  90. data/src/util/pm_string_list.c +29 -0
  91. data/src/util/{yp_strncasecmp.c → pm_strncasecmp.c} +1 -1
  92. data/src/util/{yp_strpbrk.c → pm_strpbrk.c} +8 -8
  93. data/yarp.gemspec +68 -59
  94. metadata +70 -61
  95. data/ext/yarp/api_node.c +0 -4728
  96. data/ext/yarp/extension.h +0 -18
  97. data/include/yarp/ast.h +0 -1929
  98. data/include/yarp/defines.h +0 -45
  99. data/include/yarp/diagnostic.h +0 -226
  100. data/include/yarp/node.h +0 -42
  101. data/include/yarp/pack.h +0 -141
  102. data/include/yarp/regexp.h +0 -19
  103. data/include/yarp/unescape.h +0 -44
  104. data/include/yarp/util/yp_buffer.h +0 -51
  105. data/include/yarp/util/yp_memchr.h +0 -14
  106. data/include/yarp/util/yp_state_stack.h +0 -24
  107. data/include/yarp/util/yp_string_list.h +0 -25
  108. data/include/yarp/version.h +0 -4
  109. data/include/yarp.h +0 -82
  110. data/src/enc/yp_big5.c +0 -52
  111. data/src/enc/yp_euc_jp.c +0 -58
  112. data/src/enc/yp_shift_jis.c +0 -56
  113. data/src/enc/yp_windows_31j.c +0 -56
  114. data/src/util/yp_buffer.c +0 -101
  115. data/src/util/yp_string_list.c +0 -29
@@ -1,16 +1,16 @@
1
1
  // Note that the UTF-8 decoding code is based on Bjoern Hoehrmann's UTF-8 DFA
2
2
  // decoder. See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
3
3
 
4
- #include "yarp/enc/yp_encoding.h"
4
+ #include "prism/enc/pm_encoding.h"
5
5
 
6
- typedef uint32_t yp_unicode_codepoint_t;
6
+ typedef uint32_t pm_unicode_codepoint_t;
7
7
 
8
8
  // Each element of the following table contains a bitfield that indicates a
9
9
  // piece of information about the corresponding unicode codepoint. Note that
10
10
  // this table is different from other encodings where we used a lookup table
11
11
  // because the indices of those tables are the byte representations, not the
12
12
  // codepoints themselves.
13
- const uint8_t yp_encoding_unicode_table[256] = {
13
+ const uint8_t pm_encoding_unicode_table[256] = {
14
14
  // 0 1 2 3 4 5 6 7 8 9 A B C D E F
15
15
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
16
16
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -31,7 +31,7 @@ const uint8_t yp_encoding_unicode_table[256] = {
31
31
  };
32
32
 
33
33
  #define UNICODE_ALPHA_CODEPOINTS_LENGTH 1450
34
- static const yp_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEPOINTS_LENGTH] = {
34
+ static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEPOINTS_LENGTH] = {
35
35
  0x100, 0x2C1,
36
36
  0x2C6, 0x2D1,
37
37
  0x2E0, 0x2E4,
@@ -760,7 +760,7 @@ static const yp_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
760
760
  };
761
761
 
762
762
  #define UNICODE_ALNUM_CODEPOINTS_LENGTH 1528
763
- static const yp_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEPOINTS_LENGTH] = {
763
+ static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEPOINTS_LENGTH] = {
764
764
  0x100, 0x2C1,
765
765
  0x2C6, 0x2D1,
766
766
  0x2E0, 0x2E4,
@@ -1528,7 +1528,7 @@ static const yp_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
1528
1528
  };
1529
1529
 
1530
1530
  #define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1296
1531
- static const yp_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = {
1531
+ static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = {
1532
1532
  0x100, 0x100,
1533
1533
  0x102, 0x102,
1534
1534
  0x104, 0x104,
@@ -2180,7 +2180,7 @@ static const yp_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
2180
2180
  };
2181
2181
 
2182
2182
  static bool
2183
- yp_unicode_codepoint_match(yp_unicode_codepoint_t codepoint, const yp_unicode_codepoint_t *codepoints, size_t size) {
2183
+ pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) {
2184
2184
  size_t start = 0;
2185
2185
  size_t end = size;
2186
2186
 
@@ -2202,7 +2202,7 @@ yp_unicode_codepoint_match(yp_unicode_codepoint_t codepoint, const yp_unicode_co
2202
2202
  return false;
2203
2203
  }
2204
2204
 
2205
- static const uint8_t yp_utf_8_dfa[] = {
2205
+ static const uint8_t pm_utf_8_dfa[] = {
2206
2206
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
2207
2207
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
2208
2208
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
@@ -2219,8 +2219,8 @@ static const uint8_t yp_utf_8_dfa[] = {
2219
2219
  1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
2220
2220
  };
2221
2221
 
2222
- static yp_unicode_codepoint_t
2223
- yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2222
+ static pm_unicode_codepoint_t
2223
+ pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2224
2224
  assert(n >= 1);
2225
2225
  size_t maximum = (size_t) n;
2226
2226
 
@@ -2229,16 +2229,16 @@ yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2229
2229
 
2230
2230
  for (size_t index = 0; index < 4 && index < maximum; index++) {
2231
2231
  uint32_t byte = b[index];
2232
- uint32_t type = yp_utf_8_dfa[byte];
2232
+ uint32_t type = pm_utf_8_dfa[byte];
2233
2233
 
2234
2234
  codepoint = (state != 0) ?
2235
2235
  (byte & 0x3fu) | (codepoint << 6) :
2236
2236
  (0xffu >> type) & (byte);
2237
2237
 
2238
- state = yp_utf_8_dfa[256 + (state * 16) + type];
2238
+ state = pm_utf_8_dfa[256 + (state * 16) + type];
2239
2239
  if (!state) {
2240
2240
  *width = index + 1;
2241
- return (yp_unicode_codepoint_t) codepoint;
2241
+ return (pm_unicode_codepoint_t) codepoint;
2242
2242
  }
2243
2243
  }
2244
2244
 
@@ -2247,57 +2247,57 @@ yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
2247
2247
  }
2248
2248
 
2249
2249
  static size_t
2250
- yp_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
2250
+ pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
2251
2251
  size_t width;
2252
- yp_utf_8_codepoint(b, n, &width);
2252
+ pm_utf_8_codepoint(b, n, &width);
2253
2253
  return width;
2254
2254
  }
2255
2255
 
2256
2256
  size_t
2257
- yp_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
2257
+ pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
2258
2258
  if (*b < 0x80) {
2259
- return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
2259
+ return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
2260
2260
  }
2261
2261
 
2262
2262
  size_t width;
2263
- yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
2263
+ pm_unicode_codepoint_t codepoint = pm_utf_8_codepoint(b, n, &width);
2264
2264
 
2265
2265
  if (codepoint <= 0xFF) {
2266
- return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_ALPHABETIC_BIT) ? width : 0;
2266
+ return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_ALPHABETIC_BIT) ? width : 0;
2267
2267
  } else {
2268
- return yp_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
2268
+ return pm_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
2269
2269
  }
2270
2270
  }
2271
2271
 
2272
2272
  size_t
2273
- yp_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
2273
+ pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
2274
2274
  if (*b < 0x80) {
2275
- return (yp_encoding_unicode_table[*b] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
2275
+ return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
2276
2276
  }
2277
2277
 
2278
2278
  size_t width;
2279
- yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
2279
+ pm_unicode_codepoint_t codepoint = pm_utf_8_codepoint(b, n, &width);
2280
2280
 
2281
2281
  if (codepoint <= 0xFF) {
2282
- return (yp_encoding_unicode_table[(uint8_t) codepoint] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
2282
+ return (pm_encoding_unicode_table[(uint8_t) codepoint] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
2283
2283
  } else {
2284
- return yp_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
2284
+ return pm_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
2285
2285
  }
2286
2286
  }
2287
2287
 
2288
2288
  static bool
2289
- yp_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
2289
+ pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
2290
2290
  if (*b < 0x80) {
2291
- return (yp_encoding_unicode_table[*b] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
2291
+ return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
2292
2292
  }
2293
2293
 
2294
2294
  size_t width;
2295
- yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
2295
+ pm_unicode_codepoint_t codepoint = pm_utf_8_codepoint(b, n, &width);
2296
2296
 
2297
2297
  if (codepoint <= 0xFF) {
2298
- return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
2298
+ return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
2299
2299
  } else {
2300
- return yp_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
2300
+ return pm_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
2301
2301
  }
2302
2302
  }
2303
2303
 
@@ -2305,20 +2305,20 @@ yp_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
2305
2305
  #undef UNICODE_ALNUM_CODEPOINTS_LENGTH
2306
2306
  #undef UNICODE_ISUPPER_CODEPOINTS_LENGTH
2307
2307
 
2308
- yp_encoding_t yp_encoding_utf_8 = {
2308
+ pm_encoding_t pm_encoding_utf_8 = {
2309
2309
  .name = "utf-8",
2310
- .char_width = yp_encoding_utf_8_char_width,
2311
- .alnum_char = yp_encoding_utf_8_alnum_char,
2312
- .alpha_char = yp_encoding_utf_8_alpha_char,
2313
- .isupper_char = yp_encoding_utf_8_isupper_char,
2310
+ .char_width = pm_encoding_utf_8_char_width,
2311
+ .alnum_char = pm_encoding_utf_8_alnum_char,
2312
+ .alpha_char = pm_encoding_utf_8_alpha_char,
2313
+ .isupper_char = pm_encoding_utf_8_isupper_char,
2314
2314
  .multibyte = true
2315
2315
  };
2316
2316
 
2317
- yp_encoding_t yp_encoding_utf8_mac = {
2317
+ pm_encoding_t pm_encoding_utf8_mac = {
2318
2318
  .name = "utf8-mac",
2319
- .char_width = yp_encoding_utf_8_char_width,
2320
- .alnum_char = yp_encoding_utf_8_alnum_char,
2321
- .alpha_char = yp_encoding_utf_8_alpha_char,
2322
- .isupper_char = yp_encoding_utf_8_isupper_char,
2319
+ .char_width = pm_encoding_utf_8_char_width,
2320
+ .alnum_char = pm_encoding_utf_8_alnum_char,
2321
+ .alpha_char = pm_encoding_utf_8_alpha_char,
2322
+ .isupper_char = pm_encoding_utf_8_isupper_char,
2323
2323
  .multibyte = true
2324
2324
  };
@@ -0,0 +1,56 @@
1
+ #include "prism/enc/pm_encoding.h"
2
+
3
+ static size_t
4
+ pm_encoding_windows_31j_char_width(const uint8_t *b, ptrdiff_t n) {
5
+ // These are the single byte characters.
6
+ if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
7
+ return 1;
8
+ }
9
+
10
+ // These are the double byte characters.
11
+ if (
12
+ (n > 1) &&
13
+ ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
14
+ (b[1] >= 0x40 && b[1] <= 0xFC)
15
+ ) {
16
+ return 2;
17
+ }
18
+
19
+ return 0;
20
+ }
21
+
22
+ static size_t
23
+ pm_encoding_windows_31j_alpha_char(const uint8_t *b, ptrdiff_t n) {
24
+ if (pm_encoding_windows_31j_char_width(b, n) == 1) {
25
+ return pm_encoding_ascii_alpha_char(b, n);
26
+ } else {
27
+ return 0;
28
+ }
29
+ }
30
+
31
+ static size_t
32
+ pm_encoding_windows_31j_alnum_char(const uint8_t *b, ptrdiff_t n) {
33
+ if (pm_encoding_windows_31j_char_width(b, n) == 1) {
34
+ return pm_encoding_ascii_alnum_char(b, n);
35
+ } else {
36
+ return 0;
37
+ }
38
+ }
39
+
40
+ static bool
41
+ pm_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
42
+ if (pm_encoding_windows_31j_char_width(b, n) == 1) {
43
+ return pm_encoding_ascii_isupper_char(b, n);
44
+ } else {
45
+ return false;
46
+ }
47
+ }
48
+
49
+ pm_encoding_t pm_encoding_windows_31j = {
50
+ .name = "windows-31j",
51
+ .char_width = pm_encoding_windows_31j_char_width,
52
+ .alnum_char = pm_encoding_windows_31j_alnum_char,
53
+ .alpha_char = pm_encoding_windows_31j_alpha_char,
54
+ .isupper_char = pm_encoding_windows_31j_isupper_char,
55
+ .multibyte = true
56
+ };