RubyGems - yarp - Versions diffs - 0.12.0 → 0.13.0 - Mend

yarp 0.12.0 → 0.13.0

Files changed (115) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +29 -8
data/CONTRIBUTING.md +2 -2
data/Makefile +5 -5
data/README.md +11 -12
data/config.yml +6 -2
data/docs/build_system.md +21 -21
data/docs/building.md +4 -4
data/docs/configuration.md +25 -21
data/docs/design.md +2 -2
data/docs/encoding.md +17 -17
data/docs/fuzzing.md +4 -4
data/docs/heredocs.md +3 -3
data/docs/mapping.md +94 -94
data/docs/ripper.md +4 -4
data/docs/ruby_api.md +11 -11
data/docs/serialization.md +17 -16
data/docs/testing.md +6 -6
data/ext/prism/api_node.c +4725 -0
data/ext/{yarp → prism}/api_pack.c +82 -82
data/ext/{yarp → prism}/extconf.rb +13 -13
data/ext/{yarp → prism}/extension.c +175 -168
data/ext/prism/extension.h +18 -0
data/include/prism/ast.h +1932 -0
data/include/prism/defines.h +45 -0
data/include/prism/diagnostic.h +231 -0
data/include/{yarp/enc/yp_encoding.h → prism/enc/pm_encoding.h} +40 -40
data/include/prism/node.h +41 -0
data/include/prism/pack.h +141 -0
data/include/{yarp → prism}/parser.h +143 -142
data/include/prism/regexp.h +19 -0
data/include/prism/unescape.h +48 -0
data/include/prism/util/pm_buffer.h +51 -0
data/include/{yarp/util/yp_char.h → prism/util/pm_char.h} +20 -20
data/include/{yarp/util/yp_constant_pool.h → prism/util/pm_constant_pool.h} +26 -22
data/include/{yarp/util/yp_list.h → prism/util/pm_list.h} +21 -21
data/include/prism/util/pm_memchr.h +14 -0
data/include/{yarp/util/yp_newline_list.h → prism/util/pm_newline_list.h} +11 -11
data/include/prism/util/pm_state_stack.h +24 -0
data/include/{yarp/util/yp_string.h → prism/util/pm_string.h} +20 -20
data/include/prism/util/pm_string_list.h +25 -0
data/include/{yarp/util/yp_strpbrk.h → prism/util/pm_strpbrk.h} +7 -7
data/include/prism/version.h +4 -0
data/include/prism.h +82 -0
data/lib/prism/compiler.rb +465 -0
data/lib/prism/debug.rb +157 -0
data/lib/{yarp/desugar_visitor.rb → prism/desugar_compiler.rb} +4 -2
data/lib/prism/dispatcher.rb +2051 -0
data/lib/prism/dsl.rb +750 -0
data/lib/{yarp → prism}/ffi.rb +66 -67
data/lib/{yarp → prism}/lex_compat.rb +40 -43
data/lib/{yarp/mutation_visitor.rb → prism/mutation_compiler.rb} +3 -3
data/lib/{yarp → prism}/node.rb +2012 -2593
data/lib/prism/node_ext.rb +55 -0
data/lib/prism/node_inspector.rb +68 -0
data/lib/{yarp → prism}/pack.rb +1 -1
data/lib/{yarp → prism}/parse_result/comments.rb +1 -1
data/lib/{yarp → prism}/parse_result/newlines.rb +1 -1
data/lib/prism/parse_result.rb +266 -0
data/lib/{yarp → prism}/pattern.rb +14 -14
data/lib/{yarp → prism}/ripper_compat.rb +5 -5
data/lib/{yarp → prism}/serialize.rb +12 -7
data/lib/prism/visitor.rb +470 -0
data/lib/prism.rb +64 -0
data/lib/yarp.rb +2 -614
data/src/diagnostic.c +213 -208
data/src/enc/pm_big5.c +52 -0
data/src/enc/pm_euc_jp.c +58 -0
data/src/enc/{yp_gbk.c → pm_gbk.c} +16 -16
data/src/enc/pm_shift_jis.c +56 -0
data/src/enc/{yp_tables.c → pm_tables.c} +69 -69
data/src/enc/{yp_unicode.c → pm_unicode.c} +40 -40
data/src/enc/pm_windows_31j.c +56 -0
data/src/node.c +1293 -1233
data/src/pack.c +247 -247
data/src/prettyprint.c +1479 -1479
data/src/{yarp.c → prism.c} +5205 -5083
data/src/regexp.c +132 -132
data/src/serialize.c +1121 -1121
data/src/token_type.c +169 -167
data/src/unescape.c +106 -87
data/src/util/pm_buffer.c +103 -0
data/src/util/{yp_char.c → pm_char.c} +72 -72
data/src/util/{yp_constant_pool.c → pm_constant_pool.c} +85 -64
data/src/util/{yp_list.c → pm_list.c} +10 -10
data/src/util/{yp_memchr.c → pm_memchr.c} +6 -4
data/src/util/{yp_newline_list.c → pm_newline_list.c} +21 -21
data/src/util/{yp_state_stack.c → pm_state_stack.c} +4 -4
data/src/util/{yp_string.c → pm_string.c} +38 -38
data/src/util/pm_string_list.c +29 -0
data/src/util/{yp_strncasecmp.c → pm_strncasecmp.c} +1 -1
data/src/util/{yp_strpbrk.c → pm_strpbrk.c} +8 -8
data/yarp.gemspec +68 -59
metadata +70 -61
data/ext/yarp/api_node.c +0 -4728
data/ext/yarp/extension.h +0 -18
data/include/yarp/ast.h +0 -1929
data/include/yarp/defines.h +0 -45
data/include/yarp/diagnostic.h +0 -226
data/include/yarp/node.h +0 -42
data/include/yarp/pack.h +0 -141
data/include/yarp/regexp.h +0 -19
data/include/yarp/unescape.h +0 -44
data/include/yarp/util/yp_buffer.h +0 -51
data/include/yarp/util/yp_memchr.h +0 -14
data/include/yarp/util/yp_state_stack.h +0 -24
data/include/yarp/util/yp_string_list.h +0 -25
data/include/yarp/version.h +0 -4
data/include/yarp.h +0 -82
data/src/enc/yp_big5.c +0 -52
data/src/enc/yp_euc_jp.c +0 -58
data/src/enc/yp_shift_jis.c +0 -56
data/src/enc/yp_windows_31j.c +0 -56
data/src/util/yp_buffer.c +0 -101
data/src/util/yp_string_list.c +0 -29

data/src/enc/{yp_unicode.c → pm_unicode.c} RENAMED Viewed

@@ -1,16 +1,16 @@
 // Note that the UTF-8 decoding code is based on Bjoern Hoehrmann's UTF-8 DFA
 // decoder. See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
-#include "yarp/enc/yp_encoding.h"
+#include "prism/enc/pm_encoding.h"
-typedef uint32_t yp_unicode_codepoint_t;
+typedef uint32_t pm_unicode_codepoint_t;
 // Each element of the following table contains a bitfield that indicates a
 // piece of information about the corresponding unicode codepoint. Note that
 // this table is different from other encodings where we used a lookup table
 // because the indices of those tables are the byte representations, not the
 // codepoints themselves.
-const uint8_t yp_encoding_unicode_table[256] = {
+const uint8_t pm_encoding_unicode_table[256] = {
 //  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x
     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1x
@@ -31,7 +31,7 @@ const uint8_t yp_encoding_unicode_table[256] = {
 };
 #define UNICODE_ALPHA_CODEPOINTS_LENGTH 1450
-static const yp_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEPOINTS_LENGTH] = {
+static const pm_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEPOINTS_LENGTH] = {
     0x100, 0x2C1,
     0x2C6, 0x2D1,
     0x2E0, 0x2E4,
@@ -760,7 +760,7 @@ static const yp_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEP
 };
 #define UNICODE_ALNUM_CODEPOINTS_LENGTH 1528
-static const yp_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEPOINTS_LENGTH] = {
+static const pm_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEPOINTS_LENGTH] = {
     0x100, 0x2C1,
     0x2C6, 0x2D1,
     0x2E0, 0x2E4,
@@ -1528,7 +1528,7 @@ static const yp_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEP
 };
 #define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1296
-static const yp_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = {
+static const pm_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = {
     0x100, 0x100,
     0x102, 0x102,
     0x104, 0x104,
@@ -2180,7 +2180,7 @@ static const yp_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_C
 };
 static bool
-yp_unicode_codepoint_match(yp_unicode_codepoint_t codepoint, const yp_unicode_codepoint_t *codepoints, size_t size) {
+pm_unicode_codepoint_match(pm_unicode_codepoint_t codepoint, const pm_unicode_codepoint_t *codepoints, size_t size) {
     size_t start = 0;
     size_t end = size;
@@ -2202,7 +2202,7 @@ yp_unicode_codepoint_match(yp_unicode_codepoint_t codepoint, const yp_unicode_co
     return false;
 }
-static const uint8_t yp_utf_8_dfa[] = {
+static const uint8_t pm_utf_8_dfa[] = {
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
     0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
@@ -2219,8 +2219,8 @@ static const uint8_t yp_utf_8_dfa[] = {
     1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
 };
-static yp_unicode_codepoint_t
-yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
+static pm_unicode_codepoint_t
+pm_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
     assert(n >= 1);
     size_t maximum = (size_t) n;
@@ -2229,16 +2229,16 @@ yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
     for (size_t index = 0; index < 4 && index < maximum; index++) {
         uint32_t byte = b[index];
-        uint32_t type = yp_utf_8_dfa[byte];
+        uint32_t type = pm_utf_8_dfa[byte];
         codepoint = (state != 0) ?
             (byte & 0x3fu) | (codepoint << 6) :
             (0xffu >> type) & (byte);
-        state = yp_utf_8_dfa[256 + (state * 16) + type];
+        state = pm_utf_8_dfa[256 + (state * 16) + type];
         if (!state) {
             *width = index + 1;
-            return (yp_unicode_codepoint_t) codepoint;
+            return (pm_unicode_codepoint_t) codepoint;
         }
     }
@@ -2247,57 +2247,57 @@ yp_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) {
 }
 static size_t
-yp_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
+pm_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) {
     size_t width;
-    yp_utf_8_codepoint(b, n, &width);
+    pm_utf_8_codepoint(b, n, &width);
     return width;
 }
 size_t
-yp_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
+pm_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) {
     if (*b < 0x80) {
-        return (yp_encoding_unicode_table[*b] & YP_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
+        return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
     }
     size_t width;
-    yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
+    pm_unicode_codepoint_t codepoint = pm_utf_8_codepoint(b, n, &width);
     if (codepoint <= 0xFF) {
-        return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_ALPHABETIC_BIT) ? width : 0;
+        return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_ALPHABETIC_BIT) ? width : 0;
     } else {
-        return yp_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
+        return pm_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0;
     }
 }
 size_t
-yp_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
+pm_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) {
     if (*b < 0x80) {
-        return (yp_encoding_unicode_table[*b] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
+        return (pm_encoding_unicode_table[*b] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0;
     }
     size_t width;
-    yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
+    pm_unicode_codepoint_t codepoint = pm_utf_8_codepoint(b, n, &width);
     if (codepoint <= 0xFF) {
-        return (yp_encoding_unicode_table[(uint8_t) codepoint] & (YP_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
+        return (pm_encoding_unicode_table[(uint8_t) codepoint] & (PRISM_ENCODING_ALPHANUMERIC_BIT)) ? width : 0;
     } else {
-        return yp_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
+        return pm_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0;
     }
 }
 static bool
-yp_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
+pm_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
     if (*b < 0x80) {
-        return (yp_encoding_unicode_table[*b] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
+        return (pm_encoding_unicode_table[*b] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
     }
     size_t width;
-    yp_unicode_codepoint_t codepoint = yp_utf_8_codepoint(b, n, &width);
+    pm_unicode_codepoint_t codepoint = pm_utf_8_codepoint(b, n, &width);
     if (codepoint <= 0xFF) {
-        return (yp_encoding_unicode_table[(uint8_t) codepoint] & YP_ENCODING_UPPERCASE_BIT) ? true : false;
+        return (pm_encoding_unicode_table[(uint8_t) codepoint] & PRISM_ENCODING_UPPERCASE_BIT) ? true : false;
     } else {
-        return yp_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
+        return pm_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false;
     }
 }
@@ -2305,20 +2305,20 @@ yp_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) {
 #undef UNICODE_ALNUM_CODEPOINTS_LENGTH
 #undef UNICODE_ISUPPER_CODEPOINTS_LENGTH
-yp_encoding_t yp_encoding_utf_8 = {
+pm_encoding_t pm_encoding_utf_8 = {
     .name = "utf-8",
-    .char_width = yp_encoding_utf_8_char_width,
-    .alnum_char = yp_encoding_utf_8_alnum_char,
-    .alpha_char = yp_encoding_utf_8_alpha_char,
-    .isupper_char = yp_encoding_utf_8_isupper_char,
+    .char_width = pm_encoding_utf_8_char_width,
+    .alnum_char = pm_encoding_utf_8_alnum_char,
+    .alpha_char = pm_encoding_utf_8_alpha_char,
+    .isupper_char = pm_encoding_utf_8_isupper_char,
     .multibyte = true
 };
-yp_encoding_t yp_encoding_utf8_mac = {
+pm_encoding_t pm_encoding_utf8_mac = {
     .name = "utf8-mac",
-    .char_width = yp_encoding_utf_8_char_width,
-    .alnum_char = yp_encoding_utf_8_alnum_char,
-    .alpha_char = yp_encoding_utf_8_alpha_char,
-    .isupper_char = yp_encoding_utf_8_isupper_char,
+    .char_width = pm_encoding_utf_8_char_width,
+    .alnum_char = pm_encoding_utf_8_alnum_char,
+    .alpha_char = pm_encoding_utf_8_alpha_char,
+    .isupper_char = pm_encoding_utf_8_isupper_char,
     .multibyte = true
 };

data/src/enc/pm_windows_31j.c ADDED Viewed

@@ -0,0 +1,56 @@
+#include "prism/enc/pm_encoding.h"
+static size_t
+pm_encoding_windows_31j_char_width(const uint8_t *b, ptrdiff_t n) {
+    // These are the single byte characters.
+    if (*b < 0x80 || (*b >= 0xA1 && *b <= 0xDF)) {
+        return 1;
+    }
+    // These are the double byte characters.
+    if (
+        (n > 1) &&
+        ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) &&
+        (b[1] >= 0x40 && b[1] <= 0xFC)
+    ) {
+        return 2;
+    }
+    return 0;
+}
+static size_t
+pm_encoding_windows_31j_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    if (pm_encoding_windows_31j_char_width(b, n) == 1) {
+        return pm_encoding_ascii_alpha_char(b, n);
+    } else {
+        return 0;
+    }
+}
+static size_t
+pm_encoding_windows_31j_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    if (pm_encoding_windows_31j_char_width(b, n) == 1) {
+        return pm_encoding_ascii_alnum_char(b, n);
+    } else {
+        return 0;
+    }
+}
+static bool
+pm_encoding_windows_31j_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    if (pm_encoding_windows_31j_char_width(b, n) == 1) {
+        return pm_encoding_ascii_isupper_char(b, n);
+    } else {
+        return false;
+    }
+}
+pm_encoding_t pm_encoding_windows_31j = {
+    .name = "windows-31j",
+    .char_width = pm_encoding_windows_31j_char_width,
+    .alnum_char = pm_encoding_windows_31j_alnum_char,
+    .alpha_char = pm_encoding_windows_31j_alpha_char,
+    .isupper_char = pm_encoding_windows_31j_isupper_char,
+    .multibyte = true
+};