RubyGems - yarp - Versions diffs - 0.7.0 → 0.9.0 - Mend

yarp 0.7.0 → 0.9.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +54 -2
data/Makefile +2 -0
data/README.md +9 -5
data/config.yml +160 -93
data/docs/configuration.md +1 -0
data/docs/ruby_api.md +2 -0
data/docs/serialization.md +1 -1
data/docs/testing.md +2 -2
data/ext/yarp/api_node.c +361 -238
data/ext/yarp/extension.c +75 -26
data/ext/yarp/extension.h +2 -2
data/include/yarp/ast.h +226 -175
data/include/yarp/defines.h +5 -0
data/include/yarp/node.h +10 -0
data/include/yarp/unescape.h +4 -2
data/include/yarp/util/yp_buffer.h +9 -1
data/include/yarp/util/yp_constant_pool.h +3 -0
data/include/yarp/util/yp_list.h +7 -7
data/include/yarp/util/yp_newline_list.h +7 -0
data/include/yarp/util/yp_state_stack.h +1 -1
data/include/yarp/version.h +2 -2
data/include/yarp.h +10 -0
data/lib/yarp/desugar_visitor.rb +267 -0
data/lib/yarp/ffi.rb +89 -48
data/lib/yarp/lex_compat.rb +93 -25
data/lib/yarp/mutation_visitor.rb +683 -0
data/lib/yarp/node.rb +2061 -422
data/lib/yarp/serialize.rb +162 -120
data/lib/yarp.rb +54 -8
data/src/node.c +360 -304
data/src/prettyprint.c +190 -152
data/src/serialize.c +382 -340
data/src/token_type.c +2 -2
data/src/unescape.c +89 -77
data/src/util/yp_buffer.c +18 -0
data/src/util/yp_list.c +7 -16
data/src/util/yp_newline_list.c +10 -0
data/src/util/yp_state_stack.c +0 -6
data/src/yarp.c +941 -596
data/yarp.gemspec +3 -1
metadata +4 -2

data/src/token_type.c CHANGED Viewed

@@ -1,6 +1,6 @@
 /******************************************************************************/
-/* This file is generated by the bin/template script and should not be        */
-/* modified manually. See                                                     */
+/* This file is generated by the templates/template.rb script and should not  */
+/* be modified manually. See                                                  */
 /* templates/src/token_type.c.erb                                             */
 /* if you are looking to modify the                                           */
 /* template                                                                   */

data/src/unescape.c CHANGED Viewed

@@ -14,6 +14,20 @@ yp_char_is_hexadecimal_digits(const char *c, size_t length) {
     return true;
 }
+// We don't call the char_width function unless we have to because it's
+// expensive to go through the indirection of the function pointer. Instead we
+// provide a fast path that will check if we can just return 1.
+static inline size_t
+yp_char_width(yp_parser_t *parser, const char *start, const char *end) {
+    const unsigned char *uc = (const unsigned char *) start;
+    if (parser->encoding_changed || (*uc >= 0x80)) {
+        return parser->encoding.char_width(start, end - start);
+    } else {
+        return 1;
+    }
+}
 /******************************************************************************/
 /* Lookup tables for characters                                               */
 /******************************************************************************/
@@ -178,24 +192,8 @@ unescape_char(const unsigned char value, const unsigned char flags) {
 // Read a specific escape sequence into the given destination.
 static const char *
-unescape(char *dest, size_t *dest_length, const char *backslash, const char *end, yp_list_t *error_list, const unsigned char flags, bool write_to_str) {
+unescape(yp_parser_t *parser, char *dest, size_t *dest_length, const char *backslash, const char *end, const unsigned char flags, bool write_to_str) {
     switch (backslash[1]) {
-        // \a \b \e \f \n \r \s \t \v
-        case '\r': {
-            // if this is an \r\n we need to escape both
-            if (write_to_str) {
-                dest[(*dest_length)++] = (char) unescape_char(unescape_chars[(unsigned char) backslash[1]], flags);
-            }
-            if (backslash + 2 < end && backslash[2] == '\n') {
-                if (write_to_str) {
-                    dest[(*dest_length)++] = (char) unescape_char(unescape_chars[(unsigned char) backslash[2]], flags);
-                }
-                return backslash + 3;
-            }
-            return backslash + 2;
-        }
         case 'a':
         case 'b':
         case 'e':
@@ -234,7 +232,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
         // \unnnn       Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
         case 'u': {
             if ((flags & YP_UNESCAPE_FLAG_CONTROL) | (flags & YP_UNESCAPE_FLAG_META)) {
-                yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Unicode escape sequence cannot be used with control or meta flags.");
+                yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Unicode escape sequence cannot be used with control or meta flags.");
                 return backslash + 2;
             }
@@ -251,11 +249,11 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
                     // \u{nnnn} character literal allows only 1-6 hexadecimal digits
                     if (hexadecimal_length > 6)
-                        yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "invalid Unicode escape.");
+                        yp_diagnostic_list_append(&parser->error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "invalid Unicode escape.");
                     // there are not hexadecimal characters
                     if (hexadecimal_length == 0) {
-                        yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "unterminated Unicode escape");
+                        yp_diagnostic_list_append(&parser->error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "unterminated Unicode escape");
                         return unicode_cursor;
                     }
@@ -268,7 +266,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
                     uint32_t value;
                     unescape_unicode(unicode_start, (size_t) (unicode_cursor - unicode_start), &value);
                     if (write_to_str) {
-                        *dest_length += unescape_unicode_write(dest + *dest_length, value, unicode_start, unicode_cursor, error_list);
+                        *dest_length += unescape_unicode_write(dest + *dest_length, value, unicode_start, unicode_cursor, &parser->error_list);
                     }
                     unicode_cursor += yp_strspn_whitespace(unicode_cursor, end - unicode_cursor);
@@ -276,7 +274,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
                 // ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
                 if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1)
-                    yp_diagnostic_list_append(error_list, extra_codepoints_start, unicode_cursor - 1, "Multiple codepoints at single character literal");
+                    yp_diagnostic_list_append(&parser->error_list, extra_codepoints_start, unicode_cursor - 1, "Multiple codepoints at single character literal");
                 return unicode_cursor + 1;
             }
@@ -286,12 +284,12 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
                 unescape_unicode(backslash + 2, 4, &value);
                 if (write_to_str) {
-                    *dest_length += unescape_unicode_write(dest + *dest_length, value, backslash + 2, backslash + 6, error_list);
+                    *dest_length += unescape_unicode_write(dest + *dest_length, value, backslash + 2, backslash + 6, &parser->error_list);
                 }
                 return backslash + 6;
             }
-            yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid Unicode escape sequence");
+            yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid Unicode escape sequence");
             return backslash + 2;
         }
         // \c\M-x       meta control character, where x is an ASCII printable character
@@ -299,18 +297,18 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
         // \cx          control character, where x is an ASCII printable character
         case 'c':
             if (backslash + 2 >= end) {
-                yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
+                yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
                 return end;
             }
             if (flags & YP_UNESCAPE_FLAG_CONTROL) {
-                yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
+                yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
                 return backslash + 2;
             }
             switch (backslash[2]) {
                 case '\\':
-                    return unescape(dest, dest_length, backslash + 2, end, error_list, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
+                    return unescape(parser, dest, dest_length, backslash + 2, end, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
                 case '?':
                     if (write_to_str) {
                         dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
@@ -318,7 +316,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
                     return backslash + 3;
                 default: {
                     if (!char_is_ascii_printable(backslash[2])) {
-                        yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
+                        yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
                         return backslash + 2;
                     }
@@ -332,23 +330,23 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
         // \C-?         delete, ASCII 7Fh (DEL)
         case 'C':
             if (backslash + 3 >= end) {
-                yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
+                yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
                 return end;
             }
             if (flags & YP_UNESCAPE_FLAG_CONTROL) {
-                yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
+                yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
                 return backslash + 2;
             }
             if (backslash[2] != '-') {
-                yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
+                yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
                 return backslash + 2;
             }
             switch (backslash[3]) {
                 case '\\':
-                    return unescape(dest, dest_length, backslash + 3, end, error_list, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
+                    return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
                 case '?':
                     if (write_to_str) {
                         dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
@@ -356,7 +354,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
                     return backslash + 4;
                 default:
                     if (!char_is_ascii_printable(backslash[3])) {
-                        yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid control escape sequence");
+                        yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid control escape sequence");
                         return backslash + 2;
                     }
@@ -370,22 +368,22 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
         // \M-x         meta character, where x is an ASCII printable character
         case 'M': {
             if (backslash + 3 >= end) {
-                yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
+                yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
                 return end;
             }
             if (flags & YP_UNESCAPE_FLAG_META) {
-                yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Meta escape sequence cannot be doubled.");
+                yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Meta escape sequence cannot be doubled.");
                 return backslash + 2;
             }
             if (backslash[2] != '-') {
-                yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid meta escape sequence");
+                yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid meta escape sequence");
                 return backslash + 2;
             }
             if (backslash[3] == '\\') {
-                return unescape(dest, dest_length, backslash + 3, end, error_list, flags | YP_UNESCAPE_FLAG_META, write_to_str);
+                return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_META, write_to_str);
             }
             if (char_is_ascii_printable(backslash[3])) {
@@ -395,17 +393,29 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
                 return backslash + 4;
             }
-            yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid meta escape sequence");
+            yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid meta escape sequence");
             return backslash + 3;
         }
+        // \n
+        case '\n':
+            return backslash + 2;
+        // \r
+        case '\r':
+            if (backslash + 2 < end && backslash[2] == '\n') {
+                return backslash + 3;
+            }
+        /* fallthrough */
         // In this case we're escaping something that doesn't need escaping.
-        default:
-            {
-                if (write_to_str) {
-                    dest[(*dest_length)++] = backslash[1];
-                }
-                return backslash + 2;
+        default: {
+            size_t width = yp_char_width(parser, backslash + 1, end);
+            if (write_to_str) {
+                memcpy(dest + *dest_length, backslash + 1, width);
+                *dest_length += width;
             }
+            return backslash + 1 + width;
+        }
     }
 }
@@ -438,26 +448,24 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
 // \c? or \C-?    delete, ASCII 7Fh (DEL)
 //
 YP_EXPORTED_FUNCTION void
-yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t length, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) {
+yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type) {
     if (unescape_type == YP_UNESCAPE_NONE) {
         // If we're not unescaping then we can reference the source directly.
-        yp_string_shared_init(string, value, value + length);
         return;
     }
-    const char *backslash = yp_memchr(value, '\\', length, parser->encoding_changed, &parser->encoding);
+    const char *backslash = yp_memchr(string->source, '\\', string->length, parser->encoding_changed, &parser->encoding);
     if (backslash == NULL) {
         // Here there are no escapes, so we can reference the source directly.
-        yp_string_shared_init(string, value, value + length);
         return;
     }
     // Here we have found an escape character, so we need to handle all escapes
     // within the string.
-    char *allocated = malloc(length);
+    char *allocated = malloc(string->length);
     if (allocated == NULL) {
-        yp_diagnostic_list_append(error_list, value, value + length, "Failed to allocate memory for unescaping.");
+        yp_diagnostic_list_append(&parser->error_list, string->source, string->source + string->length, "Failed to allocate memory for unescaping.");
         return;
     }
@@ -468,13 +476,13 @@ yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t len
     // This is the current position in the source string that we're looking at.
     // It's going to move along behind the backslash so that we can copy each
     // segment of the string that doesn't contain an escape.
-    const char *cursor = value;
-    const char *end = value + length;
+    const char *cursor = string->source;
+    const char *end = string->source + string->length;
     // For each escape found in the source string, we will handle it and update
     // the moving cursor->backslash window.
     while (backslash != NULL && backslash + 1 < end) {
-        assert(dest_length < length);
+        assert(dest_length < string->length);
         // This is the size of the segment of the string from the previous escape
         // or the start of the string to the current escape.
@@ -502,7 +510,7 @@ yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t len
                 // This is the only type of unescaping left. In this case we need to
                 // handle all of the different unescapes.
                 assert(unescape_type == YP_UNESCAPE_ALL);
-                cursor = unescape(dest, &dest_length, backslash, end, error_list, YP_UNESCAPE_FLAG_NONE, true);
+                cursor = unescape(parser, dest, &dest_length, backslash, end, YP_UNESCAPE_FLAG_NONE, true);
                 break;
         }
@@ -520,36 +528,21 @@ yp_unescape_manipulate_string(yp_parser_t *parser, const char *value, size_t len
         cursor = end;
     }
+    // If the string was already allocated, then we need to free that memory
+    // here. That's because we're about to override it with the escaped string.
+    yp_string_free(string);
     // We also need to update the length at the end. This is because every escape
     // reduces the length of the final string, and we don't want garbage at the
     // end.
     yp_string_owned_init(string, allocated, dest_length + ((size_t) (end - cursor)));
 }
-YP_EXPORTED_FUNCTION bool
-yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
-    bool success;
-    yp_list_t error_list;
-    yp_list_init(&error_list);
-    yp_parser_t parser;
-    yp_parser_init(&parser, start, length, "");
-    yp_unescape_manipulate_string(&parser, start, length, result, unescape_type, &error_list);
-    success = yp_list_empty_p(&error_list);
-    yp_list_free(&error_list);
-    yp_parser_free(&parser);
-    return success;
-}
 // This function is similar to yp_unescape_manipulate_string, except it doesn't
 // actually perform any string manipulations. Instead, it calculates how long
 // the unescaped character is, and returns that value
-YP_EXPORTED_FUNCTION size_t
-yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unescape_type_t unescape_type, bool expect_single_codepoint, yp_list_t *error_list) {
+size_t
+yp_unescape_calculate_difference(yp_parser_t *parser, const char *backslash, yp_unescape_type_t unescape_type, bool expect_single_codepoint) {
     assert(unescape_type != YP_UNESCAPE_NONE);
     switch (backslash[1]) {
@@ -557,7 +550,9 @@ yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unes
         case '\'':
             return 2;
         default: {
-            if (unescape_type == YP_UNESCAPE_MINIMAL) return 2;
+            if (unescape_type == YP_UNESCAPE_MINIMAL) {
+                return 1 + yp_char_width(parser, backslash + 1, parser->end);
+            }
             // This is the only type of unescaping left. In this case we need to
             // handle all of the different unescapes.
@@ -567,10 +562,27 @@ yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unes
             if (expect_single_codepoint)
                 flags |= YP_UNESCAPE_FLAG_EXPECT_SINGLE;
-            const char *cursor = unescape(NULL, 0, backslash, end, error_list, flags, false);
+            const char *cursor = unescape(parser, NULL, 0, backslash, parser->end, flags, false);
             assert(cursor > backslash);
             return (size_t) (cursor - backslash);
         }
     }
 }
+// This is one of the main entry points into the extension. It accepts a source
+// string, a type of unescaping, and a pointer to a result string. It returns a
+// boolean indicating whether or not the unescaping was successful.
+YP_EXPORTED_FUNCTION bool
+yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
+    yp_parser_t parser;
+    yp_parser_init(&parser, start, length, NULL);
+    yp_string_shared_init(result, start, start + length);
+    yp_unescape_manipulate_string(&parser, result, unescape_type);
+    bool success = yp_list_empty_p(&parser.error_list);
+    yp_parser_free(&parser);
+    return success;
+}

data/src/util/yp_buffer.c CHANGED Viewed

@@ -2,6 +2,12 @@
 #define YP_BUFFER_INITIAL_SIZE 1024
+// Return the size of the yp_buffer_t struct.
+size_t
+yp_buffer_sizeof(void) {
+    return sizeof(yp_buffer_t);
+}
 // Initialize a yp_buffer_t with its default values.
 bool
 yp_buffer_init(yp_buffer_t *buffer) {
@@ -12,6 +18,18 @@ yp_buffer_init(yp_buffer_t *buffer) {
     return buffer->value != NULL;
 }
+// Return the value of the buffer.
+char *
+yp_buffer_value(yp_buffer_t *buffer) {
+    return buffer->value;
+}
+// Return the length of the buffer.
+size_t
+yp_buffer_length(yp_buffer_t *buffer) {
+    return buffer->length;
+}
 // Append the given amount of space to the buffer.
 static inline void
 yp_buffer_append_length(yp_buffer_t *buffer, size_t length) {

data/src/util/yp_list.c CHANGED Viewed

@@ -1,28 +1,15 @@
 #include "yarp/util/yp_list.h"
-// Initializes a new list.
-YP_EXPORTED_FUNCTION void
-yp_list_init(yp_list_t *list) {
-    *list = (yp_list_t) { .head = NULL, .tail = NULL };
-}
 // Returns true if the given list is empty.
 YP_EXPORTED_FUNCTION bool
 yp_list_empty_p(yp_list_t *list) {
     return list->head == NULL;
 }
-YP_EXPORTED_FUNCTION uint32_t
+// Returns the size of the list.
+YP_EXPORTED_FUNCTION size_t
 yp_list_size(yp_list_t *list) {
-    yp_list_node_t *node = list->head;
-    uint32_t length = 0;
-    while (node != NULL) {
-        length++;
-        node = node->next;
-    }
-    return length;
+    return list->size;
 }
 // Append a node to the given list.
@@ -33,7 +20,9 @@ yp_list_append(yp_list_t *list, yp_list_node_t *node) {
     } else {
         list->tail->next = node;
     }
     list->tail = node;
+    list->size++;
 }
 // Deallocate the internal state of the given list.
@@ -47,4 +36,6 @@ yp_list_free(yp_list_t *list) {
         free(node);
         node = next;
     }
+    list->size = 0;
 }

data/src/util/yp_newline_list.c CHANGED Viewed

@@ -30,6 +30,7 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
         if (list->offsets == NULL) return false;
     }
+    assert(*cursor == '\n');
     assert(cursor >= list->start);
     size_t newline_offset = (size_t) (cursor - list->start + 1);
     assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
@@ -38,6 +39,15 @@ yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
     return true;
 }
+// Conditionally append a new offset to the newline list, if the value passed in is a newline.
+bool
+yp_newline_list_check_append(yp_newline_list_t *list, const char *cursor) {
+    if (*cursor != '\n') {
+        return true;
+    }
+    return yp_newline_list_append(list, cursor);
+}
 // Returns the line and column of the given offset, assuming we don't have any
 // information about the previous index that we found.
 static yp_line_column_t

data/src/util/yp_state_stack.c CHANGED Viewed

@@ -1,11 +1,5 @@
 #include "yarp/util/yp_state_stack.h"
-// Initializes the state stack to an empty stack.
-void
-yp_state_stack_init(yp_state_stack_t *stack) {
-    *stack = 0;
-}
 // Pushes a value onto the stack.
 void
 yp_state_stack_push(yp_state_stack_t *stack, bool value) {