RubyGems - liquid-c - Versions diffs - 4.0.1 → 4.2.0 - Mend

liquid-c 4.0.1 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (71) hide show

checksums.yaml +4 -4
data/.github/workflows/cla.yml +23 -0
data/.github/workflows/liquid.yml +36 -11
data/.gitignore +4 -0
data/.rubocop.yml +14 -0
data/Gemfile +15 -5
data/README.md +32 -8
data/Rakefile +12 -63
data/ext/liquid_c/block.c +493 -60
data/ext/liquid_c/block.h +28 -2
data/ext/liquid_c/c_buffer.c +42 -0
data/ext/liquid_c/c_buffer.h +76 -0
data/ext/liquid_c/context.c +233 -0
data/ext/liquid_c/context.h +70 -0
data/ext/liquid_c/document_body.c +97 -0
data/ext/liquid_c/document_body.h +59 -0
data/ext/liquid_c/expression.c +116 -0
data/ext/liquid_c/expression.h +24 -0
data/ext/liquid_c/extconf.rb +21 -9
data/ext/liquid_c/intutil.h +22 -0
data/ext/liquid_c/lexer.c +39 -3
data/ext/liquid_c/lexer.h +18 -3
data/ext/liquid_c/liquid.c +76 -6
data/ext/liquid_c/liquid.h +24 -1
data/ext/liquid_c/liquid_vm.c +618 -0
data/ext/liquid_c/liquid_vm.h +25 -0
data/ext/liquid_c/parse_context.c +76 -0
data/ext/liquid_c/parse_context.h +13 -0
data/ext/liquid_c/parser.c +153 -65
data/ext/liquid_c/parser.h +4 -2
data/ext/liquid_c/raw.c +136 -0
data/ext/liquid_c/raw.h +6 -0
data/ext/liquid_c/resource_limits.c +279 -0
data/ext/liquid_c/resource_limits.h +23 -0
data/ext/liquid_c/stringutil.h +44 -0
data/ext/liquid_c/tokenizer.c +149 -35
data/ext/liquid_c/tokenizer.h +20 -9
data/ext/liquid_c/usage.c +18 -0
data/ext/liquid_c/usage.h +9 -0
data/ext/liquid_c/variable.c +196 -20
data/ext/liquid_c/variable.h +18 -1
data/ext/liquid_c/variable_lookup.c +44 -0
data/ext/liquid_c/variable_lookup.h +8 -0
data/ext/liquid_c/vm_assembler.c +491 -0
data/ext/liquid_c/vm_assembler.h +240 -0
data/ext/liquid_c/vm_assembler_pool.c +99 -0
data/ext/liquid_c/vm_assembler_pool.h +26 -0
data/lib/liquid/c/compile_ext.rb +44 -0
data/lib/liquid/c/version.rb +3 -1
data/lib/liquid/c.rb +226 -48
data/liquid-c.gemspec +16 -10
data/performance/c_profile.rb +23 -0
data/performance.rb +6 -4
data/rakelib/compile.rake +15 -0
data/rakelib/integration_test.rake +43 -0
data/rakelib/performance.rake +43 -0
data/rakelib/rubocop.rake +6 -0
data/rakelib/unit_test.rake +14 -0
data/test/integration_test.rb +11 -0
data/test/liquid_test_helper.rb +21 -0
data/test/test_helper.rb +21 -2
data/test/unit/block_test.rb +137 -0
data/test/unit/context_test.rb +85 -0
data/test/unit/expression_test.rb +191 -0
data/test/unit/gc_stress_test.rb +28 -0
data/test/unit/raw_test.rb +93 -0
data/test/unit/resource_limits_test.rb +50 -0
data/test/unit/tokenizer_test.rb +90 -20
data/test/unit/variable_test.rb +279 -60
metadata +60 -11
data/test/liquid_test.rb +0 -11

data/ext/liquid_c/parse_context.h ADDED Viewed

@@ -0,0 +1,13 @@
+#ifndef LIQUID_PARSE_CONTEXT_H
+#define LIQUID_PARSE_CONTEXT_H
+#include <ruby.h>
+#include <stdbool.h>
+#include "vm_assembler_pool.h"
+void liquid_define_parse_context(void);
+VALUE parse_context_get_document_body(VALUE self);
+vm_assembler_pool_t *parse_context_get_vm_assembler_pool(VALUE self);
+#endif

data/ext/liquid_c/parser.c CHANGED Viewed

@@ -2,8 +2,8 @@
 #include "parser.h"
 #include "lexer.h"
-static VALUE cLiquidRangeLookup, cLiquidVariableLookup, cRange, vLiquidExpressionLiterals;
-static ID idToI, idEvaluate;
+static VALUE empty_string;
+static ID id_to_i, idEvaluate;
 void init_parser(parser_t *p, const char *str, const char *end)
 {
@@ -67,79 +67,145 @@ static VALUE parse_number(parser_t *p)
     return out;
 }
-static VALUE parse_range(parser_t *p)
+__attribute__((noreturn)) static void raise_invalid_expression_type(const char *expr, int expr_len) /*
+ * Raises cLiquidSyntaxError, quoting the first expr_len bytes of expr as the
+ * offending operand of a range expression. Declared noreturn: control leaves
+ * through rb_enc_raise.
+ */
 {
+    rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Invalid expression type '%.*s' in range expression", expr_len, expr);
+}
+static VALUE try_parse_constant_range(parser_t *p)
+{ /*
+   * Tries to parse `(begin..end)` where both bounds are constant expressions.
+   * Returns a Ruby Range (end included) built from the bounds converted with
+   * to_i, or Qundef with *p restored to its state on entry when either bound
+   * is not a constant expression. Raises a syntax error when a bound does not
+   * respond to to_i.
+   */
+    parser_t saved_state = *p; // snapshot so a failed attempt can rewind the parser
     parser_must_consume(p, TOKEN_OPEN_ROUND);
-    VALUE args[2];
-    args[0] = parse_expression(p);
+    const char *begin_str = p->cur.val; // start of the bound's source text, kept for the error message
+    VALUE begin = try_parse_constant_expression(p);
+    const char *begin_str_end = p->cur.val;
+    if (begin == Qundef) {
+        *p = saved_state;
+        return Qundef;
+    }
     parser_must_consume(p, TOKEN_DOTDOT);
-    args[1] = parse_expression(p);
+    const char *end_str = p->cur.val;
+    VALUE end = try_parse_constant_expression(p);
+    const char *end_str_end = p->cur.val;
+    if (end == Qundef) {
+        *p = saved_state;
+        return Qundef;
+    }
     parser_must_consume(p, TOKEN_CLOSE_ROUND);
-    if (rb_respond_to(args[0], idEvaluate) || rb_respond_to(args[1], idEvaluate))
-        return rb_class_new_instance(2, args, cLiquidRangeLookup);
+    begin = rb_check_funcall(begin, id_to_i, 0, NULL); // yields Qundef when the receiver does not respond to to_i
+    if (begin == Qundef) raise_invalid_expression_type(begin_str, (int)(begin_str_end - begin_str));
+    end = rb_check_funcall(end, id_to_i, 0, NULL);
+    if (end == Qundef) raise_invalid_expression_type(end_str, (int)(end_str_end - end_str));
-    return rb_class_new_instance(2, args, cRange);
+    bool exclude_end = false; // named only to make the third rb_range_new argument readable
+    return rb_range_new(begin, end, exclude_end);
 }
-static VALUE parse_variable(parser_t *p)
+static void parse_and_compile_range(parser_t *p, vm_assembler_t *code) /*
+ * Compiles a range expression into `code`. A range whose bounds are both
+ * constant is built at parse time and emitted as a single pushed constant;
+ * otherwise both bound expressions are compiled, followed by a
+ * new_int_range instruction.
+ */
 {
-    VALUE name, lookups = rb_ary_new(), lookup;
-    unsigned long long command_flags = 0;
+    VALUE const_range = try_parse_constant_range(p); // Qundef means "not constant"; the parser has been rewound
+    if (const_range != Qundef) {
+        vm_assembler_add_push_const(code, const_range);
+        return;
+    }
+    parser_must_consume(p, TOKEN_OPEN_ROUND);
+    parse_and_compile_expression(p, code);
+    parser_must_consume(p, TOKEN_DOTDOT);
+    parse_and_compile_expression(p, code);
+    parser_must_consume(p, TOKEN_CLOSE_ROUND);
+    vm_assembler_add_new_int_range(code);
+}
+static void parse_and_compile_variable_lookup(parser_t *p, vm_assembler_t *code)
+{ /*
+   * Compiles a variable lookup into `code`. The head is either `[expr]`
+   * (compiled expression followed by find_variable) or an identifier
+   * (find_static_variable). Each following `[expr]` or `.identifier` segment
+   * emits one lookup instruction, until a token that is neither is reached.
+   */
     if (parser_consume(p, TOKEN_OPEN_SQUARE).type) {
-        name = parse_expression(p);
+        parse_and_compile_expression(p, code);
         parser_must_consume(p, TOKEN_CLOSE_SQUARE);
+        vm_assembler_add_find_variable(code);
     } else {
-        name = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER));
+        VALUE name = token_to_rstr_leveraging_existing_symbol(parser_must_consume(p, TOKEN_IDENTIFIER));
+        vm_assembler_add_find_static_variable(code, name);
     }
     while (true) {
         if (p->cur.type == TOKEN_OPEN_SQUARE) {
             parser_consume_any(p);
-            lookup = parse_expression(p);
+            parse_and_compile_expression(p, code);
             parser_must_consume(p, TOKEN_CLOSE_SQUARE);
-            rb_ary_push(lookups, lookup);
+            vm_assembler_add_lookup_key(code);
         } else if (p->cur.type == TOKEN_DOT) {
-            int has_space_affix = parser_consume_any(p).flags & TOKEN_SPACE_AFFIX;
-            lookup = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER));
-            if (has_space_affix)
-                rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected dot");
-            if (rstring_eq(lookup, "size") || rstring_eq(lookup, "first") || rstring_eq(lookup, "last"))
-                command_flags |= 1 << RARRAY_LEN(lookups);
+            parser_consume_any(p);
+            VALUE key = token_to_rstr_leveraging_existing_symbol(parser_must_consume(p, TOKEN_IDENTIFIER));
-            rb_ary_push(lookups, lookup);
+            if (rstring_eq(key, "size") || rstring_eq(key, "first") || rstring_eq(key, "last")) // these three keys get the lookup_command instruction
+                vm_assembler_add_lookup_command(code, key);
+            else
+                vm_assembler_add_lookup_const_key(code, key);
         } else {
             break;
         }
     }
+}
-    if (RARRAY_LEN(lookups) == 0) {
-        VALUE literal = rb_hash_lookup2(vLiquidExpressionLiterals, name, Qundef);
-        if (literal != Qundef) return literal;
+static VALUE try_parse_literal(parser_t *p)
+{ /*
+   * Recognizes the keyword literals nil, null, true, false, blank and empty
+   * in the current token. Returns Qnil, Qtrue, Qfalse or the shared
+   * empty_string and consumes the token on a match; returns Qundef and
+   * consumes nothing otherwise.
+   */
+    if (p->next.type == TOKEN_DOT || p->next.type == TOKEN_OPEN_SQUARE) // followed by a lookup segment, so not treated as a literal
+        return Qundef;
+    const char *str = p->cur.val;
+    long size = p->cur.val_end - str;
+    VALUE result = Qundef;
+    switch (size) { // dispatch on token length first so each memcmp compares exactly `size` bytes
+        case 3:
+            if (memcmp(str, "nil", size) == 0)
+                result = Qnil;
+            break;
+        case 4:
+            if (memcmp(str, "null", size) == 0) {
+                result = Qnil;
+            } else if (memcmp(str, "true", size) == 0) {
+                result = Qtrue;
+            }
+            break;
+        case 5:
+            switch (*str) {
+                case 'f':
+                    if (memcmp(str, "false", size) == 0)
+                        result = Qfalse;
+                    break;
+                case 'b':
+                    if (memcmp(str, "blank", size) == 0)
+                        result = empty_string;
+                    break;
+                case 'e':
+                    if (memcmp(str, "empty", size) == 0)
+                        result = empty_string;
+                    break;
+            }
+            break;
     }
-    VALUE args[4] = {Qfalse, name, lookups, INT2FIX(command_flags)};
-    return rb_class_new_instance(4, args, cLiquidVariableLookup);
+    if (result != Qundef)
+        parser_consume_any(p);
+    return result;
 }
-VALUE parse_expression(parser_t *p)
+VALUE try_parse_constant_expression(parser_t *p)
 {
     switch (p->cur.type) {
         case TOKEN_IDENTIFIER:
-        case TOKEN_OPEN_SQUARE:
-            return parse_variable(p);
+            return try_parse_literal(p);
         case TOKEN_NUMBER:
             return parse_number(p);
         case TOKEN_OPEN_ROUND:
-            return parse_range(p);
+            return try_parse_constant_range(p);
         case TOKEN_STRING:
         {
@@ -149,47 +215,69 @@ VALUE parse_expression(parser_t *p)
             return token_to_rstr(token);
         }
     }
-    if (p->cur.type == TOKEN_EOS) {
-        rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p->cur.type]);
-    } else {
-        rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s, \"%.*s\"] is not a valid expression",
-                 symbol_names[p->cur.type], (int)(p->cur.val_end - p->cur.val), p->cur.val);
-    }
-    return Qnil;
+    return Qundef;
 }
-static VALUE rb_parse_expression(VALUE self, VALUE markup)
+static void parse_and_compile_number(parser_t *p, vm_assembler_t *code) /*
+ * Parses a number token and emits it into `code`: a Fixnum goes through
+ * push_fixnum, any other value returned by parse_number through push_const.
+ */
 {
-    StringValue(markup);
-    char *start = RSTRING_PTR(markup);
+    VALUE num = parse_number(p);
+    if (RB_FIXNUM_P(num))
+        vm_assembler_add_push_fixnum(code, num);
+    else
+        vm_assembler_add_push_const(code, num);
+    return;
+}
-    parser_t p;
-    init_parser(&p, start, start + RSTRING_LEN(markup));
+void parse_and_compile_expression(parser_t *p, vm_assembler_t *code)
+{ /*
+   * Compiles one expression starting at the current token into `code`,
+   * dispatching on the token type: keyword literal, variable lookup, number,
+   * range or string. Raises cLiquidSyntaxError when the current token cannot
+   * start an expression.
+   */
+    switch (p->cur.type) {
+        case TOKEN_IDENTIFIER:
+        {
+            VALUE literal = try_parse_literal(p);
+            if (literal != Qundef) {
+                vm_assembler_add_push_literal(code, literal);
+                return;
+            }
+             __attribute__ ((fallthrough)); // not a keyword literal: handle the identifier as a variable lookup
+        }
+        case TOKEN_OPEN_SQUARE:
+            parse_and_compile_variable_lookup(p, code);
+            return;
-    if (p.cur.type == TOKEN_EOS)
-        return Qnil;
+        case TOKEN_NUMBER:
+            parse_and_compile_number(p, code);
+            return;
-    VALUE expr = parse_expression(&p);
+        case TOKEN_OPEN_ROUND:
+            parse_and_compile_range(p, code);
+            return;
-    if (p.cur.type != TOKEN_EOS)
-        rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p.cur.type]);
+        case TOKEN_STRING:
+        {
+            lexer_token_t token = parser_consume_any(p);
+            token.val++; // drop the first byte of the token (presumably the opening quote)
+            token.val_end--; // drop the last byte of the token (presumably the closing quote)
+            VALUE str = token_to_rstr(token);
+            vm_assembler_add_push_const(code, str);
+            return;
+        }
+    }
-    return expr;
+    if (p->cur.type == TOKEN_EOS) { // the end-of-stream message omits the token text
+        rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p->cur.type]);
+    } else {
+        rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s, \"%.*s\"] is not a valid expression",
+                 symbol_names[p->cur.type], (int)(p->cur.val_end - p->cur.val), p->cur.val);
+    }
 }
-void init_liquid_parser(void)
+void liquid_define_parser(void) /*
+ * One-time parser setup: interns the method IDs used by this file and
+ * creates the shared empty string returned for the blank/empty literals.
+ */
 {
-    idToI = rb_intern("to_i");
+    id_to_i = rb_intern("to_i");
     idEvaluate = rb_intern("evaluate");
-    cLiquidRangeLookup = rb_const_get(mLiquid, rb_intern("RangeLookup"));
-    cRange = rb_const_get(rb_cObject, rb_intern("Range"));
-    cLiquidVariableLookup = rb_const_get(mLiquid, rb_intern("VariableLookup"));
-    VALUE cLiquidExpression = rb_const_get(mLiquid, rb_intern("Expression"));
-    rb_define_singleton_method(cLiquidExpression, "c_parse", rb_parse_expression, 1);
-    vLiquidExpressionLiterals = rb_const_get(cLiquidExpression, rb_intern("LITERALS"));
+    empty_string = rb_utf8_str_new_literal("");
+    rb_global_variable(&empty_string); // register the static VALUE with the GC so the string stays alive
 }

data/ext/liquid_c/parser.h CHANGED Viewed

@@ -2,6 +2,7 @@
 #define LIQUID_PARSER_H
 #include "lexer.h"
+#include "vm_assembler.h"
 typedef struct parser {
     lexer_token_t cur, next;
@@ -14,9 +15,10 @@ lexer_token_t parser_must_consume(parser_t *parser, unsigned char type);
 lexer_token_t parser_consume(parser_t *parser, unsigned char type);
 lexer_token_t parser_consume_any(parser_t *parser);
-VALUE parse_expression(parser_t *parser);
+void parse_and_compile_expression(parser_t *p, vm_assembler_t *code);
+VALUE try_parse_constant_expression(parser_t *p);
-void init_liquid_parser(void);
+void liquid_define_parser(void);
 #endif

data/ext/liquid_c/raw.c ADDED Viewed

@@ -0,0 +1,136 @@
+#include "liquid.h"
+#include "raw.h"
+#include "stringutil.h"
+#include "tokenizer.h"
+static VALUE id_block_name, id_raise_tag_never_closed, id_block_delimiter, id_ivar_body;
+static VALUE cLiquidRaw;
+struct full_token_possibly_invalid_t { // result filled in by match_full_token_possibly_invalid()
+    long body_len; // number of leading bytes of the token that precede the matched tag opener
+    const char *delimiter_start; // first byte of the matched tag name; points into the token text
+    long delimiter_len; // length in bytes of the matched tag name
+};
+static bool match_full_token_possibly_invalid(token_t *token, struct full_token_possibly_invalid_t *match)
+{ /*
+   * Checks whether the token text ends in a tag, i.e. a tag opener ("{%" or
+   * "{%-"), a word, and a closing "%}", scanning backwards from the end.
+   * On success returns true and stores in *match the word (start and length)
+   * and the number of bytes that precede the tag opener. On failure returns
+   * false; *match then holds the zero/NULL values assigned at the top.
+   */
+    const char *str = token->str_full;
+    long len = token->len_full;
+    match->body_len = 0;
+    match->delimiter_start = NULL;
+    match->delimiter_len = 0;
+    if (len < 5) return false; // Must be at least 5 characters: \{%\w%\}
+    if (str[len - 1] != '}' || str[len - 2] != '%') return false;
+    const char *curr_delimiter_start; // only read after curr_delimiter_len > 0, which implies it has been assigned
+    long curr_delimiter_len = 0;
+    bool is_last_char_whitespace = true;
+    // Search from the end of the string.
+    // The token could have a part of the body like this:
+    // {% endraw {% endraw %}
+    // In this case, body_len must be reported as 10 so that the leading "{% endraw " is preserved as body content.
+    for (long i = len - 3; i > 1; i--) { // i stops at 2 so that str[i - 1] and str[i - 2] stay in bounds
+        char c = str[i];
+        // match \s
+        bool is_whitespace = rb_isspace(c);
+        if (is_word_char(c)) {
+            curr_delimiter_start = str + i;
+            if (is_last_char_whitespace) {
+                // start a new delimiter match
+                curr_delimiter_len = 1;
+            } else {
+                curr_delimiter_len++;
+            }
+        } else if (!is_word_char(c) && !is_whitespace) {
+            curr_delimiter_start = NULL;
+            curr_delimiter_len = 0;
+        }
+        is_last_char_whitespace = is_whitespace;
+        if (curr_delimiter_len > 0) {
+            // match start of a tag which is {% or {%-
+            if (
+                (str[i - 1] == '%' && str[i - 2] == '{') ||
+                (i - 3 >= 0 && str[i - 1] == '-' && str[i - 2] == '%' && str[i - 3] == '{')
+            ) {
+                match->delimiter_start = curr_delimiter_start;
+                match->delimiter_len = curr_delimiter_len;
+                if (str[i - 1] == '-') {
+                    match->body_len = i - 3; // offset of '{' in "{%-"
+                } else {
+                    match->body_len = i - 2; // offset of '{' in "{%"
+                }
+                return true;
+            }
+        }
+    }
+    return false;
+}
+static VALUE raw_parse_method(VALUE self, VALUE tokens)
+{ /*
+   * Implementation of the Ruby method `c_parse` (registered in
+   * liquid_define_raw). Reads tokens from the tokenizer wrapped by `tokens`
+   * until one ends in a tag whose name equals self.block_delimiter, then
+   * stores everything before that tag in @body and returns nil. If the
+   * tokens run out first, calls self.raise_tag_never_closed(self.block_name).
+   */
+    tokenizer_t *tokenizer;
+    Tokenizer_Get_Struct(tokens, tokenizer);
+    token_t token;
+    struct full_token_possibly_invalid_t match;
+    VALUE block_delimiter = rb_funcall(self, id_block_delimiter, 0);
+    Check_Type(block_delimiter, T_STRING);
+    char *block_delimiter_str = RSTRING_PTR(block_delimiter);
+    long block_delimiter_len = RSTRING_LEN(block_delimiter);
+    const char *body = NULL;
+    long body_len = 0;
+    while (true) {
+        tokenizer_next(tokenizer, &token);
+        if (!token.type) break; // a zero token type ends the loop (presumably end of input)
+        if (body == NULL) {
+            body = token.str_full; // the body starts at the first token read
+        }
+        if (match_full_token_possibly_invalid(&token, &match)
+                && match.delimiter_len == block_delimiter_len
+                && memcmp(match.delimiter_start, block_delimiter_str, block_delimiter_len) == 0) {
+            body_len += match.body_len; // keep the part of this token that precedes the closing tag
+            VALUE body_str = rb_enc_str_new(body, body_len, utf8_encoding); // NOTE(review): assumes consecutive tokens are contiguous in memory - confirm against the tokenizer
+            rb_ivar_set(self, id_ivar_body, body_str);
+            if (RBASIC_CLASS(self) == cLiquidRaw) { // exact class match only; other classes skip this step
+                tokenizer->raw_tag_body = RSTRING_PTR(body_str);
+                tokenizer->raw_tag_body_len = (unsigned int)body_len;
+            }
+            return Qnil;
+        }
+        body_len += token.len_full;
+    }
+    rb_funcall(self, id_raise_tag_never_closed, 1, rb_funcall(self, id_block_name, 0));
+    return Qnil;
+}
+void liquid_define_raw(void)
+{ /*
+   * One-time setup for this file: interns the method and instance-variable
+   * IDs used by raw_parse_method, looks up the Raw constant under mLiquid,
+   * and defines its one-argument instance method `c_parse`.
+   */
+    id_block_name = rb_intern("block_name");
+    id_raise_tag_never_closed = rb_intern("raise_tag_never_closed");
+    id_block_delimiter = rb_intern("block_delimiter");
+    id_ivar_body = rb_intern("@body");
+    cLiquidRaw = rb_const_get(mLiquid, rb_intern("Raw"));
+    rb_define_method(cLiquidRaw, "c_parse", raw_parse_method, 1);
+}

data/ext/liquid_c/raw.h ADDED Viewed

@@ -0,0 +1,6 @@
+#ifndef LIQUID_RAW_H
+#define LIQUID_RAW_H
+void liquid_define_raw(void);
+#endif