liquid-c 4.0.1 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/cla.yml +23 -0
  3. data/.github/workflows/liquid.yml +36 -11
  4. data/.gitignore +4 -0
  5. data/.rubocop.yml +14 -0
  6. data/Gemfile +15 -5
  7. data/README.md +32 -8
  8. data/Rakefile +12 -63
  9. data/ext/liquid_c/block.c +493 -60
  10. data/ext/liquid_c/block.h +28 -2
  11. data/ext/liquid_c/c_buffer.c +42 -0
  12. data/ext/liquid_c/c_buffer.h +76 -0
  13. data/ext/liquid_c/context.c +233 -0
  14. data/ext/liquid_c/context.h +70 -0
  15. data/ext/liquid_c/document_body.c +97 -0
  16. data/ext/liquid_c/document_body.h +59 -0
  17. data/ext/liquid_c/expression.c +116 -0
  18. data/ext/liquid_c/expression.h +24 -0
  19. data/ext/liquid_c/extconf.rb +21 -9
  20. data/ext/liquid_c/intutil.h +22 -0
  21. data/ext/liquid_c/lexer.c +39 -3
  22. data/ext/liquid_c/lexer.h +18 -3
  23. data/ext/liquid_c/liquid.c +76 -6
  24. data/ext/liquid_c/liquid.h +24 -1
  25. data/ext/liquid_c/liquid_vm.c +618 -0
  26. data/ext/liquid_c/liquid_vm.h +25 -0
  27. data/ext/liquid_c/parse_context.c +76 -0
  28. data/ext/liquid_c/parse_context.h +13 -0
  29. data/ext/liquid_c/parser.c +153 -65
  30. data/ext/liquid_c/parser.h +4 -2
  31. data/ext/liquid_c/raw.c +136 -0
  32. data/ext/liquid_c/raw.h +6 -0
  33. data/ext/liquid_c/resource_limits.c +279 -0
  34. data/ext/liquid_c/resource_limits.h +23 -0
  35. data/ext/liquid_c/stringutil.h +44 -0
  36. data/ext/liquid_c/tokenizer.c +149 -35
  37. data/ext/liquid_c/tokenizer.h +20 -9
  38. data/ext/liquid_c/usage.c +18 -0
  39. data/ext/liquid_c/usage.h +9 -0
  40. data/ext/liquid_c/variable.c +196 -20
  41. data/ext/liquid_c/variable.h +18 -1
  42. data/ext/liquid_c/variable_lookup.c +44 -0
  43. data/ext/liquid_c/variable_lookup.h +8 -0
  44. data/ext/liquid_c/vm_assembler.c +491 -0
  45. data/ext/liquid_c/vm_assembler.h +240 -0
  46. data/ext/liquid_c/vm_assembler_pool.c +99 -0
  47. data/ext/liquid_c/vm_assembler_pool.h +26 -0
  48. data/lib/liquid/c/compile_ext.rb +44 -0
  49. data/lib/liquid/c/version.rb +3 -1
  50. data/lib/liquid/c.rb +226 -48
  51. data/liquid-c.gemspec +16 -10
  52. data/performance/c_profile.rb +23 -0
  53. data/performance.rb +6 -4
  54. data/rakelib/compile.rake +15 -0
  55. data/rakelib/integration_test.rake +43 -0
  56. data/rakelib/performance.rake +43 -0
  57. data/rakelib/rubocop.rake +6 -0
  58. data/rakelib/unit_test.rake +14 -0
  59. data/test/integration_test.rb +11 -0
  60. data/test/liquid_test_helper.rb +21 -0
  61. data/test/test_helper.rb +21 -2
  62. data/test/unit/block_test.rb +137 -0
  63. data/test/unit/context_test.rb +85 -0
  64. data/test/unit/expression_test.rb +191 -0
  65. data/test/unit/gc_stress_test.rb +28 -0
  66. data/test/unit/raw_test.rb +93 -0
  67. data/test/unit/resource_limits_test.rb +50 -0
  68. data/test/unit/tokenizer_test.rb +90 -20
  69. data/test/unit/variable_test.rb +279 -60
  70. metadata +60 -11
  71. data/test/liquid_test.rb +0 -11
@@ -0,0 +1,24 @@
1
+ #if !defined(LIQUID_EXPRESSION_H)
2
+ #define LIQUID_EXPRESSION_H
3
+
4
+ #include "vm_assembler.h"
5
+ #include "parser.h"
6
+
7
+ extern VALUE cLiquidCExpression;
8
+ extern const rb_data_type_t expression_data_type;
9
+
10
+ typedef struct expression { // C-side struct that Expression_Get_Struct unwraps from a Ruby object
11
+ vm_assembler_t code; // sole member; presumably the assembled VM code for this expression — confirm in vm_assembler.h
12
+ } expression_t;
13
+
14
+ extern const rb_data_type_t expression_data_type; // NOTE(review): repeats the identical extern declaration that already appears before the struct; redundant but harmless
15
+ #define Expression_Get_Struct(obj, sval) TypedData_Get_Struct(obj, expression_t, &expression_data_type, sval) // unwraps obj into expression_t *sval via TypedData_Get_Struct, using expression_data_type as the expected type
16
+
17
+ void liquid_define_expression(void);
18
+
19
+ VALUE expression_new(VALUE klass, expression_t **expression_ptr);
20
+ VALUE expression_evaluate(VALUE self, VALUE context);
21
+ VALUE internal_expression_evaluate(expression_t *expression, VALUE context);
22
+
23
+ #endif
24
+
@@ -1,14 +1,26 @@
1
- require 'mkmf'
2
- $CFLAGS << ' -std=c99 -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers'
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+ $CFLAGS << " -std=c11 -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers"
5
+ append_cflags("-fvisibility=hidden") # symbols are hidden by default; Init_liquid_c is marked RUBY_FUNC_EXPORTED in liquid.c
3
6
  # In Ruby 2.6 and earlier, the Ruby headers did not have struct timespec defined
4
- valid_headers = RbConfig::CONFIG['host_os'] !~ /linux/ || Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.7")
5
- pedantic = !ENV['LIQUID_C_PEDANTIC'].to_s.empty?
7
+ valid_headers = RbConfig::CONFIG["host_os"] !~ /linux/ || Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.7") # true off Linux, or on Ruby >= 2.7
8
+ pedantic = !ENV["LIQUID_C_PEDANTIC"].to_s.empty? # opt-in: any non-empty LIQUID_C_PEDANTIC value
6
9
  if pedantic && valid_headers
7
- $CFLAGS << ' -Werror'
10
+ $CFLAGS << " -Werror"
8
11
  end
9
- compiler = RbConfig::MAKEFILE_CONFIG['CC']
10
- if ENV['DEBUG'] == 'true' && compiler =~ /gcc|g\+\+/
11
- $CFLAGS << ' -fbounds-check'
12
+ if ENV["DEBUG"] == "true" # debug build: bounds-check flag, -O0, and assertions enabled
13
+ append_cflags("-fbounds-check")
14
+ CONFIG["optflags"] = " -O0"
15
+ # Hack to enable assertions since ruby/assert.h disables assertions unless
16
+ # Ruby was compiled with -DRUBY_DEBUG.
17
+ # https://github.com/ruby/ruby/blob/9e678cdbd054f78576a8f21b3f97cccc395ade22/include/ruby/assert.h#L36-L41
18
+ $CFLAGS << " -DRUBY_DEBUG"
19
+ else
20
+ $CFLAGS << " -DNDEBUG" # non-debug build: NDEBUG turns the C assert() macro into a no-op
12
21
  end
13
- $warnflags.gsub!(/-Wdeclaration-after-statement/, "") if $warnflags
22
+
23
+ have_func "rb_hash_bulk_insert" # mkmf probe for rb_hash_bulk_insert; presumably consumed via a HAVE_* macro in the C sources — confirm
24
+
25
+ $warnflags&.gsub!(/-Wdeclaration-after-statement/, "") # strip that warning flag when $warnflags is set (safe navigation skips nil)
14
26
  create_makefile("liquid_c")
@@ -0,0 +1,22 @@
1
+ #ifndef LIQUID_INTUTIL_H
2
+ #define LIQUID_INTUTIL_H
3
+
4
+ #include <stdint.h>
5
+
6
+ static inline unsigned int bytes_to_uint24(const uint8_t *bytes) // Decodes three bytes (bytes[0..2]) into a single unsigned value
7
+ {
8
+ return (bytes[0] << 16) | (bytes[1] << 8) | bytes[2]; // bytes[0] is the most significant byte, bytes[2] the least (big-endian order)
9
+ }
10
+
11
+ static inline void uint24_to_bytes(unsigned int num, uint8_t *bytes) // Encodes num into bytes[0..2], most significant byte first
12
+ {
13
+ assert(num < (1 << 24)); // num must fit in 24 bits. NOTE(review): this header includes only <stdint.h>; assert presumably comes from whatever the including file pulled in — confirm
14
+
15
+ bytes[0] = num >> 16; // each store into a uint8_t keeps only the low 8 bits of the shifted value
16
+ bytes[1] = num >> 8;
17
+ bytes[2] = num;
18
+
19
+ assert(bytes_to_uint24(bytes) == num); // round-trip check: decoding what was just written must give num back
20
+ }
21
+
22
+ #endif
data/ext/liquid_c/lexer.c CHANGED
@@ -1,5 +1,6 @@
1
1
  #include "liquid.h"
2
2
  #include "lexer.h"
3
+ #include "usage.h"
3
4
  #include <stdio.h>
4
5
 
5
6
  const char *symbol_names[TOKEN_END] = {
@@ -59,8 +60,6 @@ inline static const char *scan_past(const char *cur, const char *end, char targe
59
60
  const char *tok_end = str + (n); \
60
61
  token->type = (t); \
61
62
  token->val = str; \
62
- if (str != start) token->flags |= TOKEN_SPACE_PREFIX; \
63
- if (tok_end < end && ISSPACE(*tok_end)) token->flags |= TOKEN_SPACE_SUFFIX; \
64
63
  return (token->val_end = tok_end); \
65
64
  }
66
65
 
@@ -109,6 +108,11 @@ const char *lex_one(const char *start, const char *end, lexer_token_t *token)
109
108
  }
110
109
  }
111
110
 
111
+ // Instrument for bug: https://github.com/Shopify/liquid-c/pull/120
112
+ if (c == '-' && str + 1 < end && str[1] == '.') {
113
+ usage_increment("liquid_c_negative_float_without_integer");
114
+ }
115
+
112
116
  if (ISDIGIT(c) || c == '-') {
113
117
  int has_dot = 0;
114
118
  cur = str;
@@ -140,7 +144,39 @@ const char *lex_one(const char *start, const char *end, lexer_token_t *token)
140
144
 
141
145
  if (is_special(c)) RETURN_TOKEN(c, 1);
142
146
 
143
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected character %c", c);
147
+ long remaining_str_len = end - str;
148
+ int char_len = 0;
149
+
150
+ // read multibyte UTF-8 character
151
+ if ((c & 0x80) == 0) {
152
+ // 1-byte character
153
+ char_len = 1;
154
+ } else if ((c & 0xE0) == 0xC0) {
155
+ // 2-byte character
156
+ if (remaining_str_len >= 2) {
157
+ char_len = 2;
158
+ }
159
+ } else if ((c & 0xF0) == 0xE0) {
160
+ // 3-byte character
161
+ if (remaining_str_len >= 3) {
162
+ char_len = 3;
163
+ }
164
+ } else if ((c & 0xF8) == 0xF0) {
165
+ // 4-byte character
166
+ if (remaining_str_len >= 4) {
167
+ char_len = 4;
168
+ }
169
+ } else {
170
+ // this should never happen
171
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected character %c", c);
172
+ }
173
+
174
+ if (char_len > 0) {
175
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected character %.*s", char_len, str);
176
+ } else {
177
+ rb_raise(rb_eArgError, "invalid byte sequence in UTF-8");
178
+ }
179
+
144
180
  return NULL;
145
181
  }
146
182
 
data/ext/liquid_c/lexer.h CHANGED
@@ -24,9 +24,6 @@ enum lexer_token_type {
24
24
  TOKEN_END = 256
25
25
  };
26
26
 
27
- #define TOKEN_SPACE_PREFIX 0x1
28
- #define TOKEN_SPACE_SUFFIX 0x2
29
- #define TOKEN_SPACE_AFFIX (TOKEN_SPACE_PREFIX | TOKEN_SPACE_SUFFIX)
30
27
  #define TOKEN_FLOAT_NUMBER 0x4
31
28
 
32
29
  typedef struct lexer_token {
@@ -42,5 +39,23 @@ inline static VALUE token_to_rstr(lexer_token_t token) {
42
39
  return rb_enc_str_new(token.val, token.val_end - token.val, utf8_encoding);
43
40
  }
44
41
 
42
+ inline static VALUE token_check_for_symbol(lexer_token_t token) { // Looks up a Symbol matching the token text (bytes val..val_end, UTF-8)
43
+ return rb_check_symbol_cstr(token.val, token.val_end - token.val, utf8_encoding); // the callers in this header treat a Qnil result as "no symbol found"
44
+ }
45
+
46
+ inline static VALUE token_to_rstr_leveraging_existing_symbol(lexer_token_t token) { // Returns the token text as a Ruby String, going through a Symbol when the lookup finds one
47
+ VALUE sym = token_check_for_symbol(token);
48
+ if (RB_LIKELY(sym != Qnil)) // hinted as the expected case: the lookup found a symbol
49
+ return rb_sym2str(sym); // return the string that rb_sym2str gives for that symbol
50
+ return token_to_rstr(token); // otherwise build a new UTF-8 string from the token bytes
51
+ }
52
+
53
+ inline static VALUE token_to_rsym(lexer_token_t token) { // Returns the token text as a Ruby Symbol
54
+ VALUE sym = token_check_for_symbol(token);
55
+ if (RB_LIKELY(sym != Qnil)) // hinted as the expected case: the lookup found a symbol
56
+ return sym;
57
+ return rb_str_intern(token_to_rstr(token)); // otherwise build the string from the token bytes and intern it
58
+ }
59
+
45
60
  #endif
46
61
 
@@ -3,23 +3,93 @@
3
3
  #include "variable.h"
4
4
  #include "lexer.h"
5
5
  #include "parser.h"
6
+ #include "raw.h"
7
+ #include "resource_limits.h"
8
+ #include "expression.h"
9
+ #include "document_body.h"
6
10
  #include "block.h"
11
+ #include "context.h"
12
+ #include "parse_context.h"
13
+ #include "variable_lookup.h"
14
+ #include "vm_assembler_pool.h"
15
+ #include "liquid_vm.h"
16
+ #include "usage.h"
17
+
18
+ ID id_evaluate; // the IDs below are assigned with rb_intern in Init_liquid_c
19
+ ID id_to_liquid;
20
+ ID id_to_s;
21
+ ID id_call;
22
+ ID id_compile_evaluate;
23
+ ID id_ivar_line_number; // interned from "@line_number"
24
+
25
+ VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody; // module/class handles filled in by Init_liquid_c and passed to rb_global_variable there
26
+ VALUE cLiquidVariableLookup, cLiquidRangeLookup;
27
+ VALUE cLiquidArgumentError, cLiquidSyntaxError, cMemoryError; // looked up as Liquid::ArgumentError, Liquid::SyntaxError and Liquid::MemoryError
7
28
 
8
- VALUE mLiquid, mLiquidC, cLiquidSyntaxError, cLiquidVariable, cLiquidTemplate;
9
29
  rb_encoding *utf8_encoding;
30
+ int utf8_encoding_index;
31
+
32
+ __attribute__((noreturn)) void raise_non_utf8_encoding_error(VALUE string, const char *value_name) // Always raises rb_eEncCompatError; never returns. NOTE(review): the prototype in liquid.h names the second parameter string_name
33
+ {
34
+ rb_raise(rb_eEncCompatError, "non-UTF8 encoded %s (%"PRIsVALUE") not supported", value_name, rb_obj_encoding(string)); // message carries value_name plus the result of rb_obj_encoding(string)
35
+ }
10
36
 
11
- void Init_liquid_c(void)
37
+ RUBY_FUNC_EXPORTED void Init_liquid_c(void) // Extension entry point: caches IDs and the UTF-8 encoding, resolves Liquid constants, then calls each liquid_define_* initializer
12
38
  {
39
+ id_evaluate = rb_intern("evaluate"); // intern the method/ivar names once into the global IDs
40
+ id_to_liquid = rb_intern("to_liquid");
41
+ id_to_s = rb_intern("to_s");
42
+ id_call = rb_intern("call");
43
+ id_compile_evaluate = rb_intern("compile_evaluate");
44
+ id_ivar_line_number = rb_intern("@line_number");
45
+
13
46
  utf8_encoding = rb_utf8_encoding();
47
+ utf8_encoding_index = rb_enc_to_index(utf8_encoding); // cached index; check_utf8_encoding in liquid.h compares against it
48
+
14
49
  mLiquid = rb_define_module("Liquid");
50
+ rb_global_variable(&mLiquid); // hands the variable's address to rb_global_variable (GC registration); repeated for every cached VALUE below
51
+
15
52
  mLiquidC = rb_define_module_under(mLiquid, "C");
53
+ rb_global_variable(&mLiquidC);
54
+
55
+ cLiquidArgumentError = rb_const_get(mLiquid, rb_intern("ArgumentError")); // this and the following constants are looked up with rb_const_get under Liquid, not defined here
56
+ rb_global_variable(&cLiquidArgumentError);
57
+
16
58
  cLiquidSyntaxError = rb_const_get(mLiquid, rb_intern("SyntaxError"));
59
+ rb_global_variable(&cLiquidSyntaxError);
60
+
61
+ cMemoryError = rb_const_get(mLiquid, rb_intern("MemoryError"));
62
+ rb_global_variable(&cMemoryError);
63
+
17
64
  cLiquidVariable = rb_const_get(mLiquid, rb_intern("Variable"));
65
+ rb_global_variable(&cLiquidVariable);
66
+
18
67
  cLiquidTemplate = rb_const_get(mLiquid, rb_intern("Template"));
68
+ rb_global_variable(&cLiquidTemplate);
69
+
70
+ cLiquidBlockBody = rb_const_get(mLiquid, rb_intern("BlockBody"));
71
+ rb_global_variable(&cLiquidBlockBody);
72
+
73
+ cLiquidVariableLookup = rb_const_get(mLiquid, rb_intern("VariableLookup"));
74
+ rb_global_variable(&cLiquidVariableLookup);
75
+
76
+ cLiquidRangeLookup = rb_const_get(mLiquid, rb_intern("RangeLookup"));
77
+ rb_global_variable(&cLiquidRangeLookup);
19
78
 
20
- init_liquid_tokenizer();
21
- init_liquid_parser();
22
- init_liquid_variable();
23
- init_liquid_block();
79
+ liquid_define_tokenizer(); // per-component initializers, called after the globals above are set
80
+ liquid_define_parser();
81
+ liquid_define_raw();
82
+ liquid_define_resource_limits();
83
+ liquid_define_expression();
84
+ liquid_define_variable();
85
+ liquid_define_document_body();
86
+ liquid_define_block_body();
87
+ liquid_define_context();
88
+ liquid_define_parse_context();
89
+ liquid_define_variable_lookup();
90
+ liquid_define_vm_assembler_pool();
91
+ liquid_define_vm_assembler();
92
+ liquid_define_vm();
93
+ liquid_define_usage();
24
94
  }
25
95
 
@@ -5,8 +5,31 @@
5
5
  #include <ruby/encoding.h>
6
6
  #include <stdbool.h>
7
7
 
8
- extern VALUE mLiquid, mLiquidC, cLiquidSyntaxError, cLiquidVariable, cLiquidTemplate;
8
+ extern ID id_evaluate;
9
+ extern ID id_to_liquid;
10
+ extern ID id_to_s;
11
+ extern ID id_call;
12
+ extern ID id_compile_evaluate;
13
+ extern ID id_ivar_line_number;
14
+
15
+ extern VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody;
16
+ extern VALUE cLiquidVariableLookup, cLiquidRangeLookup;
17
+ extern VALUE cLiquidArgumentError, cLiquidSyntaxError, cMemoryError;
9
18
  extern rb_encoding *utf8_encoding;
19
+ extern int utf8_encoding_index;
20
+
21
+ __attribute__((noreturn)) void raise_non_utf8_encoding_error(VALUE string, const char *string_name);
22
+
23
+ static inline void check_utf8_encoding(VALUE string, const char *string_name) // Raises (via raise_non_utf8_encoding_error) unless string's encoding index equals utf8_encoding_index
24
+ {
25
+ if (RB_UNLIKELY(RB_ENCODING_GET_INLINED(string) != utf8_encoding_index)) // NOTE(review): RB_UNLIKELY gets no fallback in this header, unlike RB_LIKELY further down — confirm it exists on every supported Ruby
26
+ raise_non_utf8_encoding_error(string, string_name); // declared noreturn, so control does not come back on mismatch
27
+ }
28
+
29
+ #ifndef RB_LIKELY
30
+ // RB_LIKELY was added in Ruby 2.4; define a fallback for older headers (relies on the compiler's __builtin_expect)
31
+ #define RB_LIKELY(x) (__builtin_expect(!!(x), 1))
32
+ #endif
10
33
 
11
34
  #endif
12
35