liquid-c 4.0.1 → 4.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/cla.yml +23 -0
  3. data/.github/workflows/liquid.yml +36 -11
  4. data/.gitignore +4 -0
  5. data/.rubocop.yml +14 -0
  6. data/Gemfile +15 -5
  7. data/README.md +32 -8
  8. data/Rakefile +12 -63
  9. data/ext/liquid_c/block.c +493 -60
  10. data/ext/liquid_c/block.h +28 -2
  11. data/ext/liquid_c/c_buffer.c +42 -0
  12. data/ext/liquid_c/c_buffer.h +76 -0
  13. data/ext/liquid_c/context.c +233 -0
  14. data/ext/liquid_c/context.h +70 -0
  15. data/ext/liquid_c/document_body.c +97 -0
  16. data/ext/liquid_c/document_body.h +59 -0
  17. data/ext/liquid_c/expression.c +116 -0
  18. data/ext/liquid_c/expression.h +24 -0
  19. data/ext/liquid_c/extconf.rb +21 -9
  20. data/ext/liquid_c/intutil.h +22 -0
  21. data/ext/liquid_c/lexer.c +39 -3
  22. data/ext/liquid_c/lexer.h +18 -3
  23. data/ext/liquid_c/liquid.c +76 -6
  24. data/ext/liquid_c/liquid.h +24 -1
  25. data/ext/liquid_c/liquid_vm.c +618 -0
  26. data/ext/liquid_c/liquid_vm.h +25 -0
  27. data/ext/liquid_c/parse_context.c +76 -0
  28. data/ext/liquid_c/parse_context.h +13 -0
  29. data/ext/liquid_c/parser.c +153 -65
  30. data/ext/liquid_c/parser.h +4 -2
  31. data/ext/liquid_c/raw.c +136 -0
  32. data/ext/liquid_c/raw.h +6 -0
  33. data/ext/liquid_c/resource_limits.c +279 -0
  34. data/ext/liquid_c/resource_limits.h +23 -0
  35. data/ext/liquid_c/stringutil.h +44 -0
  36. data/ext/liquid_c/tokenizer.c +149 -35
  37. data/ext/liquid_c/tokenizer.h +20 -9
  38. data/ext/liquid_c/usage.c +18 -0
  39. data/ext/liquid_c/usage.h +9 -0
  40. data/ext/liquid_c/variable.c +196 -20
  41. data/ext/liquid_c/variable.h +18 -1
  42. data/ext/liquid_c/variable_lookup.c +44 -0
  43. data/ext/liquid_c/variable_lookup.h +8 -0
  44. data/ext/liquid_c/vm_assembler.c +491 -0
  45. data/ext/liquid_c/vm_assembler.h +240 -0
  46. data/ext/liquid_c/vm_assembler_pool.c +99 -0
  47. data/ext/liquid_c/vm_assembler_pool.h +26 -0
  48. data/lib/liquid/c/compile_ext.rb +44 -0
  49. data/lib/liquid/c/version.rb +3 -1
  50. data/lib/liquid/c.rb +226 -48
  51. data/liquid-c.gemspec +16 -10
  52. data/performance/c_profile.rb +23 -0
  53. data/performance.rb +6 -4
  54. data/rakelib/compile.rake +15 -0
  55. data/rakelib/integration_test.rake +43 -0
  56. data/rakelib/performance.rake +43 -0
  57. data/rakelib/rubocop.rake +6 -0
  58. data/rakelib/unit_test.rake +14 -0
  59. data/test/integration_test.rb +11 -0
  60. data/test/liquid_test_helper.rb +21 -0
  61. data/test/test_helper.rb +21 -2
  62. data/test/unit/block_test.rb +137 -0
  63. data/test/unit/context_test.rb +85 -0
  64. data/test/unit/expression_test.rb +191 -0
  65. data/test/unit/gc_stress_test.rb +28 -0
  66. data/test/unit/raw_test.rb +93 -0
  67. data/test/unit/resource_limits_test.rb +50 -0
  68. data/test/unit/tokenizer_test.rb +90 -20
  69. data/test/unit/variable_test.rb +279 -60
  70. metadata +60 -11
  71. data/test/liquid_test.rb +0 -11
@@ -0,0 +1,13 @@
1
+ #ifndef LIQUID_PARSE_CONTEXT_H
2
+ #define LIQUID_PARSE_CONTEXT_H
3
+
4
+ #include <ruby.h>
5
+ #include <stdbool.h>
6
+ #include "vm_assembler_pool.h"
7
+
8
+ void liquid_define_parse_context(void);
9
+ VALUE parse_context_get_document_body(VALUE self);
10
+
11
+ vm_assembler_pool_t *parse_context_get_vm_assembler_pool(VALUE self);
12
+
13
+ #endif
@@ -2,8 +2,8 @@
2
2
  #include "parser.h"
3
3
  #include "lexer.h"
4
4
 
5
- static VALUE cLiquidRangeLookup, cLiquidVariableLookup, cRange, vLiquidExpressionLiterals;
6
- static ID idToI, idEvaluate;
5
+ static VALUE empty_string;
6
+ static ID id_to_i, idEvaluate;
7
7
 
8
8
  void init_parser(parser_t *p, const char *str, const char *end)
9
9
  {
@@ -67,79 +67,145 @@ static VALUE parse_number(parser_t *p)
67
67
  return out;
68
68
  }
69
69
 
70
- static VALUE parse_range(parser_t *p)
70
+ __attribute__((noreturn)) static void raise_invalid_expression_type(const char *expr, int expr_len)
71
71
  {
72
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Invalid expression type '%.*s' in range expression", expr_len, expr);
73
+ }
74
+
75
+ static VALUE try_parse_constant_range(parser_t *p)
76
+ {
77
+ parser_t saved_state = *p;
78
+
72
79
  parser_must_consume(p, TOKEN_OPEN_ROUND);
73
80
 
74
- VALUE args[2];
75
- args[0] = parse_expression(p);
81
+ const char *begin_str = p->cur.val;
82
+ VALUE begin = try_parse_constant_expression(p);
83
+ const char *begin_str_end = p->cur.val;
84
+ if (begin == Qundef) {
85
+ *p = saved_state;
86
+ return Qundef;
87
+ }
76
88
  parser_must_consume(p, TOKEN_DOTDOT);
77
89
 
78
- args[1] = parse_expression(p);
90
+ const char *end_str = p->cur.val;
91
+ VALUE end = try_parse_constant_expression(p);
92
+ const char *end_str_end = p->cur.val;
93
+ if (end == Qundef) {
94
+ *p = saved_state;
95
+ return Qundef;
96
+ }
79
97
  parser_must_consume(p, TOKEN_CLOSE_ROUND);
80
98
 
81
- if (rb_respond_to(args[0], idEvaluate) || rb_respond_to(args[1], idEvaluate))
82
- return rb_class_new_instance(2, args, cLiquidRangeLookup);
99
+ begin = rb_check_funcall(begin, id_to_i, 0, NULL);
100
+ if (begin == Qundef) raise_invalid_expression_type(begin_str, (int)(begin_str_end - begin_str));
101
+
102
+ end = rb_check_funcall(end, id_to_i, 0, NULL);
103
+ if (end == Qundef) raise_invalid_expression_type(end_str, (int)(end_str_end - end_str));
83
104
 
84
- return rb_class_new_instance(2, args, cRange);
105
+ bool exclude_end = false;
106
+ return rb_range_new(begin, end, exclude_end);
85
107
  }
86
108
 
87
- static VALUE parse_variable(parser_t *p)
109
+ static void parse_and_compile_range(parser_t *p, vm_assembler_t *code)
88
110
  {
89
- VALUE name, lookups = rb_ary_new(), lookup;
90
- unsigned long long command_flags = 0;
111
+ VALUE const_range = try_parse_constant_range(p);
112
+ if (const_range != Qundef) {
113
+ vm_assembler_add_push_const(code, const_range);
114
+ return;
115
+ }
91
116
 
117
+ parser_must_consume(p, TOKEN_OPEN_ROUND);
118
+ parse_and_compile_expression(p, code);
119
+ parser_must_consume(p, TOKEN_DOTDOT);
120
+ parse_and_compile_expression(p, code);
121
+ parser_must_consume(p, TOKEN_CLOSE_ROUND);
122
+ vm_assembler_add_new_int_range(code);
123
+ }
124
+
125
+ static void parse_and_compile_variable_lookup(parser_t *p, vm_assembler_t *code)
126
+ {
92
127
  if (parser_consume(p, TOKEN_OPEN_SQUARE).type) {
93
- name = parse_expression(p);
128
+ parse_and_compile_expression(p, code);
94
129
  parser_must_consume(p, TOKEN_CLOSE_SQUARE);
130
+ vm_assembler_add_find_variable(code);
95
131
  } else {
96
- name = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER));
132
+ VALUE name = token_to_rstr_leveraging_existing_symbol(parser_must_consume(p, TOKEN_IDENTIFIER));
133
+ vm_assembler_add_find_static_variable(code, name);
97
134
  }
98
135
 
99
136
  while (true) {
100
137
  if (p->cur.type == TOKEN_OPEN_SQUARE) {
101
138
  parser_consume_any(p);
102
- lookup = parse_expression(p);
139
+ parse_and_compile_expression(p, code);
103
140
  parser_must_consume(p, TOKEN_CLOSE_SQUARE);
104
-
105
- rb_ary_push(lookups, lookup);
141
+ vm_assembler_add_lookup_key(code);
106
142
  } else if (p->cur.type == TOKEN_DOT) {
107
- int has_space_affix = parser_consume_any(p).flags & TOKEN_SPACE_AFFIX;
108
- lookup = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER));
109
-
110
- if (has_space_affix)
111
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected dot");
112
-
113
- if (rstring_eq(lookup, "size") || rstring_eq(lookup, "first") || rstring_eq(lookup, "last"))
114
- command_flags |= 1 << RARRAY_LEN(lookups);
143
+ parser_consume_any(p);
144
+ VALUE key = token_to_rstr_leveraging_existing_symbol(parser_must_consume(p, TOKEN_IDENTIFIER));
115
145
 
116
- rb_ary_push(lookups, lookup);
146
+ if (rstring_eq(key, "size") || rstring_eq(key, "first") || rstring_eq(key, "last"))
147
+ vm_assembler_add_lookup_command(code, key);
148
+ else
149
+ vm_assembler_add_lookup_const_key(code, key);
117
150
  } else {
118
151
  break;
119
152
  }
120
153
  }
154
+ }
121
155
 
122
- if (RARRAY_LEN(lookups) == 0) {
123
- VALUE literal = rb_hash_lookup2(vLiquidExpressionLiterals, name, Qundef);
124
- if (literal != Qundef) return literal;
156
+ static VALUE try_parse_literal(parser_t *p)
157
+ {
158
+ if (p->next.type == TOKEN_DOT || p->next.type == TOKEN_OPEN_SQUARE)
159
+ return Qundef;
160
+
161
+ const char *str = p->cur.val;
162
+ long size = p->cur.val_end - str;
163
+ VALUE result = Qundef;
164
+ switch (size) {
165
+ case 3:
166
+ if (memcmp(str, "nil", size) == 0)
167
+ result = Qnil;
168
+ break;
169
+ case 4:
170
+ if (memcmp(str, "null", size) == 0) {
171
+ result = Qnil;
172
+ } else if (memcmp(str, "true", size) == 0) {
173
+ result = Qtrue;
174
+ }
175
+ break;
176
+ case 5:
177
+ switch (*str) {
178
+ case 'f':
179
+ if (memcmp(str, "false", size) == 0)
180
+ result = Qfalse;
181
+ break;
182
+ case 'b':
183
+ if (memcmp(str, "blank", size) == 0)
184
+ result = empty_string;
185
+ break;
186
+ case 'e':
187
+ if (memcmp(str, "empty", size) == 0)
188
+ result = empty_string;
189
+ break;
190
+ }
191
+ break;
125
192
  }
126
-
127
- VALUE args[4] = {Qfalse, name, lookups, INT2FIX(command_flags)};
128
- return rb_class_new_instance(4, args, cLiquidVariableLookup);
193
+ if (result != Qundef)
194
+ parser_consume_any(p);
195
+ return result;
129
196
  }
130
197
 
131
- VALUE parse_expression(parser_t *p)
198
+ VALUE try_parse_constant_expression(parser_t *p)
132
199
  {
133
200
  switch (p->cur.type) {
134
201
  case TOKEN_IDENTIFIER:
135
- case TOKEN_OPEN_SQUARE:
136
- return parse_variable(p);
202
+ return try_parse_literal(p);
137
203
 
138
204
  case TOKEN_NUMBER:
139
205
  return parse_number(p);
140
206
 
141
207
  case TOKEN_OPEN_ROUND:
142
- return parse_range(p);
208
+ return try_parse_constant_range(p);
143
209
 
144
210
  case TOKEN_STRING:
145
211
  {
@@ -149,47 +215,69 @@ VALUE parse_expression(parser_t *p)
149
215
  return token_to_rstr(token);
150
216
  }
151
217
  }
152
-
153
- if (p->cur.type == TOKEN_EOS) {
154
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p->cur.type]);
155
- } else {
156
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s, \"%.*s\"] is not a valid expression",
157
- symbol_names[p->cur.type], (int)(p->cur.val_end - p->cur.val), p->cur.val);
158
- }
159
- return Qnil;
218
+ return Qundef;
160
219
  }
161
220
 
162
- static VALUE rb_parse_expression(VALUE self, VALUE markup)
221
+ static void parse_and_compile_number(parser_t *p, vm_assembler_t *code)
163
222
  {
164
- StringValue(markup);
165
- char *start = RSTRING_PTR(markup);
223
+ VALUE num = parse_number(p);
224
+ if (RB_FIXNUM_P(num))
225
+ vm_assembler_add_push_fixnum(code, num);
226
+ else
227
+ vm_assembler_add_push_const(code, num);
228
+ return;
229
+ }
166
230
 
167
- parser_t p;
168
- init_parser(&p, start, start + RSTRING_LEN(markup));
231
+ void parse_and_compile_expression(parser_t *p, vm_assembler_t *code)
232
+ {
233
+ switch (p->cur.type) {
234
+ case TOKEN_IDENTIFIER:
235
+ {
236
+ VALUE literal = try_parse_literal(p);
237
+ if (literal != Qundef) {
238
+ vm_assembler_add_push_literal(code, literal);
239
+ return;
240
+ }
241
+
242
+ __attribute__ ((fallthrough));
243
+ }
244
+ case TOKEN_OPEN_SQUARE:
245
+ parse_and_compile_variable_lookup(p, code);
246
+ return;
169
247
 
170
- if (p.cur.type == TOKEN_EOS)
171
- return Qnil;
248
+ case TOKEN_NUMBER:
249
+ parse_and_compile_number(p, code);
250
+ return;
172
251
 
173
- VALUE expr = parse_expression(&p);
252
+ case TOKEN_OPEN_ROUND:
253
+ parse_and_compile_range(p, code);
254
+ return;
174
255
 
175
- if (p.cur.type != TOKEN_EOS)
176
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p.cur.type]);
256
+ case TOKEN_STRING:
257
+ {
258
+ lexer_token_t token = parser_consume_any(p);
259
+ token.val++;
260
+ token.val_end--;
261
+ VALUE str = token_to_rstr(token);
262
+ vm_assembler_add_push_const(code, str);
263
+ return;
264
+ }
265
+ }
177
266
 
178
- return expr;
267
+ if (p->cur.type == TOKEN_EOS) {
268
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p->cur.type]);
269
+ } else {
270
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s, \"%.*s\"] is not a valid expression",
271
+ symbol_names[p->cur.type], (int)(p->cur.val_end - p->cur.val), p->cur.val);
272
+ }
179
273
  }
180
274
 
181
- void init_liquid_parser(void)
275
+ void liquid_define_parser(void)
182
276
  {
183
- idToI = rb_intern("to_i");
277
+ id_to_i = rb_intern("to_i");
184
278
  idEvaluate = rb_intern("evaluate");
185
279
 
186
- cLiquidRangeLookup = rb_const_get(mLiquid, rb_intern("RangeLookup"));
187
- cRange = rb_const_get(rb_cObject, rb_intern("Range"));
188
- cLiquidVariableLookup = rb_const_get(mLiquid, rb_intern("VariableLookup"));
189
-
190
- VALUE cLiquidExpression = rb_const_get(mLiquid, rb_intern("Expression"));
191
- rb_define_singleton_method(cLiquidExpression, "c_parse", rb_parse_expression, 1);
192
-
193
- vLiquidExpressionLiterals = rb_const_get(cLiquidExpression, rb_intern("LITERALS"));
280
+ empty_string = rb_utf8_str_new_literal("");
281
+ rb_global_variable(&empty_string);
194
282
  }
195
283
 
@@ -2,6 +2,7 @@
2
2
  #define LIQUID_PARSER_H
3
3
 
4
4
  #include "lexer.h"
5
+ #include "vm_assembler.h"
5
6
 
6
7
  typedef struct parser {
7
8
  lexer_token_t cur, next;
@@ -14,9 +15,10 @@ lexer_token_t parser_must_consume(parser_t *parser, unsigned char type);
14
15
  lexer_token_t parser_consume(parser_t *parser, unsigned char type);
15
16
  lexer_token_t parser_consume_any(parser_t *parser);
16
17
 
17
- VALUE parse_expression(parser_t *parser);
18
+ void parse_and_compile_expression(parser_t *p, vm_assembler_t *code);
19
+ VALUE try_parse_constant_expression(parser_t *p);
18
20
 
19
- void init_liquid_parser(void);
21
+ void liquid_define_parser(void);
20
22
 
21
23
  #endif
22
24
 
@@ -0,0 +1,136 @@
1
+ #include "liquid.h"
2
+ #include "raw.h"
3
+ #include "stringutil.h"
4
+ #include "tokenizer.h"
5
+
6
+ static VALUE id_block_name, id_raise_tag_never_closed, id_block_delimiter, id_ivar_body;
7
+ static VALUE cLiquidRaw;
8
+
9
+ struct full_token_possibly_invalid_t {
10
+ long body_len;
11
+ const char *delimiter_start;
12
+ long delimiter_len;
13
+ };
14
+
15
+ static bool match_full_token_possibly_invalid(token_t *token, struct full_token_possibly_invalid_t *match)
16
+ {
17
+ const char *str = token->str_full;
18
+ long len = token->len_full;
19
+
20
+ match->body_len = 0;
21
+ match->delimiter_start = NULL;
22
+ match->delimiter_len = 0;
23
+
24
+ if (len < 5) return false; // Must be at least 5 characters: \{%\w%\}
25
+ if (str[len - 1] != '}' || str[len - 2] != '%') return false;
26
+
27
+ const char *curr_delimiter_start;
28
+ long curr_delimiter_len = 0;
29
+
30
+ bool is_last_char_whitespace = true;
31
+
32
+ // Search from the end of the string.
33
+ // The token could have a part of the body like this:
34
+ // {% endraw {% endraw %}
35
+ // In this case, we need to return body_len to 10 to preserve the body content.
36
+ for (long i = len - 3; i > 1; i--) {
37
+ char c = str[i];
38
+
39
+ // match \s
40
+ bool is_whitespace = rb_isspace(c);
41
+
42
+ if (is_word_char(c)) {
43
+ curr_delimiter_start = str + i;
44
+
45
+ if (is_last_char_whitespace) {
46
+ // start a new delimiter match
47
+ curr_delimiter_len = 1;
48
+ } else {
49
+ curr_delimiter_len++;
50
+ }
51
+ } else if (!is_word_char(c) && !is_whitespace) {
52
+ curr_delimiter_start = NULL;
53
+ curr_delimiter_len = 0;
54
+ }
55
+
56
+ is_last_char_whitespace = is_whitespace;
57
+
58
+ if (curr_delimiter_len > 0) {
59
+ // match start of a tag which is {% or {%-
60
+ if (
61
+ (str[i - 1] == '%' && str[i - 2] == '{') ||
62
+ (i - 3 >= 0 && str[i - 1] == '-' && str[i - 2] == '%' && str[i - 3] == '{')
63
+ ) {
64
+ match->delimiter_start = curr_delimiter_start;
65
+ match->delimiter_len = curr_delimiter_len;
66
+
67
+ if (str[i - 1] == '-') {
68
+ match->body_len = i - 3;
69
+ } else {
70
+ match->body_len = i - 2;
71
+ }
72
+
73
+ return true;
74
+ }
75
+ }
76
+ }
77
+
78
+ return false;
79
+ }
80
+
81
+ static VALUE raw_parse_method(VALUE self, VALUE tokens)
82
+ {
83
+ tokenizer_t *tokenizer;
84
+ Tokenizer_Get_Struct(tokens, tokenizer);
85
+
86
+ token_t token;
87
+ struct full_token_possibly_invalid_t match;
88
+
89
+ VALUE block_delimiter = rb_funcall(self, id_block_delimiter, 0);
90
+ Check_Type(block_delimiter, T_STRING);
91
+ char *block_delimiter_str = RSTRING_PTR(block_delimiter);
92
+ long block_delimiter_len = RSTRING_LEN(block_delimiter);
93
+
94
+ const char *body = NULL;
95
+ long body_len = 0;
96
+
97
+ while (true) {
98
+ tokenizer_next(tokenizer, &token);
99
+
100
+ if (!token.type) break;
101
+
102
+ if (body == NULL) {
103
+ body = token.str_full;
104
+ }
105
+
106
+ if (match_full_token_possibly_invalid(&token, &match)
107
+ && match.delimiter_len == block_delimiter_len
108
+ && memcmp(match.delimiter_start, block_delimiter_str, block_delimiter_len) == 0) {
109
+ body_len += match.body_len;
110
+ VALUE body_str = rb_enc_str_new(body, body_len, utf8_encoding);
111
+ rb_ivar_set(self, id_ivar_body, body_str);
112
+ if (RBASIC_CLASS(self) == cLiquidRaw) {
113
+ tokenizer->raw_tag_body = RSTRING_PTR(body_str);
114
+ tokenizer->raw_tag_body_len = (unsigned int)body_len;
115
+ }
116
+ return Qnil;
117
+ }
118
+
119
+ body_len += token.len_full;
120
+ }
121
+
122
+ rb_funcall(self, id_raise_tag_never_closed, 1, rb_funcall(self, id_block_name, 0));
123
+ return Qnil;
124
+ }
125
+
126
+ void liquid_define_raw(void)
127
+ {
128
+ id_block_name = rb_intern("block_name");
129
+ id_raise_tag_never_closed = rb_intern("raise_tag_never_closed");
130
+ id_block_delimiter = rb_intern("block_delimiter");
131
+ id_ivar_body = rb_intern("@body");
132
+
133
+ cLiquidRaw = rb_const_get(mLiquid, rb_intern("Raw"));
134
+
135
+ rb_define_method(cLiquidRaw, "c_parse", raw_parse_method, 1);
136
+ }
@@ -0,0 +1,6 @@
1
+ #ifndef LIQUID_RAW_H
2
+ #define LIQUID_RAW_H
3
+
4
+ void liquid_define_raw(void);
5
+
6
+ #endif