liquid-c 4.0.1 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/cla.yml +23 -0
  3. data/.github/workflows/liquid.yml +36 -11
  4. data/.gitignore +4 -0
  5. data/.rubocop.yml +14 -0
  6. data/Gemfile +15 -5
  7. data/README.md +32 -8
  8. data/Rakefile +12 -63
  9. data/ext/liquid_c/block.c +493 -60
  10. data/ext/liquid_c/block.h +28 -2
  11. data/ext/liquid_c/c_buffer.c +42 -0
  12. data/ext/liquid_c/c_buffer.h +76 -0
  13. data/ext/liquid_c/context.c +233 -0
  14. data/ext/liquid_c/context.h +70 -0
  15. data/ext/liquid_c/document_body.c +97 -0
  16. data/ext/liquid_c/document_body.h +59 -0
  17. data/ext/liquid_c/expression.c +116 -0
  18. data/ext/liquid_c/expression.h +24 -0
  19. data/ext/liquid_c/extconf.rb +21 -9
  20. data/ext/liquid_c/intutil.h +22 -0
  21. data/ext/liquid_c/lexer.c +39 -3
  22. data/ext/liquid_c/lexer.h +18 -3
  23. data/ext/liquid_c/liquid.c +76 -6
  24. data/ext/liquid_c/liquid.h +24 -1
  25. data/ext/liquid_c/liquid_vm.c +618 -0
  26. data/ext/liquid_c/liquid_vm.h +25 -0
  27. data/ext/liquid_c/parse_context.c +76 -0
  28. data/ext/liquid_c/parse_context.h +13 -0
  29. data/ext/liquid_c/parser.c +153 -65
  30. data/ext/liquid_c/parser.h +4 -2
  31. data/ext/liquid_c/raw.c +136 -0
  32. data/ext/liquid_c/raw.h +6 -0
  33. data/ext/liquid_c/resource_limits.c +279 -0
  34. data/ext/liquid_c/resource_limits.h +23 -0
  35. data/ext/liquid_c/stringutil.h +44 -0
  36. data/ext/liquid_c/tokenizer.c +149 -35
  37. data/ext/liquid_c/tokenizer.h +20 -9
  38. data/ext/liquid_c/usage.c +18 -0
  39. data/ext/liquid_c/usage.h +9 -0
  40. data/ext/liquid_c/variable.c +196 -20
  41. data/ext/liquid_c/variable.h +18 -1
  42. data/ext/liquid_c/variable_lookup.c +44 -0
  43. data/ext/liquid_c/variable_lookup.h +8 -0
  44. data/ext/liquid_c/vm_assembler.c +491 -0
  45. data/ext/liquid_c/vm_assembler.h +240 -0
  46. data/ext/liquid_c/vm_assembler_pool.c +99 -0
  47. data/ext/liquid_c/vm_assembler_pool.h +26 -0
  48. data/lib/liquid/c/compile_ext.rb +44 -0
  49. data/lib/liquid/c/version.rb +3 -1
  50. data/lib/liquid/c.rb +226 -48
  51. data/liquid-c.gemspec +16 -10
  52. data/performance/c_profile.rb +23 -0
  53. data/performance.rb +6 -4
  54. data/rakelib/compile.rake +15 -0
  55. data/rakelib/integration_test.rake +43 -0
  56. data/rakelib/performance.rake +43 -0
  57. data/rakelib/rubocop.rake +6 -0
  58. data/rakelib/unit_test.rake +14 -0
  59. data/test/integration_test.rb +11 -0
  60. data/test/liquid_test_helper.rb +21 -0
  61. data/test/test_helper.rb +21 -2
  62. data/test/unit/block_test.rb +137 -0
  63. data/test/unit/context_test.rb +85 -0
  64. data/test/unit/expression_test.rb +191 -0
  65. data/test/unit/gc_stress_test.rb +28 -0
  66. data/test/unit/raw_test.rb +93 -0
  67. data/test/unit/resource_limits_test.rb +50 -0
  68. data/test/unit/tokenizer_test.rb +90 -20
  69. data/test/unit/variable_test.rb +279 -60
  70. metadata +60 -11
  71. data/test/liquid_test.rb +0 -11
@@ -0,0 +1,13 @@
1
+ #ifndef LIQUID_PARSE_CONTEXT_H
2
+ #define LIQUID_PARSE_CONTEXT_H
3
+
4
+ #include <ruby.h>
5
+ #include <stdbool.h>
6
+ #include "vm_assembler_pool.h"
7
+
8
+ void liquid_define_parse_context(void);
9
+ VALUE parse_context_get_document_body(VALUE self);
10
+
11
+ vm_assembler_pool_t *parse_context_get_vm_assembler_pool(VALUE self);
12
+
13
+ #endif
@@ -2,8 +2,8 @@
2
2
  #include "parser.h"
3
3
  #include "lexer.h"
4
4
 
5
- static VALUE cLiquidRangeLookup, cLiquidVariableLookup, cRange, vLiquidExpressionLiterals;
6
- static ID idToI, idEvaluate;
5
+ static VALUE empty_string;
6
+ static ID id_to_i, idEvaluate;
7
7
 
8
8
  void init_parser(parser_t *p, const char *str, const char *end)
9
9
  {
@@ -67,79 +67,145 @@ static VALUE parse_number(parser_t *p)
67
67
  return out;
68
68
  }
69
69
 
70
- static VALUE parse_range(parser_t *p)
70
+ __attribute__((noreturn)) static void raise_invalid_expression_type(const char *expr, int expr_len)
71
71
  {
72
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Invalid expression type '%.*s' in range expression", expr_len, expr);
73
+ }
74
+
75
+ static VALUE try_parse_constant_range(parser_t *p)
76
+ {
77
+ parser_t saved_state = *p;
78
+
72
79
  parser_must_consume(p, TOKEN_OPEN_ROUND);
73
80
 
74
- VALUE args[2];
75
- args[0] = parse_expression(p);
81
+ const char *begin_str = p->cur.val;
82
+ VALUE begin = try_parse_constant_expression(p);
83
+ const char *begin_str_end = p->cur.val;
84
+ if (begin == Qundef) {
85
+ *p = saved_state;
86
+ return Qundef;
87
+ }
76
88
  parser_must_consume(p, TOKEN_DOTDOT);
77
89
 
78
- args[1] = parse_expression(p);
90
+ const char *end_str = p->cur.val;
91
+ VALUE end = try_parse_constant_expression(p);
92
+ const char *end_str_end = p->cur.val;
93
+ if (end == Qundef) {
94
+ *p = saved_state;
95
+ return Qundef;
96
+ }
79
97
  parser_must_consume(p, TOKEN_CLOSE_ROUND);
80
98
 
81
- if (rb_respond_to(args[0], idEvaluate) || rb_respond_to(args[1], idEvaluate))
82
- return rb_class_new_instance(2, args, cLiquidRangeLookup);
99
+ begin = rb_check_funcall(begin, id_to_i, 0, NULL);
100
+ if (begin == Qundef) raise_invalid_expression_type(begin_str, (int)(begin_str_end - begin_str));
101
+
102
+ end = rb_check_funcall(end, id_to_i, 0, NULL);
103
+ if (end == Qundef) raise_invalid_expression_type(end_str, (int)(end_str_end - end_str));
83
104
 
84
- return rb_class_new_instance(2, args, cRange);
105
+ bool exclude_end = false;
106
+ return rb_range_new(begin, end, exclude_end);
85
107
  }
86
108
 
87
- static VALUE parse_variable(parser_t *p)
109
+ static void parse_and_compile_range(parser_t *p, vm_assembler_t *code)
88
110
  {
89
- VALUE name, lookups = rb_ary_new(), lookup;
90
- unsigned long long command_flags = 0;
111
+ VALUE const_range = try_parse_constant_range(p);
112
+ if (const_range != Qundef) {
113
+ vm_assembler_add_push_const(code, const_range);
114
+ return;
115
+ }
91
116
 
117
+ parser_must_consume(p, TOKEN_OPEN_ROUND);
118
+ parse_and_compile_expression(p, code);
119
+ parser_must_consume(p, TOKEN_DOTDOT);
120
+ parse_and_compile_expression(p, code);
121
+ parser_must_consume(p, TOKEN_CLOSE_ROUND);
122
+ vm_assembler_add_new_int_range(code);
123
+ }
124
+
125
+ static void parse_and_compile_variable_lookup(parser_t *p, vm_assembler_t *code)
126
+ {
92
127
  if (parser_consume(p, TOKEN_OPEN_SQUARE).type) {
93
- name = parse_expression(p);
128
+ parse_and_compile_expression(p, code);
94
129
  parser_must_consume(p, TOKEN_CLOSE_SQUARE);
130
+ vm_assembler_add_find_variable(code);
95
131
  } else {
96
- name = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER));
132
+ VALUE name = token_to_rstr_leveraging_existing_symbol(parser_must_consume(p, TOKEN_IDENTIFIER));
133
+ vm_assembler_add_find_static_variable(code, name);
97
134
  }
98
135
 
99
136
  while (true) {
100
137
  if (p->cur.type == TOKEN_OPEN_SQUARE) {
101
138
  parser_consume_any(p);
102
- lookup = parse_expression(p);
139
+ parse_and_compile_expression(p, code);
103
140
  parser_must_consume(p, TOKEN_CLOSE_SQUARE);
104
-
105
- rb_ary_push(lookups, lookup);
141
+ vm_assembler_add_lookup_key(code);
106
142
  } else if (p->cur.type == TOKEN_DOT) {
107
- int has_space_affix = parser_consume_any(p).flags & TOKEN_SPACE_AFFIX;
108
- lookup = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER));
109
-
110
- if (has_space_affix)
111
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected dot");
112
-
113
- if (rstring_eq(lookup, "size") || rstring_eq(lookup, "first") || rstring_eq(lookup, "last"))
114
- command_flags |= 1 << RARRAY_LEN(lookups);
143
+ parser_consume_any(p);
144
+ VALUE key = token_to_rstr_leveraging_existing_symbol(parser_must_consume(p, TOKEN_IDENTIFIER));
115
145
 
116
- rb_ary_push(lookups, lookup);
146
+ if (rstring_eq(key, "size") || rstring_eq(key, "first") || rstring_eq(key, "last"))
147
+ vm_assembler_add_lookup_command(code, key);
148
+ else
149
+ vm_assembler_add_lookup_const_key(code, key);
117
150
  } else {
118
151
  break;
119
152
  }
120
153
  }
154
+ }
121
155
 
122
- if (RARRAY_LEN(lookups) == 0) {
123
- VALUE literal = rb_hash_lookup2(vLiquidExpressionLiterals, name, Qundef);
124
- if (literal != Qundef) return literal;
156
+ static VALUE try_parse_literal(parser_t *p)
157
+ {
158
+ if (p->next.type == TOKEN_DOT || p->next.type == TOKEN_OPEN_SQUARE)
159
+ return Qundef;
160
+
161
+ const char *str = p->cur.val;
162
+ long size = p->cur.val_end - str;
163
+ VALUE result = Qundef;
164
+ switch (size) {
165
+ case 3:
166
+ if (memcmp(str, "nil", size) == 0)
167
+ result = Qnil;
168
+ break;
169
+ case 4:
170
+ if (memcmp(str, "null", size) == 0) {
171
+ result = Qnil;
172
+ } else if (memcmp(str, "true", size) == 0) {
173
+ result = Qtrue;
174
+ }
175
+ break;
176
+ case 5:
177
+ switch (*str) {
178
+ case 'f':
179
+ if (memcmp(str, "false", size) == 0)
180
+ result = Qfalse;
181
+ break;
182
+ case 'b':
183
+ if (memcmp(str, "blank", size) == 0)
184
+ result = empty_string;
185
+ break;
186
+ case 'e':
187
+ if (memcmp(str, "empty", size) == 0)
188
+ result = empty_string;
189
+ break;
190
+ }
191
+ break;
125
192
  }
126
-
127
- VALUE args[4] = {Qfalse, name, lookups, INT2FIX(command_flags)};
128
- return rb_class_new_instance(4, args, cLiquidVariableLookup);
193
+ if (result != Qundef)
194
+ parser_consume_any(p);
195
+ return result;
129
196
  }
130
197
 
131
- VALUE parse_expression(parser_t *p)
198
+ VALUE try_parse_constant_expression(parser_t *p)
132
199
  {
133
200
  switch (p->cur.type) {
134
201
  case TOKEN_IDENTIFIER:
135
- case TOKEN_OPEN_SQUARE:
136
- return parse_variable(p);
202
+ return try_parse_literal(p);
137
203
 
138
204
  case TOKEN_NUMBER:
139
205
  return parse_number(p);
140
206
 
141
207
  case TOKEN_OPEN_ROUND:
142
- return parse_range(p);
208
+ return try_parse_constant_range(p);
143
209
 
144
210
  case TOKEN_STRING:
145
211
  {
@@ -149,47 +215,69 @@ VALUE parse_expression(parser_t *p)
149
215
  return token_to_rstr(token);
150
216
  }
151
217
  }
152
-
153
- if (p->cur.type == TOKEN_EOS) {
154
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p->cur.type]);
155
- } else {
156
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s, \"%.*s\"] is not a valid expression",
157
- symbol_names[p->cur.type], (int)(p->cur.val_end - p->cur.val), p->cur.val);
158
- }
159
- return Qnil;
218
+ return Qundef;
160
219
  }
161
220
 
162
- static VALUE rb_parse_expression(VALUE self, VALUE markup)
221
+ static void parse_and_compile_number(parser_t *p, vm_assembler_t *code)
163
222
  {
164
- StringValue(markup);
165
- char *start = RSTRING_PTR(markup);
223
+ VALUE num = parse_number(p);
224
+ if (RB_FIXNUM_P(num))
225
+ vm_assembler_add_push_fixnum(code, num);
226
+ else
227
+ vm_assembler_add_push_const(code, num);
228
+ return;
229
+ }
166
230
 
167
- parser_t p;
168
- init_parser(&p, start, start + RSTRING_LEN(markup));
231
+ void parse_and_compile_expression(parser_t *p, vm_assembler_t *code)
232
+ {
233
+ switch (p->cur.type) {
234
+ case TOKEN_IDENTIFIER:
235
+ {
236
+ VALUE literal = try_parse_literal(p);
237
+ if (literal != Qundef) {
238
+ vm_assembler_add_push_literal(code, literal);
239
+ return;
240
+ }
241
+
242
+ __attribute__ ((fallthrough));
243
+ }
244
+ case TOKEN_OPEN_SQUARE:
245
+ parse_and_compile_variable_lookup(p, code);
246
+ return;
169
247
 
170
- if (p.cur.type == TOKEN_EOS)
171
- return Qnil;
248
+ case TOKEN_NUMBER:
249
+ parse_and_compile_number(p, code);
250
+ return;
172
251
 
173
- VALUE expr = parse_expression(&p);
252
+ case TOKEN_OPEN_ROUND:
253
+ parse_and_compile_range(p, code);
254
+ return;
174
255
 
175
- if (p.cur.type != TOKEN_EOS)
176
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p.cur.type]);
256
+ case TOKEN_STRING:
257
+ {
258
+ lexer_token_t token = parser_consume_any(p);
259
+ token.val++;
260
+ token.val_end--;
261
+ VALUE str = token_to_rstr(token);
262
+ vm_assembler_add_push_const(code, str);
263
+ return;
264
+ }
265
+ }
177
266
 
178
- return expr;
267
+ if (p->cur.type == TOKEN_EOS) {
268
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p->cur.type]);
269
+ } else {
270
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s, \"%.*s\"] is not a valid expression",
271
+ symbol_names[p->cur.type], (int)(p->cur.val_end - p->cur.val), p->cur.val);
272
+ }
179
273
  }
180
274
 
181
- void init_liquid_parser(void)
275
+ void liquid_define_parser(void)
182
276
  {
183
- idToI = rb_intern("to_i");
277
+ id_to_i = rb_intern("to_i");
184
278
  idEvaluate = rb_intern("evaluate");
185
279
 
186
- cLiquidRangeLookup = rb_const_get(mLiquid, rb_intern("RangeLookup"));
187
- cRange = rb_const_get(rb_cObject, rb_intern("Range"));
188
- cLiquidVariableLookup = rb_const_get(mLiquid, rb_intern("VariableLookup"));
189
-
190
- VALUE cLiquidExpression = rb_const_get(mLiquid, rb_intern("Expression"));
191
- rb_define_singleton_method(cLiquidExpression, "c_parse", rb_parse_expression, 1);
192
-
193
- vLiquidExpressionLiterals = rb_const_get(cLiquidExpression, rb_intern("LITERALS"));
280
+ empty_string = rb_utf8_str_new_literal("");
281
+ rb_global_variable(&empty_string);
194
282
  }
195
283
 
@@ -2,6 +2,7 @@
2
2
  #define LIQUID_PARSER_H
3
3
 
4
4
  #include "lexer.h"
5
+ #include "vm_assembler.h"
5
6
 
6
7
  typedef struct parser {
7
8
  lexer_token_t cur, next;
@@ -14,9 +15,10 @@ lexer_token_t parser_must_consume(parser_t *parser, unsigned char type);
14
15
  lexer_token_t parser_consume(parser_t *parser, unsigned char type);
15
16
  lexer_token_t parser_consume_any(parser_t *parser);
16
17
 
17
- VALUE parse_expression(parser_t *parser);
18
+ void parse_and_compile_expression(parser_t *p, vm_assembler_t *code);
19
+ VALUE try_parse_constant_expression(parser_t *p);
18
20
 
19
- void init_liquid_parser(void);
21
+ void liquid_define_parser(void);
20
22
 
21
23
  #endif
22
24
 
@@ -0,0 +1,136 @@
1
+ #include "liquid.h"
2
+ #include "raw.h"
3
+ #include "stringutil.h"
4
+ #include "tokenizer.h"
5
+
6
+ static VALUE id_block_name, id_raise_tag_never_closed, id_block_delimiter, id_ivar_body; // Assigned from rb_intern() in liquid_define_raw(). NOTE(review): rb_intern() returns ID, so ID looks like the intended type — confirm.
7
+ static VALUE cLiquidRaw; // The "Raw" constant looked up on mLiquid in liquid_define_raw().
8
+
9
+ struct full_token_possibly_invalid_t { // Result of match_full_token_possibly_invalid(); all fields are zero/NULL when no match was found.
10
+ long body_len; // Byte offset, within the token, of the '{' that opens the matched tag (i.e. how many leading bytes precede it).
11
+ const char *delimiter_start; // Points at the first character of the matched word inside the token's text.
12
+ long delimiter_len; // Length in bytes of the matched word.
13
+ };
14
+
15
+ static bool match_full_token_possibly_invalid(token_t *token, struct full_token_possibly_invalid_t *match) // Scans the token's full text backward for a "{%" / "{%-" opener followed by optional whitespace and a word; fills *match and returns true if found, else returns false.
16
+ {
17
+ const char *str = token->str_full;
18
+ long len = token->len_full;
19
+
20
+ match->body_len = 0; // Reset the result first so every early "return false" leaves *match zeroed.
21
+ match->delimiter_start = NULL;
22
+ match->delimiter_len = 0;
23
+
24
+ if (len < 5) return false; // Must be at least 5 characters: \{%\w%\}
25
+ if (str[len - 1] != '}' || str[len - 2] != '%') return false; // The token must end with the tag closer "%}".
26
+
27
+ const char *curr_delimiter_start; // Only read while curr_delimiter_len > 0, which requires it to have been assigned in the loop below.
28
+ long curr_delimiter_len = 0;
29
+
30
+ bool is_last_char_whitespace = true; // Starts true so the first word character encountered begins a new delimiter run.
31
+
32
+ // Search from the end of the string, starting just before the closing "%}".
33
+ // The token could contain part of the raw body in front of the real closing tag, like this:
34
+ // {% endraw {% endraw %}
35
+ // In this case body_len must come out as 10 so that the leading "{% endraw " is preserved as body content.
36
+ for (long i = len - 3; i > 1; i--) { // i stops at 2, so str[i - 1] and str[i - 2] are always in bounds.
37
+ char c = str[i];
38
+
39
+ // match \s
40
+ bool is_whitespace = rb_isspace(c);
41
+
42
+ if (is_word_char(c)) {
43
+ curr_delimiter_start = str + i; // Scanning backward, so the newest word character is the start of the run.
44
+
45
+ if (is_last_char_whitespace) {
46
+ // start a new delimiter match (discarding any word seen further to the right)
47
+ curr_delimiter_len = 1;
48
+ } else {
49
+ curr_delimiter_len++;
50
+ }
51
+ } else if (!is_word_char(c) && !is_whitespace) { // Neither word character nor whitespace: drop the current candidate.
52
+ curr_delimiter_start = NULL;
53
+ curr_delimiter_len = 0;
54
+ }
55
+
56
+ is_last_char_whitespace = is_whitespace;
57
+
58
+ if (curr_delimiter_len > 0) { // Here c is either a word character or whitespace to the left of a word.
59
+ // match start of a tag which is {% or {%-
60
+ if (
61
+ (str[i - 1] == '%' && str[i - 2] == '{') ||
62
+ (i - 3 >= 0 && str[i - 1] == '-' && str[i - 2] == '%' && str[i - 3] == '{') // i can be 2 here, so i - 3 must be checked before indexing.
63
+ ) {
64
+ match->delimiter_start = curr_delimiter_start;
65
+ match->delimiter_len = curr_delimiter_len;
66
+
67
+ if (str[i - 1] == '-') {
68
+ match->body_len = i - 3; // Offset of the '{' in "{%-".
69
+ } else {
70
+ match->body_len = i - 2; // Offset of the '{' in "{%".
71
+ }
72
+
73
+ return true;
74
+ }
75
+ }
76
+ }
77
+
78
+ return false;
79
+ }
80
+
81
+ static VALUE raw_parse_method(VALUE self, VALUE tokens) // Registered as "c_parse" in liquid_define_raw(): consumes tokens until one closes this block, stores the text before it in @body, and returns nil.
82
+ {
83
+ tokenizer_t *tokenizer;
84
+ Tokenizer_Get_Struct(tokens, tokenizer);
85
+
86
+ token_t token;
87
+ struct full_token_possibly_invalid_t match;
88
+
89
+ VALUE block_delimiter = rb_funcall(self, id_block_delimiter, 0); // Calls self.block_delimiter; the result is the tag name that ends this block.
90
+ Check_Type(block_delimiter, T_STRING);
91
+ char *block_delimiter_str = RSTRING_PTR(block_delimiter);
92
+ long block_delimiter_len = RSTRING_LEN(block_delimiter);
93
+
94
+ const char *body = NULL; // Start of the body: the full text of the first token read.
95
+ long body_len = 0; // Running total of the full lengths of the tokens consumed so far.
96
+
97
+ while (true) {
98
+ tokenizer_next(tokenizer, &token);
99
+
100
+ if (!token.type) break; // A zero token type ends the loop without a closing tag having been found.
101
+
102
+ if (body == NULL) {
103
+ body = token.str_full;
104
+ }
105
+
106
+ if (match_full_token_possibly_invalid(&token, &match)
107
+ && match.delimiter_len == block_delimiter_len
108
+ && memcmp(match.delimiter_start, block_delimiter_str, block_delimiter_len) == 0) {
109
+ body_len += match.body_len; // Keep only the part of this token that precedes the closing tag.
110
+ VALUE body_str = rb_enc_str_new(body, body_len, utf8_encoding); // NOTE(review): assumes consecutive tokens are contiguous in one buffer starting at body — confirm against the tokenizer.
111
+ rb_ivar_set(self, id_ivar_body, body_str);
112
+ if (RBASIC_CLASS(self) == cLiquidRaw) { // Exact class comparison: only when self's class is cLiquidRaw itself.
113
+ tokenizer->raw_tag_body = RSTRING_PTR(body_str); // Points into body_str, which is also stored in @body above.
114
+ tokenizer->raw_tag_body_len = (unsigned int)body_len;
115
+ }
116
+ return Qnil;
117
+ }
118
+
119
+ body_len += token.len_full; // Not the closing tag: the whole token is part of the body.
120
+ }
121
+
122
+ rb_funcall(self, id_raise_tag_never_closed, 1, rb_funcall(self, id_block_name, 0)); // Tokens ran out: call self.raise_tag_never_closed(self.block_name).
123
+ return Qnil;
124
+ }
125
+
126
+ void liquid_define_raw(void) // Initializes this file's statics and defines the "c_parse" method on the "Raw" class found under mLiquid.
127
+ {
128
+ id_block_name = rb_intern("block_name");
129
+ id_raise_tag_never_closed = rb_intern("raise_tag_never_closed");
130
+ id_block_delimiter = rb_intern("block_delimiter");
131
+ id_ivar_body = rb_intern("@body");
132
+
133
+ cLiquidRaw = rb_const_get(mLiquid, rb_intern("Raw"));
134
+
135
+ rb_define_method(cLiquidRaw, "c_parse", raw_parse_method, 1); // Arity 1: raw_parse_method receives the tokens argument.
136
+ }
@@ -0,0 +1,6 @@
1
+ #ifndef LIQUID_RAW_H
2
+ #define LIQUID_RAW_H
3
+
4
+ void liquid_define_raw(void);
5
+
6
+ #endif