liquid-c 4.0.1 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/liquid.yml +24 -2
  3. data/.gitignore +4 -0
  4. data/.rubocop.yml +14 -0
  5. data/Gemfile +14 -5
  6. data/README.md +29 -5
  7. data/Rakefile +13 -62
  8. data/ext/liquid_c/block.c +488 -60
  9. data/ext/liquid_c/block.h +28 -2
  10. data/ext/liquid_c/c_buffer.c +42 -0
  11. data/ext/liquid_c/c_buffer.h +76 -0
  12. data/ext/liquid_c/context.c +233 -0
  13. data/ext/liquid_c/context.h +70 -0
  14. data/ext/liquid_c/document_body.c +89 -0
  15. data/ext/liquid_c/document_body.h +59 -0
  16. data/ext/liquid_c/expression.c +116 -0
  17. data/ext/liquid_c/expression.h +24 -0
  18. data/ext/liquid_c/extconf.rb +19 -9
  19. data/ext/liquid_c/intutil.h +22 -0
  20. data/ext/liquid_c/lexer.c +6 -2
  21. data/ext/liquid_c/lexer.h +18 -3
  22. data/ext/liquid_c/liquid.c +76 -6
  23. data/ext/liquid_c/liquid.h +24 -1
  24. data/ext/liquid_c/parse_context.c +76 -0
  25. data/ext/liquid_c/parse_context.h +13 -0
  26. data/ext/liquid_c/parser.c +141 -65
  27. data/ext/liquid_c/parser.h +4 -2
  28. data/ext/liquid_c/raw.c +110 -0
  29. data/ext/liquid_c/raw.h +6 -0
  30. data/ext/liquid_c/resource_limits.c +279 -0
  31. data/ext/liquid_c/resource_limits.h +23 -0
  32. data/ext/liquid_c/stringutil.h +44 -0
  33. data/ext/liquid_c/tokenizer.c +149 -35
  34. data/ext/liquid_c/tokenizer.h +20 -9
  35. data/ext/liquid_c/usage.c +18 -0
  36. data/ext/liquid_c/usage.h +9 -0
  37. data/ext/liquid_c/variable.c +196 -20
  38. data/ext/liquid_c/variable.h +18 -1
  39. data/ext/liquid_c/variable_lookup.c +44 -0
  40. data/ext/liquid_c/variable_lookup.h +8 -0
  41. data/ext/liquid_c/vm.c +588 -0
  42. data/ext/liquid_c/vm.h +25 -0
  43. data/ext/liquid_c/vm_assembler.c +491 -0
  44. data/ext/liquid_c/vm_assembler.h +240 -0
  45. data/ext/liquid_c/vm_assembler_pool.c +97 -0
  46. data/ext/liquid_c/vm_assembler_pool.h +27 -0
  47. data/lib/liquid/c/compile_ext.rb +44 -0
  48. data/lib/liquid/c/version.rb +3 -1
  49. data/lib/liquid/c.rb +225 -46
  50. data/liquid-c.gemspec +16 -10
  51. data/performance/c_profile.rb +23 -0
  52. data/performance.rb +6 -4
  53. data/rakelib/compile.rake +15 -0
  54. data/rakelib/integration_test.rake +43 -0
  55. data/rakelib/performance.rake +43 -0
  56. data/rakelib/rubocop.rake +6 -0
  57. data/rakelib/unit_test.rake +14 -0
  58. data/test/integration_test.rb +11 -0
  59. data/test/liquid_test_helper.rb +21 -0
  60. data/test/test_helper.rb +14 -2
  61. data/test/unit/block_test.rb +130 -0
  62. data/test/unit/context_test.rb +83 -0
  63. data/test/unit/expression_test.rb +186 -0
  64. data/test/unit/gc_stress_test.rb +28 -0
  65. data/test/unit/raw_test.rb +19 -0
  66. data/test/unit/resource_limits_test.rb +50 -0
  67. data/test/unit/tokenizer_test.rb +90 -20
  68. data/test/unit/variable_test.rb +212 -60
  69. metadata +59 -11
  70. data/test/liquid_test.rb +0 -11
@@ -0,0 +1,491 @@
1
+ #include "liquid.h"
2
+ #include "vm_assembler.h"
3
+ #include "expression.h"
4
+ #include "vm.h"
5
+
6
// Number of elements in a true array (not a pointer). The argument is
// parenthesized at every use so that any array-valued expression expands safely.
#define ARRAY_LENGTH(array) (sizeof(array) / sizeof((array)[0]))
7
+
8
+ static st_table *builtin_filter_table;
9
+
10
+ // methods from Liquid::StandardFilters
11
+ filter_desc_t builtin_filters[] = {
12
+ { .name = "size" },
13
+ { .name = "downcase" },
14
+ { .name = "upcase" },
15
+ { .name = "capitalize" },
16
+ { .name = "h" },
17
+ { .name = "escape" },
18
+ { .name = "escape_once" },
19
+ { .name = "url_encode" },
20
+ { .name = "url_decode" },
21
+ { .name = "slice" },
22
+ { .name = "truncate" },
23
+ { .name = "truncatewords" },
24
+ { .name = "split" },
25
+ { .name = "strip" },
26
+ { .name = "lstrip" },
27
+ { .name = "rstrip" },
28
+ { .name = "strip_html" },
29
+ { .name = "strip_newlines" },
30
+ { .name = "join" },
31
+ { .name = "sort" },
32
+ { .name = "sort_natural" },
33
+ { .name = "where" },
34
+ { .name = "uniq" },
35
+ { .name = "reverse" },
36
+ { .name = "map" },
37
+ { .name = "compact" },
38
+ { .name = "replace" },
39
+ { .name = "replace_first" },
40
+ { .name = "remove" },
41
+ { .name = "remove_first" },
42
+ { .name = "append" },
43
+ { .name = "concat" },
44
+ { .name = "prepend" },
45
+ { .name = "newline_to_br" },
46
+ { .name = "date" },
47
+ { .name = "first" },
48
+ { .name = "last" },
49
+ { .name = "abs" },
50
+ { .name = "plus" },
51
+ { .name = "minus" },
52
+ { .name = "times" },
53
+ { .name = "divided_by" },
54
+ { .name = "modulo" },
55
+ { .name = "round" },
56
+ { .name = "ceil" },
57
+ { .name = "floor" },
58
+ { .name = "at_least" },
59
+ { .name = "at_most" },
60
+ { .name = "default" },
61
+ };
62
+ static_assert(ARRAY_LENGTH(builtin_filters) < 256,
63
+ "support for larger than byte sized indexing of filters has not yet been implemented");
64
+
65
+ static void vm_assembler_common_init(vm_assembler_t *code)
66
+ {
67
+ code->max_stack_size = 0;
68
+ code->stack_size = 0;
69
+ code->protected_stack_size = 0;
70
+ code->parsing = true;
71
+ }
72
+
73
+ void vm_assembler_init(vm_assembler_t *code)
74
+ {
75
+ code->instructions = c_buffer_allocate(8);
76
+ code->constants = c_buffer_allocate(8 * sizeof(VALUE));
77
+ code->constants_table = st_init_numtable();
78
+ vm_assembler_common_init(code);
79
+ }
80
+
81
+ void vm_assembler_reset(vm_assembler_t *code)
82
+ {
83
+ c_buffer_reset(&code->instructions);
84
+ c_buffer_reset(&code->constants);
85
+ st_clear(code->constants_table);
86
+ vm_assembler_common_init(code);
87
+ }
88
+
89
+ void vm_assembler_free(vm_assembler_t *code)
90
+ {
91
+ c_buffer_free(&code->instructions);
92
+ c_buffer_free(&code->constants);
93
+ st_free_table(code->constants_table);
94
+ }
95
+
96
+ void vm_assembler_gc_mark(vm_assembler_t *code)
97
+ {
98
+ c_buffer_rb_gc_mark(&code->constants);
99
+ }
100
+
101
+ VALUE vm_assembler_disassemble(const uint8_t *start_ip, const uint8_t *end_ip, const VALUE *constants)
102
+ {
103
+ const uint8_t *ip = start_ip;
104
+ VALUE output = rb_str_buf_new(32);
105
+ VALUE constant = Qnil;
106
+
107
+ while (ip < end_ip) {
108
+ rb_str_catf(output, "0x%04lx: ", ip - start_ip);
109
+
110
+ if (vm_assembler_opcode_has_constant(*ip)) {
111
+ uint16_t constant_index = (ip[1] << 8) | ip[2];
112
+ constant = RARRAY_AREF(*constants, constant_index);
113
+ }
114
+
115
+ switch (*ip) {
116
+ case OP_LEAVE:
117
+ rb_str_catf(output, "leave\n");
118
+ break;
119
+
120
+ case OP_POP_WRITE:
121
+ rb_str_catf(output, "pop_write\n");
122
+ break;
123
+
124
+ case OP_PUSH_NIL:
125
+ rb_str_catf(output, "push_nil\n");
126
+ break;
127
+
128
+ case OP_PUSH_TRUE:
129
+ rb_str_catf(output, "push_true\n");
130
+ break;
131
+
132
+ case OP_PUSH_FALSE:
133
+ rb_str_catf(output, "push_false\n");
134
+ break;
135
+
136
+ case OP_FIND_VAR:
137
+ rb_str_catf(output, "find_var\n");
138
+ break;
139
+
140
+ case OP_LOOKUP_KEY:
141
+ rb_str_catf(output, "lookup_key\n");
142
+ break;
143
+
144
+ case OP_NEW_INT_RANGE:
145
+ rb_str_catf(output, "new_int_range\n");
146
+ break;
147
+
148
+ case OP_HASH_NEW:
149
+ rb_str_catf(output, "hash_new(%u)\n", ip[1]);
150
+ break;
151
+
152
+ case OP_PUSH_INT8:
153
+ rb_str_catf(output, "push_int8(%u)\n", ip[1]);
154
+ break;
155
+
156
+ case OP_PUSH_INT16:
157
+ {
158
+ int num = (ip[1] << 8) | ip[2];
159
+ rb_str_catf(output, "push_int16(%u)\n", num);
160
+ break;
161
+ }
162
+
163
+ case OP_RENDER_VARIABLE_RESCUE:
164
+ {
165
+ unsigned int line_number = bytes_to_uint24(ip + 1);
166
+ rb_str_catf(output, "render_variable_rescue(line_number: %u)\n", line_number);
167
+ break;
168
+ }
169
+
170
+ case OP_WRITE_RAW_W:
171
+ case OP_WRITE_RAW:
172
+ {
173
+ const char *text;
174
+ size_t size;
175
+ const char *name;
176
+ if (*ip == OP_WRITE_RAW_W) {
177
+ name = "write_raw_w";
178
+ size = bytes_to_uint24(&ip[1]);
179
+ text = (const char *)&ip[4];
180
+ } else {
181
+ name = "write_raw";
182
+ size = ip[1];
183
+ text = (const char *)&ip[2];
184
+ }
185
+ VALUE string = rb_enc_str_new(text, size, utf8_encoding);
186
+ rb_str_catf(output, "%s(%+"PRIsVALUE")\n", name, string);
187
+ break;
188
+ }
189
+
190
+ case OP_WRITE_NODE:
191
+ rb_str_catf(output, "write_node(%+"PRIsVALUE")\n", constant);
192
+ break;
193
+
194
+ case OP_PUSH_CONST:
195
+ rb_str_catf(output, "push_const(%+"PRIsVALUE")\n", constant);
196
+ break;
197
+
198
+ case OP_FIND_STATIC_VAR:
199
+ rb_str_catf(output, "find_static_var(%+"PRIsVALUE")\n", constant);
200
+ break;
201
+
202
+ case OP_LOOKUP_CONST_KEY:
203
+ rb_str_catf(output, "lookup_const_key(%+"PRIsVALUE")\n", constant);
204
+ break;
205
+
206
+ case OP_LOOKUP_COMMAND:
207
+ rb_str_catf(output, "lookup_command(%+"PRIsVALUE")\n", constant);
208
+ break;
209
+
210
+ case OP_FILTER:
211
+ {
212
+ VALUE filter_name = RARRAY_AREF(constant, 0);
213
+ uint8_t num_args = RARRAY_AREF(constant, 1);
214
+ rb_str_catf(output, "filter(name: %+"PRIsVALUE", num_args: %u)\n", filter_name, num_args);
215
+ break;
216
+ }
217
+
218
+ case OP_BUILTIN_FILTER:
219
+ rb_str_catf(output, "builtin_filter(name: :%s, num_args: %u)\n", builtin_filters[ip[1]].name, ip[2]);
220
+ break;
221
+
222
+ default:
223
+ rb_str_catf(output, "<opcode number %d disassembly not implemented>\n", ip[0]);
224
+ break;
225
+ }
226
+ liquid_vm_next_instruction(&ip);
227
+ }
228
+ return output;
229
+ }
230
+
231
+ struct merge_constants_table_func_args {
232
+ st_table *hash;
233
+ size_t increment_amount;
234
+ };
235
+
236
+ static int merge_constants_table(st_data_t key, st_data_t value, VALUE _arg)
237
+ {
238
+ struct merge_constants_table_func_args *arg = (struct merge_constants_table_func_args *)_arg;
239
+ st_table *dest_hash = arg->hash;
240
+ uint16_t new_value = value + arg->increment_amount;
241
+ st_insert(dest_hash, key, new_value);
242
+
243
+ return ST_CONTINUE;
244
+ }
245
+
246
+ void update_instructions_constants_table_index_ref(c_buffer_t *instructions, size_t increment_amount, c_buffer_t *constants)
247
+ {
248
+ uint8_t *ip = instructions->data;
249
+
250
+ while (ip < instructions->data_end) {
251
+ if (vm_assembler_opcode_has_constant(*ip)) {
252
+ uint16_t constant_index = (ip[1] << 8) | ip[2];
253
+ uint16_t new_constant_index = constant_index + increment_amount;
254
+ ip[1] = new_constant_index >> 8;
255
+ ip[2] = (uint8_t)new_constant_index;
256
+ }
257
+
258
+ liquid_vm_next_instruction((const uint8_t **)&ip);
259
+ }
260
+ }
261
+
262
+ void vm_assembler_concat(vm_assembler_t *dest, vm_assembler_t *src)
263
+ {
264
+ size_t dest_element_count = c_buffer_size(&dest->constants) / sizeof(VALUE);
265
+
266
+ // merge src constants table into dest constants table with new index
267
+ struct merge_constants_table_func_args arg;
268
+ arg.hash = dest->constants_table;
269
+ arg.increment_amount = dest_element_count;
270
+ st_foreach(src->constants_table, merge_constants_table, (VALUE)&arg);
271
+
272
+ // merge constants array
273
+ c_buffer_concat(&dest->constants, &src->constants);
274
+
275
+ update_instructions_constants_table_index_ref(&src->instructions, dest_element_count, &dest->constants);
276
+ c_buffer_concat(&dest->instructions, &src->instructions);
277
+
278
+ size_t max_src_stack_size = dest->stack_size + src->max_stack_size;
279
+ if (max_src_stack_size > dest->max_stack_size)
280
+ dest->max_stack_size = max_src_stack_size;
281
+
282
+ dest->stack_size += src->stack_size;
283
+ }
284
+
285
+ void vm_assembler_require_stack_args(vm_assembler_t *code, unsigned int count)
286
+ {
287
+ if (code->stack_size < code->protected_stack_size + count) {
288
+ rb_raise(rb_eRuntimeError, "insufficient number of values on the stack");
289
+ }
290
+ }
291
+
292
+
293
+ void vm_assembler_add_write_raw(vm_assembler_t *code, const char *string, size_t size)
294
+ {
295
+ if (size > UINT8_MAX) {
296
+ uint8_t *instructions = c_buffer_extend_for_write(&code->instructions, 4);
297
+ instructions[0] = OP_WRITE_RAW_W;
298
+ uint24_to_bytes((unsigned int)size, &instructions[1]);
299
+ } else {
300
+ uint8_t *instructions = c_buffer_extend_for_write(&code->instructions, 2);
301
+ instructions[0] = OP_WRITE_RAW;
302
+ instructions[1] = size;
303
+ }
304
+
305
+ c_buffer_write(&code->instructions, (char *)string, size);
306
+ }
307
+
308
+ void vm_assembler_add_write_node(vm_assembler_t *code, VALUE node)
309
+ {
310
+ vm_assembler_add_op_with_constant(code, node, OP_WRITE_NODE);
311
+ }
312
+
313
+ void vm_assembler_add_push_fixnum(vm_assembler_t *code, VALUE num)
314
+ {
315
+ long x = FIX2LONG(num);
316
+ if (x >= INT8_MIN && x <= INT8_MAX) {
317
+ vm_assembler_add_push_int8(code, x);
318
+ } else if (x >= INT16_MIN && x <= INT16_MAX) {
319
+ vm_assembler_add_push_int16(code, x);
320
+ } else {
321
+ vm_assembler_add_push_const(code, num);
322
+ }
323
+ }
324
+
325
+ void vm_assembler_add_push_literal(vm_assembler_t *code, VALUE literal)
326
+ {
327
+ switch (literal) {
328
+ case Qnil:
329
+ vm_assembler_add_push_nil(code);
330
+ break;
331
+ case Qtrue:
332
+ vm_assembler_add_push_true(code);
333
+ break;
334
+ case Qfalse:
335
+ vm_assembler_add_push_false(code);
336
+ break;
337
+ default:
338
+ if (RB_FIXNUM_P(literal)) {
339
+ vm_assembler_add_push_fixnum(code, literal);
340
+ } else {
341
+ vm_assembler_add_push_const(code, literal);
342
+ }
343
+ break;
344
+ }
345
+ }
346
+
347
+ void vm_assembler_add_filter(vm_assembler_t *code, VALUE filter_name, size_t arg_count)
348
+ {
349
+ if (arg_count > 254) {
350
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Too many filter arguments");
351
+ }
352
+ code->stack_size -= arg_count; // pop arg_count + 1, push 1
353
+
354
+ st_data_t builtin_index;
355
+ bool is_builtin = st_lookup(builtin_filter_table, filter_name, &builtin_index);
356
+
357
+ if (is_builtin) {
358
+ uint8_t *instructions = c_buffer_extend_for_write(&code->instructions, 3);
359
+ *instructions++ = OP_BUILTIN_FILTER;
360
+ *instructions++ = builtin_index;
361
+ *instructions++ = arg_count + 1; // include input
362
+ } else {
363
+ VALUE filter_args = rb_ary_new_capa(2);
364
+ rb_ary_push(filter_args, filter_name);
365
+ rb_ary_push(filter_args, arg_count + 1);
366
+ vm_assembler_add_op_with_constant(code, filter_args, OP_FILTER);
367
+ }
368
+ }
369
+
370
+ static void ensure_parsing(vm_assembler_t *code)
371
+ {
372
+ if (!code->parsing)
373
+ rb_raise(rb_eRuntimeError, "cannot extend code after it has finished being compiled");
374
+ }
375
+
376
+ void vm_assembler_add_evaluate_expression_from_ruby(vm_assembler_t *code, VALUE code_obj, VALUE expression)
377
+ {
378
+ ensure_parsing(code);
379
+
380
+ if (RB_SPECIAL_CONST_P(expression)) {
381
+ vm_assembler_add_push_literal(code, expression);
382
+ return;
383
+ }
384
+
385
+ switch (RB_BUILTIN_TYPE(expression)) {
386
+ case T_DATA:
387
+ if (RBASIC_CLASS(expression) == cLiquidCExpression) {
388
+ vm_assembler_concat(code, &((expression_t *)DATA_PTR(expression))->code);
389
+ vm_assembler_remove_leave(code);
390
+ return;
391
+ }
392
+ break;
393
+ case T_OBJECT:
394
+ {
395
+ VALUE klass = RBASIC_CLASS(expression);
396
+ if (klass == cLiquidVariableLookup || klass == cLiquidRangeLookup) {
397
+ rb_funcall(expression, id_compile_evaluate, 1, code_obj);
398
+ return;
399
+ }
400
+ break;
401
+ }
402
+ default:
403
+ break;
404
+ }
405
+
406
+ vm_assembler_add_push_const(code, expression);
407
+ }
408
+
409
+ void vm_assembler_add_find_variable_from_ruby(vm_assembler_t *code, VALUE code_obj, VALUE expression)
410
+ {
411
+ ensure_parsing(code);
412
+
413
+ if (RB_TYPE_P(expression, T_STRING)) {
414
+ vm_assembler_add_find_static_variable(code, expression);
415
+ } else {
416
+ vm_assembler_add_evaluate_expression_from_ruby(code, code_obj, expression);
417
+ vm_assembler_add_find_variable(code);
418
+ }
419
+ }
420
+
421
+ void vm_assembler_add_lookup_command_from_ruby(vm_assembler_t *code, VALUE command)
422
+ {
423
+ StringValue(command);
424
+ ensure_parsing(code);
425
+ vm_assembler_require_stack_args(code, 1);
426
+
427
+ vm_assembler_add_lookup_command(code, command);
428
+ }
429
+
430
+ void vm_assembler_add_lookup_key_from_ruby(vm_assembler_t *code, VALUE code_obj, VALUE expression)
431
+ {
432
+ ensure_parsing(code);
433
+ vm_assembler_require_stack_args(code, 1);
434
+
435
+ if (RB_TYPE_P(expression, T_STRING)) {
436
+ vm_assembler_add_lookup_const_key(code, expression);
437
+ } else {
438
+ vm_assembler_add_evaluate_expression_from_ruby(code, code_obj, expression);
439
+ vm_assembler_add_lookup_key(code);
440
+ }
441
+ }
442
+
443
+ void vm_assembler_add_new_int_range_from_ruby(vm_assembler_t *code)
444
+ {
445
+ ensure_parsing(code);
446
+ vm_assembler_require_stack_args(code, 2);
447
+ vm_assembler_add_new_int_range(code);
448
+ }
449
+
450
+ void vm_assembler_add_hash_new_from_ruby(vm_assembler_t *code, VALUE hash_size_obj)
451
+ {
452
+ ensure_parsing(code);
453
+ unsigned int hash_size = NUM2USHORT(hash_size_obj);
454
+ vm_assembler_require_stack_args(code, hash_size * 2);
455
+
456
+ vm_assembler_add_hash_new(code, hash_size);
457
+ }
458
+
459
+ void vm_assembler_add_filter_from_ruby(vm_assembler_t *code, VALUE filter_name, VALUE arg_count_obj)
460
+ {
461
+ ensure_parsing(code);
462
+ unsigned int arg_count = NUM2USHORT(arg_count_obj);
463
+ vm_assembler_require_stack_args(code, arg_count + 1);
464
+ filter_name = rb_str_intern(filter_name);
465
+
466
+ vm_assembler_add_filter(code, filter_name, arg_count);
467
+ }
468
+
469
+ bool vm_assembler_opcode_has_constant(uint8_t ip) {
470
+ if (
471
+ ip == OP_PUSH_CONST ||
472
+ ip == OP_WRITE_NODE ||
473
+ ip == OP_FIND_STATIC_VAR ||
474
+ ip == OP_LOOKUP_CONST_KEY ||
475
+ ip == OP_LOOKUP_COMMAND ||
476
+ ip == OP_FILTER
477
+ ) {
478
+ return true;
479
+ }
480
+ return false;
481
+ }
482
+
483
+ void liquid_define_vm_assembler(void)
484
+ {
485
+ builtin_filter_table = st_init_numtable_with_size(ARRAY_LENGTH(builtin_filters));
486
+ for (unsigned int i = 0; i < ARRAY_LENGTH(builtin_filters); i++) {
487
+ filter_desc_t *filter = &builtin_filters[i];
488
+ filter->sym = ID2SYM(rb_intern(filter->name));
489
+ st_insert(builtin_filter_table, filter->sym, i);
490
+ }
491
+ }