liquid-c 4.1.0 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f0b1d438c8d9e73c713f3c66e14cc41a0b5351f33153047d46728a897c1bc576
4
- data.tar.gz: 5275881737af08a486e1ebeb672333ffe534e0de20e8d018246013419950f382
3
+ metadata.gz: 8b5a8a5b1b5f7409b378626e889898eeac6996ef2127894d25d2f342183c633c
4
+ data.tar.gz: 165117bbbcf583f3c9c0223c3232de5e5c42b67a8ed90b3d268864e4732c685d
5
5
  SHA512:
6
- metadata.gz: c21caebe56294b8ef4cce7712261c6f7a50ddb3fd72d08ad2463b7b561a8c13b2d0db1c9b3941ee3a8d4f9a816c85764db182b181a1eacf86032b6d27878e974
7
- data.tar.gz: 736ebd21fd1cf147ed1f88cc3828a0209027d22cbec79f72f71fde2599523ec40e974b3ad1aa47acfd5724f4c0b1e3508ba74a13efee84e9e9771c39b012ef65
6
+ metadata.gz: a7c7ed2cd71c5470c4b53671611279e71279cba16f0a6fcd5623935b80373ed941f6b3360264399ec7f8ab1efe39728f0bba1a6fecdd3e704675e7464ab660aa
7
+ data.tar.gz: 006af34fd3a151cae0387a0d70f18e5f64f98dba95f1e2bf6ea36fe239db3153f301405c349ff25a852977ad38b8873a6d8b2554753f29f37665ef20f7a30fda
@@ -0,0 +1,23 @@
1
+ # .github/workflows/cla.yml
2
+ name: Contributor License Agreement (CLA)
3
+
4
+ on:
5
+ pull_request_target:
6
+ types: [opened, synchronize]
7
+ issue_comment:
8
+ types: [created]
9
+
10
+ jobs:
11
+ cla:
12
+ runs-on: ubuntu-latest
13
+ if: |
14
+ (github.event.issue.pull_request
15
+ && !github.event.issue.pull_request.merged_at
16
+ && contains(github.event.comment.body, 'signed')
17
+ )
18
+ || (github.event.pull_request && !github.event.pull_request.merged)
19
+ steps:
20
+ - uses: Shopify/shopify-cla-action@v1
21
+ with:
22
+ github-token: ${{ secrets.GITHUB_TOKEN }}
23
+ cla-token: ${{ secrets.CLA_TOKEN }}
@@ -5,28 +5,36 @@ jobs:
5
5
  runs-on: ubuntu-latest
6
6
  strategy:
7
7
  matrix:
8
- entry:
9
- - { ruby: '2.5', allowed-failure: false }
8
+ include:
10
9
  - { ruby: '2.7', allowed-failure: false }
11
10
  - { ruby: '3.0', allowed-failure: false }
11
+ - { ruby: '3.1', allowed-failure: false }
12
+ - { ruby: '3.2', allowed-failure: false }
13
+ - { ruby: '3.3', allowed-failure: false }
12
14
  - { ruby: ruby-head, allowed-failure: true }
13
- name: test (${{ matrix.entry.ruby }})
15
+ - { ruby: truffleruby-head, allowed-failure: true }
16
+ name: test (${{ matrix.ruby }})
14
17
  steps:
15
18
  - uses: actions/checkout@v2
16
19
  - uses: ruby/setup-ruby@v1
17
20
  with:
18
- ruby-version: ${{ matrix.entry.ruby }}
19
- - uses: actions/cache@v1
20
- with:
21
- path: vendor/bundle
22
- key: ${{ runner.os }}-gems-${{ hashFiles('Gemfile') }}
23
- restore-keys: ${{ runner.os }}-gems-
24
- - run: bundle install --jobs=3 --retry=3 --path=vendor/bundle
21
+ ruby-version: ${{ matrix.ruby }}
22
+ bundler-cache: true
23
+
25
24
  - run: bundle exec rake
26
- continue-on-error: ${{ matrix.entry.allowed-failure }}
25
+ continue-on-error: ${{ matrix.allowed-failure }}
27
26
  env:
28
27
  LIQUID_C_PEDANTIC: 'true'
28
+ if: matrix.ruby != 'truffleruby-head'
29
+
30
+ - run: bundle exec rake test:unit
31
+ continue-on-error: ${{ matrix.allowed-failure }}
32
+ env:
33
+ LIQUID_C_PEDANTIC: 'true'
34
+ if: matrix.ruby == 'truffleruby-head'
35
+
29
36
  - run: bundle exec rubocop
37
+ if: matrix.ruby != 'truffleruby-head'
30
38
 
31
39
  valgrind:
32
40
  runs-on: ubuntu-latest
@@ -34,12 +42,7 @@ jobs:
34
42
  - uses: actions/checkout@v2
35
43
  - uses: ruby/setup-ruby@v1
36
44
  with:
37
- ruby-version: '3.0'
45
+ ruby-version: 3.3
46
+ bundler-cache: true
38
47
  - run: sudo apt-get install -y valgrind
39
- - uses: actions/cache@v1
40
- with:
41
- path: vendor/bundle
42
- key: ${{ runner.os }}-gems-${{ hashFiles('Gemfile') }}
43
- restore-keys: ${{ runner.os }}-gems-
44
- - run: bundle install --jobs=3 --retry=3 --path=vendor/bundle
45
48
  - run: bundle exec rake test:valgrind
data/Gemfile CHANGED
@@ -10,6 +10,7 @@ gemspec
10
10
  gem "liquid", github: "Shopify/liquid", ref: "master"
11
11
 
12
12
  group :test do
13
+ gem "base64", require: false # for older rubocop on Ruby 3.4
13
14
  gem "rubocop", "~> 1.24.1", require: false
14
15
  gem "rubocop-performance", "~> 1.13.2", require: false
15
16
  gem "rubocop-shopify", "~> 2.4.0", require: false
data/README.md CHANGED
@@ -1,5 +1,5 @@
1
1
  # Liquid::C
2
- [![Build Status](https://travis-ci.org/Shopify/liquid-c.svg?branch=master)](https://travis-ci.org/Shopify/liquid-c)
2
+ [![Build Status](https://travis-ci.org/Shopify/liquid-c.svg?branch=main)](https://travis-ci.org/Shopify/liquid-c)
3
3
 
4
4
  Partial native implementation of the liquid ruby gem in C.
5
5
 
@@ -7,8 +7,8 @@ Partial native implementation of the liquid ruby gem in C.
7
7
 
8
8
  Add these lines to your application's Gemfile:
9
9
 
10
- gem 'liquid', github: 'Shopify/liquid', branch: 'master'
11
- gem 'liquid-c', github: 'Shopify/liquid-c', branch: 'master'
10
+ gem 'liquid', github: 'Shopify/liquid', branch: 'main'
11
+ gem 'liquid-c', github: 'Shopify/liquid-c', branch: 'main'
12
12
 
13
13
  And then execute:
14
14
 
data/Rakefile CHANGED
@@ -9,8 +9,6 @@ require "ruby_memcheck"
9
9
 
10
10
  ENV["DEBUG"] ||= "true"
11
11
 
12
- RubyMemcheck.config(binary_name: "liquid_c")
13
-
14
12
  task default: [:test, :rubocop]
15
13
 
16
14
  task test: ["test:unit", "test:integration:all"]
data/ext/liquid_c/block.c CHANGED
@@ -3,7 +3,7 @@
3
3
  #include "intutil.h"
4
4
  #include "tokenizer.h"
5
5
  #include "stringutil.h"
6
- #include "vm.h"
6
+ #include "liquid_vm.h"
7
7
  #include "variable.h"
8
8
  #include "context.h"
9
9
  #include "parse_context.h"
@@ -49,7 +49,7 @@ static void block_body_mark(void *ptr)
49
49
  } else {
50
50
  rb_gc_mark(body->as.intermediate.parse_context);
51
51
  if (body->as.intermediate.vm_assembler_pool)
52
- vm_assembler_pool_gc_mark(body->as.intermediate.vm_assembler_pool);
52
+ rb_gc_mark(body->as.intermediate.vm_assembler_pool->self);
53
53
  if (body->as.intermediate.code)
54
54
  vm_assembler_gc_mark(body->as.intermediate.code);
55
55
  }
@@ -199,9 +199,14 @@ static tag_markup_t internal_block_body_parse(block_body_t *body, parse_context_
199
199
  long name_len = name_end - name_start;
200
200
 
201
201
  if (name_len == 0) {
202
- VALUE str = rb_enc_str_new(token.str_trimmed, token.len_trimmed, utf8_encoding);
203
- unknown_tag = (tag_markup_t) { str, str };
204
- goto loop_break;
202
+ if (name_start < end && *name_start == '#') { // inline comment
203
+ name_end++;
204
+ name_len++;
205
+ } else {
206
+ VALUE str = rb_enc_str_new(token.str_trimmed, token.len_trimmed, utf8_encoding);
207
+ unknown_tag = (tag_markup_t) { str, str };
208
+ goto loop_break;
209
+ }
205
210
  }
206
211
 
207
212
  if (name_len == 6 && strncmp(name_start, "liquid", 6) == 0) {
@@ -2,7 +2,7 @@
2
2
  #include "context.h"
3
3
  #include "variable_lookup.h"
4
4
  #include "variable.h"
5
- #include "vm.h"
5
+ #include "liquid_vm.h"
6
6
  #include "expression.h"
7
7
  #include "document_body.h"
8
8
 
@@ -38,7 +38,7 @@ inline static VALUE value_to_liquid_and_set_context(VALUE value, VALUE context_t
38
38
  if (RB_SPECIAL_CONST_P(value))
39
39
  return value;
40
40
 
41
- VALUE klass = RBASIC(value)->klass;
41
+ VALUE klass = RBASIC_CLASS(value);
42
42
 
43
43
  // More basic types having #to_liquid of self and no #context=
44
44
  if (klass == rb_cString || klass == rb_cArray || klass == rb_cHash)
@@ -9,6 +9,14 @@ static VALUE cLiquidCDocumentBody;
9
9
  static void document_body_mark(void *ptr)
10
10
  {
11
11
  document_body_t *body = ptr;
12
+ /* When Liquid::C::BlockBody#freeze is called, it calls
13
+ * document_body_write_block_body which sets the document_body_entry but
14
+ * does not yet set the compiled flag to true. During this time, the only
15
+ * reference to this Liquid::C::DocumentBody object is in the instance
16
+ * variables of the parse_context which is marked movable by Ruby. This
17
+ * causes the self reference here to be moved by compaction causing it to
18
+ * point to an incorrect object. */
19
+ rb_gc_mark(body->self);
12
20
  rb_gc_mark(body->constants);
13
21
  }
14
22
 
@@ -46,7 +46,7 @@ static inline block_body_header_t *document_body_get_block_body_header_ptr(const
46
46
  static inline const VALUE *document_body_get_constants_ptr(const document_body_entry_t *entry)
47
47
  {
48
48
  block_body_header_t *header = document_body_get_block_body_header_ptr(entry);
49
- return RARRAY_PTR(entry->body->constants) + header->constants_offset;
49
+ return RARRAY_CONST_PTR(entry->body->constants) + header->constants_offset;
50
50
  }
51
51
 
52
52
  static inline void document_body_ensure_compile_finished(document_body_t *body)
@@ -1,7 +1,7 @@
1
1
  #include "liquid.h"
2
2
  #include "vm_assembler.h"
3
3
  #include "parser.h"
4
- #include "vm.h"
4
+ #include "liquid_vm.h"
5
5
  #include "expression.h"
6
6
 
7
7
  VALUE cLiquidCExpression;
@@ -12,13 +12,15 @@ end
12
12
  if ENV["DEBUG"] == "true"
13
13
  append_cflags("-fbounds-check")
14
14
  CONFIG["optflags"] = " -O0"
15
+ # Hack to enable assertions since ruby/assert.h disables assertions unless
16
+ # Ruby was compiled with -DRUBY_DEBUG.
17
+ # https://github.com/ruby/ruby/blob/9e678cdbd054f78576a8f21b3f97cccc395ade22/include/ruby/assert.h#L36-L41
18
+ $CFLAGS << " -DRUBY_DEBUG"
15
19
  else
16
20
  $CFLAGS << " -DNDEBUG"
17
21
  end
18
22
 
19
- if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.7.0") # added in 2.7
20
- $CFLAGS << " -DHAVE_RB_HASH_BULK_INSERT"
21
- end
23
+ have_func "rb_hash_bulk_insert"
22
24
 
23
25
  $warnflags&.gsub!(/-Wdeclaration-after-statement/, "")
24
26
  create_makefile("liquid_c")
data/ext/liquid_c/lexer.c CHANGED
@@ -144,7 +144,39 @@ const char *lex_one(const char *start, const char *end, lexer_token_t *token)
144
144
 
145
145
  if (is_special(c)) RETURN_TOKEN(c, 1);
146
146
 
147
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected character %c", c);
147
+ long remaining_str_len = end - str;
148
+ int char_len = 0;
149
+
150
+ // read multibyte UTF-8 character
151
+ if ((c & 0x80) == 0) {
152
+ // 1-byte character
153
+ char_len = 1;
154
+ } else if ((c & 0xE0) == 0xC0) {
155
+ // 2-byte character
156
+ if (remaining_str_len >= 2) {
157
+ char_len = 2;
158
+ }
159
+ } else if ((c & 0xF0) == 0xE0) {
160
+ // 3-byte character
161
+ if (remaining_str_len >= 3) {
162
+ char_len = 3;
163
+ }
164
+ } else if ((c & 0xF8) == 0xF0) {
165
+ // 4-byte character
166
+ if (remaining_str_len >= 4) {
167
+ char_len = 4;
168
+ }
169
+ } else {
170
+ // this should never happen
171
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected character %c", c);
172
+ }
173
+
174
+ if (char_len > 0) {
175
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected character %.*s", char_len, str);
176
+ } else {
177
+ rb_raise(rb_eArgError, "invalid byte sequence in UTF-8");
178
+ }
179
+
148
180
  return NULL;
149
181
  }
150
182
 
@@ -12,7 +12,7 @@
12
12
  #include "parse_context.h"
13
13
  #include "variable_lookup.h"
14
14
  #include "vm_assembler_pool.h"
15
- #include "vm.h"
15
+ #include "liquid_vm.h"
16
16
  #include "usage.h"
17
17
 
18
18
  ID id_evaluate;
@@ -2,7 +2,7 @@
2
2
  #include <assert.h>
3
3
 
4
4
  #include "liquid.h"
5
- #include "vm.h"
5
+ #include "liquid_vm.h"
6
6
  #include "variable_lookup.h"
7
7
  #include "intutil.h"
8
8
  #include "document_body.h"
@@ -141,31 +141,40 @@ static inline void vm_stack_push(vm_t *vm, VALUE value)
141
141
  vm->stack.data_end = (uint8_t *)stack_ptr;
142
142
  }
143
143
 
144
- static inline VALUE vm_stack_pop(vm_t *vm)
144
+ static inline VALUE *vm_stack_peek_n(vm_t *vm, size_t n)
145
145
  {
146
146
  VALUE *stack_ptr = (VALUE *)vm->stack.data_end;
147
- stack_ptr--;
147
+ stack_ptr -= n;
148
148
  assert((VALUE *)vm->stack.data <= stack_ptr);
149
- vm->stack.data_end = (uint8_t *)stack_ptr;
150
- return *stack_ptr;
149
+ return stack_ptr;
151
150
  }
152
151
 
153
- static inline VALUE *vm_stack_pop_n_use_in_place(vm_t *vm, size_t n)
152
+ static inline VALUE *vm_stack_pop_n(vm_t *vm, size_t n)
154
153
  {
155
- VALUE *stack_ptr = (VALUE *)vm->stack.data_end;
156
- stack_ptr -= n;
157
- assert((VALUE *)vm->stack.data <= stack_ptr);
154
+ VALUE *stack_ptr = vm_stack_peek_n(vm, n);
158
155
  vm->stack.data_end = (uint8_t *)stack_ptr;
159
156
  return stack_ptr;
160
157
  }
161
158
 
159
+ static inline VALUE vm_stack_pop(vm_t *vm)
160
+ {
161
+ return *vm_stack_pop_n(vm, 1);
162
+ }
163
+
162
164
  static inline void vm_stack_reserve_for_write(vm_t *vm, size_t num_values)
163
165
  {
164
166
  c_buffer_reserve_for_write(&vm->stack, num_values * sizeof(VALUE));
165
167
  }
166
168
 
167
- static VALUE vm_invoke_filter(vm_t *vm, VALUE filter_name, int num_args, VALUE *args)
169
+ static VALUE vm_invoke_filter(vm_t *vm, VALUE filter_name, int num_args)
168
170
  {
171
+ VALUE *popped_args = vm_stack_pop_n(vm, num_args);
172
+ /* We have to copy popped_args_ptr to the stack because the VM
173
+ * no longer holds onto these objects, so they have to exist on
174
+ * the stack to ensure they don't get garbage collected. */
175
+ VALUE *args = alloca(sizeof(VALUE *) * num_args);
176
+ memcpy(args, popped_args, sizeof(VALUE *) * num_args);
177
+
169
178
  bool not_invokable = rb_hash_lookup(vm->context.filter_methods, filter_name) != Qtrue;
170
179
  if (RB_UNLIKELY(not_invokable)) {
171
180
  if (vm->context.strict_filters) {
@@ -313,8 +322,11 @@ static VALUE vm_render_until_error(VALUE uncast_args)
313
322
  size_t hash_size = *ip++;
314
323
  size_t num_keys_and_values = hash_size * 2;
315
324
  VALUE hash = rb_hash_new();
316
- VALUE *args_ptr = vm_stack_pop_n_use_in_place(vm, num_keys_and_values);
325
+
326
+ VALUE *args_ptr = vm_stack_peek_n(vm, num_keys_and_values);
317
327
  hash_bulk_insert(num_keys_and_values, args_ptr, hash);
328
+ vm_stack_pop_n(vm, num_keys_and_values);
329
+
318
330
  vm_stack_push(vm, hash);
319
331
  break;
320
332
  }
@@ -336,8 +348,7 @@ static VALUE vm_render_until_error(VALUE uncast_args)
336
348
  num_args = *ip++; // includes input argument
337
349
  }
338
350
 
339
- VALUE *args_ptr = vm_stack_pop_n_use_in_place(vm, num_args);
340
- VALUE result = vm_invoke_filter(vm, filter_name, num_args, args_ptr);
351
+ VALUE result = vm_invoke_filter(vm, filter_name, num_args);
341
352
  vm_stack_push(vm, result);
342
353
  break;
343
354
  }
@@ -424,24 +435,43 @@ static VALUE vm_render_until_error(VALUE uncast_args)
424
435
  }
425
436
  }
426
437
 
438
+ typedef struct vm_evaluate_rescue_args {
439
+ vm_render_until_error_args_t *render_args;
440
+ size_t old_stack_byte_size;
441
+ } vm_evaluate_rescue_args_t;
442
+
443
+ static VALUE vm_evaluate_rescue(VALUE uncast_args, VALUE exception)
444
+ {
445
+ vm_evaluate_rescue_args_t *args = (void *)uncast_args;
446
+ vm_render_until_error_args_t *render_args = args->render_args;
447
+ vm_t *vm = render_args->vm;
448
+
449
+ vm->stack.data_end = vm->stack.data + args->old_stack_byte_size;
450
+
451
+ rb_exc_raise(exception);
452
+ return Qnil;
453
+ }
454
+
427
455
  // Evaluate instructions that avoid using rendering instructions and leave with the result on
428
456
  // the top of the stack
429
457
  VALUE liquid_vm_evaluate(VALUE context, vm_assembler_t *code)
430
458
  {
431
459
  vm_t *vm = vm_from_context(context);
432
460
  vm_stack_reserve_for_write(vm, code->max_stack_size);
433
- #ifndef NDEBUG
434
- size_t old_stack_byte_size = c_buffer_size(&vm->stack);
435
- #endif
436
461
 
437
462
  vm_render_until_error_args_t args = {
438
463
  .vm = vm,
439
464
  .const_ptr = (const size_t *)code->constants.data,
440
465
  .ip = code->instructions.data
441
466
  };
442
- vm_render_until_error((VALUE)&args);
467
+ vm_evaluate_rescue_args_t rescue_args = {
468
+ .render_args = &args,
469
+ .old_stack_byte_size = c_buffer_size(&vm->stack),
470
+ };
471
+ rb_rescue(vm_render_until_error, (VALUE)&args, vm_evaluate_rescue, (VALUE)&rescue_args);
472
+
443
473
  VALUE ret = vm_stack_pop(vm);
444
- assert(old_stack_byte_size == c_buffer_size(&vm->stack));
474
+ assert(rescue_args.old_stack_byte_size == c_buffer_size(&vm->stack));
445
475
  return ret;
446
476
  }
447
477
 
@@ -67,28 +67,40 @@ static VALUE parse_number(parser_t *p)
67
67
  return out;
68
68
  }
69
69
 
70
+ __attribute__((noreturn)) static void raise_invalid_expression_type(const char *expr, int expr_len)
71
+ {
72
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Invalid expression type '%.*s' in range expression", expr_len, expr);
73
+ }
74
+
70
75
  static VALUE try_parse_constant_range(parser_t *p)
71
76
  {
72
77
  parser_t saved_state = *p;
73
78
 
74
79
  parser_must_consume(p, TOKEN_OPEN_ROUND);
75
80
 
81
+ const char *begin_str = p->cur.val;
76
82
  VALUE begin = try_parse_constant_expression(p);
83
+ const char *begin_str_end = p->cur.val;
77
84
  if (begin == Qundef) {
78
85
  *p = saved_state;
79
86
  return Qundef;
80
87
  }
81
88
  parser_must_consume(p, TOKEN_DOTDOT);
82
89
 
90
+ const char *end_str = p->cur.val;
83
91
  VALUE end = try_parse_constant_expression(p);
92
+ const char *end_str_end = p->cur.val;
84
93
  if (end == Qundef) {
85
94
  *p = saved_state;
86
95
  return Qundef;
87
96
  }
88
97
  parser_must_consume(p, TOKEN_CLOSE_ROUND);
89
98
 
90
- begin = rb_funcall(begin, id_to_i, 0);
91
- end = rb_funcall(end, id_to_i, 0);
99
+ begin = rb_check_funcall(begin, id_to_i, 0, NULL);
100
+ if (begin == Qundef) raise_invalid_expression_type(begin_str, (int)(begin_str_end - begin_str));
101
+
102
+ end = rb_check_funcall(end, id_to_i, 0, NULL);
103
+ if (end == Qundef) raise_invalid_expression_type(end_str, (int)(end_str_end - end_str));
92
104
 
93
105
  bool exclude_end = false;
94
106
  return rb_range_new(begin, end, exclude_end);
data/ext/liquid_c/raw.c CHANGED
@@ -27,25 +27,51 @@ static bool match_full_token_possibly_invalid(token_t *token, struct full_token_
27
27
  const char *curr_delimiter_start;
28
28
  long curr_delimiter_len = 0;
29
29
 
30
- for (long i = len - 3; i >= 0; i--) {
30
+ bool is_last_char_whitespace = true;
31
+
32
+ // Search from the end of the string.
33
+ // The token could have a part of the body like this:
34
+ // {% endraw {% endraw %}
35
+ // In this case, we need to return body_len to 10 to preserve the body content.
36
+ for (long i = len - 3; i > 1; i--) {
31
37
  char c = str[i];
32
38
 
39
+ // match \s
40
+ bool is_whitespace = rb_isspace(c);
41
+
33
42
  if (is_word_char(c)) {
34
43
  curr_delimiter_start = str + i;
35
- curr_delimiter_len++;
36
- } else {
37
- if (curr_delimiter_len > 0) {
38
- match->delimiter_start = curr_delimiter_start;
39
- match->delimiter_len = curr_delimiter_len;
44
+
45
+ if (is_last_char_whitespace) {
46
+ // start a new delimiter match
47
+ curr_delimiter_len = 1;
48
+ } else {
49
+ curr_delimiter_len++;
40
50
  }
51
+ } else if (!is_word_char(c) && !is_whitespace) {
41
52
  curr_delimiter_start = NULL;
42
53
  curr_delimiter_len = 0;
43
54
  }
44
55
 
45
- if (c == '%' && match->delimiter_len > 0 &&
46
- i - 1 >= 0 && str[i - 1] == '{') {
47
- match->body_len = i - 1;
48
- return true;
56
+ is_last_char_whitespace = is_whitespace;
57
+
58
+ if (curr_delimiter_len > 0) {
59
+ // match start of a tag which is {% or {%-
60
+ if (
61
+ (str[i - 1] == '%' && str[i - 2] == '{') ||
62
+ (i - 3 >= 0 && str[i - 1] == '-' && str[i - 2] == '%' && str[i - 3] == '{')
63
+ ) {
64
+ match->delimiter_start = curr_delimiter_start;
65
+ match->delimiter_len = curr_delimiter_len;
66
+
67
+ if (str[i - 1] == '-') {
68
+ match->body_len = i - 3;
69
+ } else {
70
+ match->body_len = i - 2;
71
+ }
72
+
73
+ return true;
74
+ }
49
75
  }
50
76
  }
51
77
 
@@ -290,9 +290,9 @@ void liquid_define_tokenizer(void)
290
290
  rb_define_method(cLiquidTokenizer, "line_number", tokenizer_line_number_method, 0);
291
291
  rb_define_method(cLiquidTokenizer, "for_liquid_tag", tokenizer_for_liquid_tag_method, 0);
292
292
  rb_define_method(cLiquidTokenizer, "bug_compatible_whitespace_trimming!", tokenizer_bug_compatible_whitespace_trimming, 0);
293
+ rb_define_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
293
294
 
294
295
  // For testing the internal token representation.
295
- rb_define_private_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
296
296
  rb_define_private_method(cLiquidTokenizer, "shift_trimmed", tokenizer_shift_trimmed_method, 0);
297
297
  }
298
298
 
@@ -2,7 +2,7 @@
2
2
  #include "variable.h"
3
3
  #include "parser.h"
4
4
  #include "expression.h"
5
- #include "vm.h"
5
+ #include "liquid_vm.h"
6
6
 
7
7
  #include <stdio.h>
8
8
 
@@ -1,7 +1,7 @@
1
1
  #include "liquid.h"
2
2
  #include "vm_assembler.h"
3
3
  #include "expression.h"
4
- #include "vm.h"
4
+ #include "liquid_vm.h"
5
5
 
6
6
  #define ARRAY_LENGTH(array) (sizeof(array) / sizeof(array[0]))
7
7
 
@@ -3,8 +3,10 @@
3
3
 
4
4
  static VALUE cLiquidCVMAssemblerPool;
5
5
 
6
- void vm_assembler_pool_gc_mark(vm_assembler_pool_t *pool)
6
+ static void vm_assembler_pool_mark(void *ptr)
7
7
  {
8
+ vm_assembler_pool_t *pool = ptr;
9
+
8
10
  rb_gc_mark(pool->self);
9
11
  }
10
12
 
@@ -39,7 +41,7 @@ static size_t vm_assembler_pool_memsize(const void *ptr)
39
41
 
40
42
  const rb_data_type_t vm_assembler_pool_data_type = {
41
43
  "liquid_vm_assembler_pool",
42
- { NULL, vm_assembler_pool_free, vm_assembler_pool_memsize, },
44
+ { vm_assembler_pool_mark, vm_assembler_pool_free, vm_assembler_pool_memsize, },
43
45
  NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
44
46
  };
45
47
 
@@ -18,7 +18,6 @@ extern const rb_data_type_t vm_assembler_pool_data_type;
18
18
  #define VMAssemblerPool_Get_Struct(obj, sval) TypedData_Get_Struct(obj, vm_assembler_pool_t, &vm_assembler_pool_data_type, sval)
19
19
 
20
20
  void liquid_define_vm_assembler_pool(void);
21
- void vm_assembler_pool_gc_mark(vm_assembler_pool_t *pool);
22
21
  VALUE vm_assembler_pool_new(void);
23
22
  vm_assembler_t *vm_assembler_pool_alloc_assembler(vm_assembler_pool_t *pool);
24
23
  void vm_assembler_pool_free_assembler(vm_assembler_t *assembler);
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Liquid
4
4
  module C
5
- VERSION = "4.1.0"
5
+ VERSION = "4.2.0"
6
6
  end
7
7
  end
data/lib/liquid/c.rb CHANGED
@@ -86,7 +86,7 @@ Liquid::ParseContext.class_eval do
86
86
 
87
87
  def parse_expression(markup)
88
88
  if liquid_c_nodes_disabled?
89
- Liquid::Expression.ruby_parse(markup)
89
+ Liquid::Expression.parse(markup)
90
90
  else
91
91
  Liquid::C::Expression.lax_parse(markup)
92
92
  end
@@ -207,22 +207,17 @@ Liquid::C::Expression.class_eval do
207
207
  def lax_parse(markup)
208
208
  strict_parse(markup)
209
209
  rescue Liquid::SyntaxError
210
- Liquid::Expression.ruby_parse(markup)
210
+ Liquid::Expression.parse(markup)
211
211
  end
212
- end
213
- end
214
-
215
- Liquid::Expression.class_eval do
216
- class << self
217
- alias_method :ruby_parse, :parse
218
212
 
219
- def c_parse(markup)
220
- Liquid::C::Expression.lax_parse(markup)
221
- end
213
+ # Default to strict parsing, since Liquid::C::Expression.parse should only really
214
+ # be used with constant expressions. Otherwise, prefer parse_context.parse_expression.
215
+ alias_method :parse, :strict_parse
222
216
  end
223
217
  end
224
218
 
225
219
  Liquid::Context.class_eval do
220
+ alias_method :ruby_parse_evaluate, :[]
226
221
  alias_method :ruby_evaluate, :evaluate
227
222
  alias_method :ruby_find_variable, :find_variable
228
223
  alias_method :ruby_strict_variables=, :strict_variables=
@@ -232,6 +227,10 @@ Liquid::Context.class_eval do
232
227
  def c_find_variable_kwarg(key, raise_on_not_found: true)
233
228
  c_find_variable(key, raise_on_not_found)
234
229
  end
230
+
231
+ def c_parse_evaluate(expression)
232
+ c_evaluate(Liquid::C::Expression.lax_parse(expression))
233
+ end
235
234
  end
236
235
 
237
236
  Liquid::ResourceLimits.class_eval do
@@ -256,15 +255,15 @@ module Liquid
256
255
  def enabled=(value)
257
256
  @enabled = value
258
257
  if value
258
+ Liquid::Context.send(:alias_method, :[], :c_parse_evaluate)
259
259
  Liquid::Context.send(:alias_method, :evaluate, :c_evaluate)
260
260
  Liquid::Context.send(:alias_method, :find_variable, :c_find_variable_kwarg)
261
261
  Liquid::Context.send(:alias_method, :strict_variables=, :c_strict_variables=)
262
- Liquid::Expression.singleton_class.send(:alias_method, :parse, :c_parse)
263
262
  else
263
+ Liquid::Context.send(:alias_method, :[], :ruby_parse_evaluate)
264
264
  Liquid::Context.send(:alias_method, :evaluate, :ruby_evaluate)
265
265
  Liquid::Context.send(:alias_method, :find_variable, :ruby_find_variable)
266
266
  Liquid::Context.send(:alias_method, :strict_variables=, :ruby_strict_variables=)
267
- Liquid::Expression.singleton_class.send(:alias_method, :parse, :ruby_parse)
268
267
  end
269
268
  end
270
269
  end
data/test/test_helper.rb CHANGED
@@ -1,14 +1,21 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- at_exit { GC.start }
4
-
5
3
  require "minitest/autorun"
6
4
  require "liquid/c"
7
5
 
8
6
  if GC.respond_to?(:verify_compaction_references)
9
7
  # This method was added in Ruby 3.0.0. Calling it this way asks the GC to
10
8
  # move objects around, helping to find object movement bugs.
11
- GC.verify_compaction_references(double_heap: true, toward: :empty)
9
+ begin
10
+ GC.verify_compaction_references(double_heap: true, toward: :empty)
11
+ rescue NotImplementedError
12
+ puts "W: GC compaction not suppported by platform"
13
+ end
14
+ end
15
+
16
+ # Enable auto-compaction in the GC if supported.
17
+ if GC.respond_to?(:auto_compact=)
18
+ GC.auto_compact = true
12
19
  end
13
20
 
14
21
  GC.stress = true if ENV["GC_STRESS"]
@@ -2,7 +2,7 @@
2
2
 
3
3
  require "test_helper"
4
4
 
5
- class BlockTest < MiniTest::Test
5
+ class BlockTest < Minitest::Test
6
6
  def test_no_allocation_of_trimmed_strings
7
7
  template = Liquid::Template.parse("{{ a -}} {{- b }}")
8
8
  assert_equal(2, template.root.nodelist.size)
@@ -127,4 +127,11 @@ class BlockTest < MiniTest::Test
127
127
  Liquid::Template.file_system = old_file_system
128
128
  end
129
129
  end
130
+
131
+ def test_assign_filter_argument_exception
132
+ source = "{% assign v = 'IN' | truncate: 123, liquid_error %}{{ v | default: 'err swallowed' }}"
133
+ template = Liquid::Template.parse(source)
134
+ output = template.render({ "liquid_error" => -> { raise Liquid::Error, "var lookup error" } })
135
+ assert_equal("err swallowed", output)
136
+ end
130
137
  end
@@ -29,6 +29,8 @@ class ContextTest < Minitest::Test
29
29
  end
30
30
 
31
31
  def test_evaluating_a_variable_entirely_within_c
32
+ skip("TracePoint :call not yet supported") if RUBY_ENGINE == "truffleruby"
33
+
32
34
  context = Liquid::Context.new({ "var" => 42 })
33
35
  lookup = Liquid::C::Expression.strict_parse("var")
34
36
  context.evaluate(lookup) # memoize vm_internal_new calls
@@ -2,7 +2,7 @@
2
2
 
3
3
  require "test_helper"
4
4
 
5
- class ExpressionTest < MiniTest::Test
5
+ class ExpressionTest < Minitest::Test
6
6
  def test_constant_literals
7
7
  assert_equal(true, Liquid::C::Expression.strict_parse("true"))
8
8
  assert_equal(false, Liquid::C::Expression.strict_parse("false"))
@@ -159,7 +159,7 @@ class ExpressionTest < MiniTest::Test
159
159
  end
160
160
 
161
161
  def test_disable_c_nodes
162
- context = Liquid::Context.new({ "x" => 123 })
162
+ context = Liquid::Context.new({ "x" => 123, "y" => { 123 => 42 } })
163
163
 
164
164
  expr = Liquid::ParseContext.new.parse_expression("x")
165
165
  assert_instance_of(Liquid::C::Expression, expr)
@@ -168,6 +168,11 @@ class ExpressionTest < MiniTest::Test
168
168
  expr = Liquid::ParseContext.new(disable_liquid_c_nodes: true).parse_expression("x")
169
169
  assert_instance_of(Liquid::VariableLookup, expr)
170
170
  assert_equal(123, context.evaluate(expr))
171
+
172
+ expr = Liquid::ParseContext.new(disable_liquid_c_nodes: true).parse_expression("y[x]")
173
+ assert_instance_of(Liquid::VariableLookup, expr)
174
+ assert_instance_of(Liquid::VariableLookup, expr.lookups.first)
175
+ assert_equal(42, context.evaluate(expr))
171
176
  end
172
177
 
173
178
  private
@@ -13,7 +13,81 @@ class RawTest < Minitest::Test
13
13
  Liquid::Template.register_tag("raw_wrapper", RawWrapper)
14
14
 
15
15
  def test_derived_class
16
- output = Liquid::Template.parse("{% raw_wrapper %}body{% endraw_wrapper %}").render!
17
- assert_equal("<body>", output)
16
+ [
17
+ "{% raw_wrapper %}body{% endraw_wrapper %}",
18
+ "{% raw_wrapper %}body{%endraw_wrapper%}",
19
+ "{% raw_wrapper %}body{%- endraw_wrapper -%}",
20
+ "{% raw_wrapper %}body{%- endraw_wrapper %}",
21
+ "{% raw_wrapper %}body{% endraw_wrapper -%}",
22
+ ].each do |template|
23
+ output = Liquid::Template.parse(template).render!
24
+
25
+ assert_equal(
26
+ "<body>",
27
+ output,
28
+ "Template: #{template}"
29
+ )
30
+ end
31
+ end
32
+
33
+ def test_allows_extra_string_after_tag_delimiter
34
+ output = Liquid::Template.parse("{% raw %}message{% endraw this_is_allowed %}").render
35
+ assert_equal("message", output)
36
+
37
+ output = Liquid::Template.parse("{% raw %}message{% endraw r%}").render
38
+ assert_equal("message", output)
39
+ end
40
+
41
+ def test_ignores_incomplete_tag_delimter
42
+ output = Liquid::Template.parse("{% raw %}{% endraw {% endraw %}").render
43
+ assert_equal("{% endraw ", output)
44
+
45
+ output = Liquid::Template.parse("{% raw %}{%endraw{% endraw %}").render
46
+ assert_equal("{%endraw", output)
47
+
48
+ output = Liquid::Template.parse("{% raw %}{%- endraw {% endraw %}").render
49
+ assert_equal("{%- endraw ", output)
50
+ end
51
+
52
+ def test_does_not_allow_nbsp_in_tag_delimiter
53
+ # these are valid
54
+ Liquid::Template.parse("{% raw %}body{%endraw%}")
55
+ Liquid::Template.parse("{% raw %}body{% endraw-%}")
56
+ Liquid::Template.parse("{% raw %}body{% endraw -%}")
57
+ Liquid::Template.parse("{% raw %}body{%-endraw %}")
58
+ Liquid::Template.parse("{% raw %}body{%- endraw %}")
59
+ Liquid::Template.parse("{% raw %}body{%-endraw-%}")
60
+ Liquid::Template.parse("{% raw %}body{%- endraw -%}")
61
+ Liquid::Template.parse("{% raw %}body{% endraw\u00A0%}")
62
+ Liquid::Template.parse("{% raw %}body{% endraw \u00A0%}")
63
+ Liquid::Template.parse("{% raw %}body{% endraw\u00A0 %}")
64
+ Liquid::Template.parse("{% raw %}body{% endraw \u00A0 %}")
65
+ Liquid::Template.parse("{% raw %}body{% endraw \u00A0 endraw %}")
66
+ Liquid::Template.parse("{% raw %}body{% endraw\u00A0endraw %}")
67
+
68
+ [
69
+ "{%\u00A0endraw%}",
70
+ "{%\u00A0 endraw%}",
71
+ "{% \u00A0endraw%}",
72
+ "{% \u00A0 endraw%}",
73
+ "{%\u00A0endraw\u00A0%}",
74
+ "{% - endraw %}",
75
+ "{% endnot endraw %}",
76
+ ].each do |bad_delimiter|
77
+ exception = assert_raises(
78
+ Liquid::SyntaxError,
79
+ "#{bad_delimiter.inspect} did not raise Liquid::SyntaxError"
80
+ ) do
81
+ Liquid::Template.parse(
82
+ "{% raw %}body#{bad_delimiter}"
83
+ )
84
+ end
85
+
86
+ assert_equal(
87
+ exception.message,
88
+ "Liquid syntax error: 'raw' tag was never closed",
89
+ "#{bad_delimiter.inspect} raised the wrong exception message",
90
+ )
91
+ end
18
92
  end
19
93
  end
@@ -6,7 +6,7 @@ require "test_helper"
6
6
  class TokenizerTest < Minitest::Test
7
7
  def test_tokenizer_nil
8
8
  tokenizer = new_tokenizer(nil)
9
- assert_nil(tokenizer.send(:shift))
9
+ assert_nil(tokenizer.shift)
10
10
  end
11
11
 
12
12
  def test_tokenize_strings
@@ -60,10 +60,10 @@ class TokenizerTest < Minitest::Test
60
60
  def test_utf8_compatible_source
61
61
  source = String.new("ascii", encoding: Encoding::ASCII)
62
62
  tokenizer = new_tokenizer(source)
63
- output = tokenizer.send(:shift)
63
+ output = tokenizer.shift
64
64
  assert_equal(Encoding::UTF_8, output.encoding)
65
65
  assert_equal(source, output)
66
- assert_nil(tokenizer.send(:shift))
66
+ assert_nil(tokenizer.shift)
67
67
  end
68
68
 
69
69
  def test_non_utf8_compatible_source
@@ -105,7 +105,7 @@ class TokenizerTest < Minitest::Test
105
105
  def tokenize(source, for_liquid_tag: false, trimmed: false)
106
106
  tokenizer = Liquid::C::Tokenizer.new(source, 1, for_liquid_tag)
107
107
  tokens = []
108
- while (t = trimmed ? tokenizer.send(:shift_trimmed) : tokenizer.send(:shift))
108
+ while (t = trimmed ? tokenizer.send(:shift_trimmed) : tokenizer.shift)
109
109
  tokens << t
110
110
  end
111
111
  tokens
@@ -254,6 +254,73 @@ class VariableTest < Minitest::Test
254
254
  assert_equal("2", output)
255
255
  end
256
256
 
257
+ def test_encoding_error_message_with_multi_byte_characters
258
+ # 2 byte character
259
+ exc = assert_raises(Liquid::SyntaxError) do
260
+ variable_strict_parse("\u00A0")
261
+ end
262
+ assert_equal(
263
+ "Liquid syntax error: Unexpected character \u00A0 in \"{{\u00a0}}\"",
264
+ exc.message
265
+ )
266
+
267
+ # 3 byte character
268
+ exc = assert_raises(Liquid::SyntaxError) do
269
+ variable_strict_parse("\u3042")
270
+ end
271
+ assert_equal(
272
+ "Liquid syntax error: Unexpected character \u3042 in \"{{\u3042}}\"",
273
+ exc.message
274
+ )
275
+
276
+ # 4 byte character
277
+ exc = assert_raises(Liquid::SyntaxError) do
278
+ variable_strict_parse("\u{1F600}")
279
+ end
280
+ assert_equal(
281
+ "Liquid syntax error: Unexpected character \u{1F600} in \"{{\u{1F600}}}\"",
282
+ exc.message
283
+ )
284
+ end
285
+
286
+ def test_invalid_utf8_sequence
287
+ # 2 byte character with 1 byte missing
288
+ exc = assert_raises(ArgumentError) do
289
+ variable_strict_parse("\xC0")
290
+ end
291
+ assert_equal("invalid byte sequence in UTF-8", exc.message)
292
+
293
+ # 3 byte character with 1 byte missing
294
+ exc = assert_raises(ArgumentError) do
295
+ variable_strict_parse("\xE0\x01")
296
+ end
297
+ assert_equal("invalid byte sequence in UTF-8", exc.message)
298
+
299
+ # 3 byte character with 2 byte missing
300
+ exc = assert_raises(ArgumentError) do
301
+ variable_strict_parse("\xE0")
302
+ end
303
+ assert_equal("invalid byte sequence in UTF-8", exc.message)
304
+
305
+ # 4 byte character with 1 byte missing
306
+ exc = assert_raises(ArgumentError) do
307
+ variable_strict_parse("\xF0\x01\x01")
308
+ end
309
+ assert_equal("invalid byte sequence in UTF-8", exc.message)
310
+
311
+ # 4 byte character with 2 byte missing
312
+ exc = assert_raises(ArgumentError) do
313
+ variable_strict_parse("\xF0\x01")
314
+ end
315
+ assert_equal("invalid byte sequence in UTF-8", exc.message)
316
+
317
+ # 4 byte character with 3 byte missing
318
+ exc = assert_raises(ArgumentError) do
319
+ variable_strict_parse("\xF0")
320
+ end
321
+ assert_equal("invalid byte sequence in UTF-8", exc.message)
322
+ end
323
+
257
324
  private
258
325
 
259
326
  def variable_strict_parse(markup)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: liquid-c
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.1.0
4
+ version: 4.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Li
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-02-08 00:00:00.000000000 Z
12
+ date: 2024-01-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: liquid
@@ -103,6 +103,7 @@ extensions:
103
103
  - ext/liquid_c/extconf.rb
104
104
  extra_rdoc_files: []
105
105
  files:
106
+ - ".github/workflows/cla.yml"
106
107
  - ".github/workflows/liquid.yml"
107
108
  - ".gitignore"
108
109
  - ".rubocop.yml"
@@ -126,6 +127,8 @@ files:
126
127
  - ext/liquid_c/lexer.h
127
128
  - ext/liquid_c/liquid.c
128
129
  - ext/liquid_c/liquid.h
130
+ - ext/liquid_c/liquid_vm.c
131
+ - ext/liquid_c/liquid_vm.h
129
132
  - ext/liquid_c/parse_context.c
130
133
  - ext/liquid_c/parse_context.h
131
134
  - ext/liquid_c/parser.c
@@ -143,8 +146,6 @@ files:
143
146
  - ext/liquid_c/variable.h
144
147
  - ext/liquid_c/variable_lookup.c
145
148
  - ext/liquid_c/variable_lookup.h
146
- - ext/liquid_c/vm.c
147
- - ext/liquid_c/vm.h
148
149
  - ext/liquid_c/vm_assembler.c
149
150
  - ext/liquid_c/vm_assembler.h
150
151
  - ext/liquid_c/vm_assembler_pool.c
@@ -191,7 +192,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
191
192
  - !ruby/object:Gem::Version
192
193
  version: '0'
193
194
  requirements: []
194
- rubygems_version: 3.2.20
195
+ rubygems_version: 3.5.4
195
196
  signing_key:
196
197
  specification_version: 4
197
198
  summary: Liquid performance extension in C
File without changes