liquid-c 4.0.1 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/cla.yml +23 -0
  3. data/.github/workflows/liquid.yml +36 -11
  4. data/.gitignore +4 -0
  5. data/.rubocop.yml +14 -0
  6. data/Gemfile +15 -5
  7. data/README.md +32 -8
  8. data/Rakefile +12 -63
  9. data/ext/liquid_c/block.c +493 -60
  10. data/ext/liquid_c/block.h +28 -2
  11. data/ext/liquid_c/c_buffer.c +42 -0
  12. data/ext/liquid_c/c_buffer.h +76 -0
  13. data/ext/liquid_c/context.c +233 -0
  14. data/ext/liquid_c/context.h +70 -0
  15. data/ext/liquid_c/document_body.c +97 -0
  16. data/ext/liquid_c/document_body.h +59 -0
  17. data/ext/liquid_c/expression.c +116 -0
  18. data/ext/liquid_c/expression.h +24 -0
  19. data/ext/liquid_c/extconf.rb +21 -9
  20. data/ext/liquid_c/intutil.h +22 -0
  21. data/ext/liquid_c/lexer.c +39 -3
  22. data/ext/liquid_c/lexer.h +18 -3
  23. data/ext/liquid_c/liquid.c +76 -6
  24. data/ext/liquid_c/liquid.h +24 -1
  25. data/ext/liquid_c/liquid_vm.c +618 -0
  26. data/ext/liquid_c/liquid_vm.h +25 -0
  27. data/ext/liquid_c/parse_context.c +76 -0
  28. data/ext/liquid_c/parse_context.h +13 -0
  29. data/ext/liquid_c/parser.c +153 -65
  30. data/ext/liquid_c/parser.h +4 -2
  31. data/ext/liquid_c/raw.c +136 -0
  32. data/ext/liquid_c/raw.h +6 -0
  33. data/ext/liquid_c/resource_limits.c +279 -0
  34. data/ext/liquid_c/resource_limits.h +23 -0
  35. data/ext/liquid_c/stringutil.h +44 -0
  36. data/ext/liquid_c/tokenizer.c +149 -35
  37. data/ext/liquid_c/tokenizer.h +20 -9
  38. data/ext/liquid_c/usage.c +18 -0
  39. data/ext/liquid_c/usage.h +9 -0
  40. data/ext/liquid_c/variable.c +196 -20
  41. data/ext/liquid_c/variable.h +18 -1
  42. data/ext/liquid_c/variable_lookup.c +44 -0
  43. data/ext/liquid_c/variable_lookup.h +8 -0
  44. data/ext/liquid_c/vm_assembler.c +491 -0
  45. data/ext/liquid_c/vm_assembler.h +240 -0
  46. data/ext/liquid_c/vm_assembler_pool.c +99 -0
  47. data/ext/liquid_c/vm_assembler_pool.h +26 -0
  48. data/lib/liquid/c/compile_ext.rb +44 -0
  49. data/lib/liquid/c/version.rb +3 -1
  50. data/lib/liquid/c.rb +226 -48
  51. data/liquid-c.gemspec +16 -10
  52. data/performance/c_profile.rb +23 -0
  53. data/performance.rb +6 -4
  54. data/rakelib/compile.rake +15 -0
  55. data/rakelib/integration_test.rake +43 -0
  56. data/rakelib/performance.rake +43 -0
  57. data/rakelib/rubocop.rake +6 -0
  58. data/rakelib/unit_test.rake +14 -0
  59. data/test/integration_test.rb +11 -0
  60. data/test/liquid_test_helper.rb +21 -0
  61. data/test/test_helper.rb +21 -2
  62. data/test/unit/block_test.rb +137 -0
  63. data/test/unit/context_test.rb +85 -0
  64. data/test/unit/expression_test.rb +191 -0
  65. data/test/unit/gc_stress_test.rb +28 -0
  66. data/test/unit/raw_test.rb +93 -0
  67. data/test/unit/resource_limits_test.rb +50 -0
  68. data/test/unit/tokenizer_test.rb +90 -20
  69. data/test/unit/variable_test.rb +279 -60
  70. metadata +60 -11
  71. data/test/liquid_test.rb +0 -11
@@ -0,0 +1,279 @@
1
+ #include "liquid.h"
2
+ #include "resource_limits.h"
3
+
4
+ VALUE cLiquidResourceLimits;
5
+
6
/* dfree callback of resource_limits_data_type: releases the wrapped struct. */
static void resource_limits_free(void *ptr)
{
    xfree(ptr);
}
11
+
12
+ static size_t resource_limits_memsize(const void *ptr)
13
+ {
14
+ return sizeof(resource_limits_t);
15
+ }
16
+
17
+ const rb_data_type_t resource_limits_data_type = {
18
+ "liquid_resource_limits",
19
+ { NULL, resource_limits_free, resource_limits_memsize },
20
+ NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
21
+ };
22
+
23
+ static void resource_limits_reset(resource_limits_t *resource_limit)
24
+ {
25
+ resource_limit->reached_limit = true;
26
+ resource_limit->last_capture_length = -1;
27
+ resource_limit->render_score = 0;
28
+ resource_limit->assign_score = 0;
29
+ }
30
+
31
+ static VALUE resource_limits_allocate(VALUE klass)
32
+ {
33
+ resource_limits_t *resource_limits;
34
+
35
+ VALUE obj = TypedData_Make_Struct(klass, resource_limits_t, &resource_limits_data_type, resource_limits);
36
+
37
+ resource_limits_reset(resource_limits);
38
+
39
+ return obj;
40
+ }
41
+
42
+ static VALUE resource_limits_render_length_limit_method(VALUE self)
43
+ {
44
+ resource_limits_t *resource_limits;
45
+ ResourceLimits_Get_Struct(self, resource_limits);
46
+
47
+ return LONG2NUM(resource_limits->render_length_limit);
48
+ }
49
+
50
+ static VALUE resource_limits_set_render_length_limit_method(VALUE self, VALUE render_length_limit)
51
+ {
52
+ resource_limits_t *resource_limits;
53
+ ResourceLimits_Get_Struct(self, resource_limits);
54
+
55
+ if (render_length_limit == Qnil) {
56
+ resource_limits->render_length_limit = LONG_MAX;
57
+ } else {
58
+ resource_limits->render_length_limit = NUM2LONG(render_length_limit);
59
+ }
60
+
61
+ return Qnil;
62
+ }
63
+
64
+ static VALUE resource_limits_render_score_limit_method(VALUE self)
65
+ {
66
+ resource_limits_t *resource_limits;
67
+ ResourceLimits_Get_Struct(self, resource_limits);
68
+
69
+ return LONG2NUM(resource_limits->render_score_limit);
70
+ }
71
+
72
+ static VALUE resource_limits_set_render_score_limit_method(VALUE self, VALUE render_score_limit)
73
+ {
74
+ resource_limits_t *resource_limits;
75
+ ResourceLimits_Get_Struct(self, resource_limits);
76
+
77
+ if (render_score_limit == Qnil) {
78
+ resource_limits->render_score_limit = LONG_MAX;
79
+ } else {
80
+ resource_limits->render_score_limit = NUM2LONG(render_score_limit);
81
+ }
82
+
83
+ return Qnil;
84
+ }
85
+
86
+ static VALUE resource_limits_assign_score_limit_method(VALUE self)
87
+ {
88
+ resource_limits_t *resource_limits;
89
+ ResourceLimits_Get_Struct(self, resource_limits);
90
+
91
+ return LONG2NUM(resource_limits->assign_score_limit);
92
+ }
93
+
94
+ static VALUE resource_limits_set_assign_score_limit_method(VALUE self, VALUE assign_score_limit)
95
+ {
96
+ resource_limits_t *resource_limits;
97
+ ResourceLimits_Get_Struct(self, resource_limits);
98
+
99
+ if (assign_score_limit == Qnil) {
100
+ resource_limits->assign_score_limit = LONG_MAX;
101
+ } else {
102
+ resource_limits->assign_score_limit = NUM2LONG(assign_score_limit);
103
+ }
104
+
105
+ return Qnil;
106
+ }
107
+
108
+ static VALUE resource_limits_render_score_method(VALUE self)
109
+ {
110
+ resource_limits_t *resource_limits;
111
+ ResourceLimits_Get_Struct(self, resource_limits);
112
+
113
+ return LONG2NUM(resource_limits->render_score);
114
+ }
115
+
116
+ static VALUE resource_limits_assign_score_method(VALUE self)
117
+ {
118
+ resource_limits_t *resource_limits;
119
+ ResourceLimits_Get_Struct(self, resource_limits);
120
+
121
+ return LONG2NUM(resource_limits->assign_score);
122
+ }
123
+
124
+ static VALUE resource_limits_initialize_method(VALUE self, VALUE render_length_limit,
125
+ VALUE render_score_limit, VALUE assign_score_limit)
126
+ {
127
+ resource_limits_set_render_length_limit_method(self, render_length_limit);
128
+ resource_limits_set_render_score_limit_method(self, render_score_limit);
129
+ resource_limits_set_assign_score_limit_method(self, assign_score_limit);
130
+
131
+ return Qnil;
132
+ }
133
+
134
+ __attribute__((noreturn))
135
+ void resource_limits_raise_limits_reached(resource_limits_t *resource_limit)
136
+ {
137
+ resource_limit->reached_limit = true;
138
+ rb_raise(cMemoryError, "Memory limits exceeded");
139
+ }
140
+
141
+ void resource_limits_increment_render_score(resource_limits_t *resource_limits, long amount)
142
+ {
143
+ resource_limits->render_score = resource_limits->render_score + amount;
144
+
145
+ if (resource_limits->render_score > resource_limits->render_score_limit) {
146
+ resource_limits_raise_limits_reached(resource_limits);
147
+ }
148
+ }
149
+
150
+ static VALUE resource_limits_increment_render_score_method(VALUE self, VALUE amount)
151
+ {
152
+ resource_limits_t *resource_limits;
153
+ ResourceLimits_Get_Struct(self, resource_limits);
154
+
155
+ resource_limits_increment_render_score(resource_limits, NUM2LONG(amount));
156
+
157
+ return Qnil;
158
+ }
159
+
160
+ static void resource_limits_increment_assign_score(resource_limits_t *resource_limits, long amount)
161
+ {
162
+ resource_limits->assign_score = resource_limits->assign_score + amount;
163
+
164
+ if (resource_limits->assign_score > resource_limits->assign_score_limit) {
165
+ resource_limits_raise_limits_reached(resource_limits);
166
+ }
167
+ }
168
+
169
+ static VALUE resource_limits_increment_assign_score_method(VALUE self, VALUE amount)
170
+ {
171
+ resource_limits_t *resource_limits;
172
+ ResourceLimits_Get_Struct(self, resource_limits);
173
+
174
+ resource_limits_increment_assign_score(resource_limits, NUM2LONG(amount));
175
+
176
+ return Qnil;
177
+ }
178
+
179
+ void resource_limits_increment_write_score(resource_limits_t *resource_limits, VALUE output)
180
+ {
181
+ long captured = RSTRING_LEN(output);
182
+
183
+ if (resource_limits->last_capture_length >= 0) {
184
+ long increment = captured - resource_limits->last_capture_length;
185
+ resource_limits->last_capture_length = captured;
186
+ resource_limits_increment_assign_score(resource_limits, increment);
187
+ } else if (captured > resource_limits->render_length_limit) {
188
+ resource_limits_raise_limits_reached(resource_limits);
189
+ }
190
+ }
191
+
192
+ static VALUE resource_limits_increment_write_score_method(VALUE self, VALUE output)
193
+ {
194
+ Check_Type(output, T_STRING);
195
+
196
+ resource_limits_t *resource_limits;
197
+ ResourceLimits_Get_Struct(self, resource_limits);
198
+
199
+ resource_limits_increment_write_score(resource_limits, output);
200
+
201
+ return Qnil;
202
+ }
203
+
204
+ static VALUE resource_limits_raise_limits_reached_method(VALUE self)
205
+ {
206
+ resource_limits_t *resource_limits;
207
+ ResourceLimits_Get_Struct(self, resource_limits);
208
+
209
+ resource_limits_raise_limits_reached(resource_limits);
210
+ }
211
+
212
+ static VALUE resource_limits_reached_method(VALUE self)
213
+ {
214
+ resource_limits_t *resource_limits;
215
+ ResourceLimits_Get_Struct(self, resource_limits);
216
+
217
+ return resource_limits->reached_limit ? Qtrue : Qfalse;
218
+ }
219
+
220
+ struct capture_ensure_t {
221
+ resource_limits_t *resource_limits;
222
+ long old_capture_length;
223
+ };
224
+
225
+ static VALUE capture_ensure(VALUE data)
226
+ {
227
+ struct capture_ensure_t *ensure_data = (struct capture_ensure_t *)data;
228
+ ensure_data->resource_limits->last_capture_length = ensure_data->old_capture_length;
229
+
230
+ return Qnil;
231
+ }
232
+
233
+ static VALUE resource_limits_with_capture_method(VALUE self)
234
+ {
235
+ resource_limits_t *resource_limits;
236
+ ResourceLimits_Get_Struct(self, resource_limits);
237
+
238
+ struct capture_ensure_t ensure_data = {
239
+ .resource_limits = resource_limits,
240
+ .old_capture_length = resource_limits->last_capture_length
241
+ };
242
+
243
+ resource_limits->last_capture_length = 0;
244
+
245
+ return rb_ensure(rb_yield, Qundef, capture_ensure, (VALUE)&ensure_data);
246
+ }
247
+
248
+
249
+ static VALUE resource_limits_reset_method(VALUE self)
250
+ {
251
+ resource_limits_t *resource_limits;
252
+ ResourceLimits_Get_Struct(self, resource_limits);
253
+ resource_limits_reset(resource_limits);
254
+ return Qnil;
255
+ }
256
+
257
+ void liquid_define_resource_limits(void)
258
+ {
259
+ cLiquidResourceLimits = rb_define_class_under(mLiquidC, "ResourceLimits", rb_cObject);
260
+ rb_global_variable(&cLiquidResourceLimits);
261
+
262
+ rb_define_alloc_func(cLiquidResourceLimits, resource_limits_allocate);
263
+ rb_define_method(cLiquidResourceLimits, "initialize", resource_limits_initialize_method, 3);
264
+ rb_define_method(cLiquidResourceLimits, "render_length_limit", resource_limits_render_length_limit_method, 0);
265
+ rb_define_method(cLiquidResourceLimits, "render_length_limit=", resource_limits_set_render_length_limit_method, 1);
266
+ rb_define_method(cLiquidResourceLimits, "render_score_limit", resource_limits_render_score_limit_method, 0);
267
+ rb_define_method(cLiquidResourceLimits, "render_score_limit=", resource_limits_set_render_score_limit_method, 1);
268
+ rb_define_method(cLiquidResourceLimits, "assign_score_limit", resource_limits_assign_score_limit_method, 0);
269
+ rb_define_method(cLiquidResourceLimits, "assign_score_limit=", resource_limits_set_assign_score_limit_method, 1);
270
+ rb_define_method(cLiquidResourceLimits, "render_score", resource_limits_render_score_method, 0);
271
+ rb_define_method(cLiquidResourceLimits, "assign_score", resource_limits_assign_score_method, 0);
272
+ rb_define_method(cLiquidResourceLimits, "increment_render_score", resource_limits_increment_render_score_method, 1);
273
+ rb_define_method(cLiquidResourceLimits, "increment_assign_score", resource_limits_increment_assign_score_method, 1);
274
+ rb_define_method(cLiquidResourceLimits, "increment_write_score", resource_limits_increment_write_score_method, 1);
275
+ rb_define_method(cLiquidResourceLimits, "raise_limits_reached", resource_limits_raise_limits_reached_method, 0);
276
+ rb_define_method(cLiquidResourceLimits, "reached?", resource_limits_reached_method, 0);
277
+ rb_define_method(cLiquidResourceLimits, "reset", resource_limits_reset_method, 0);
278
+ rb_define_method(cLiquidResourceLimits, "with_capture", resource_limits_with_capture_method, 0);
279
+ }
@@ -0,0 +1,23 @@
1
+ #ifndef LIQUID_RESOURCE_LIMITS
2
+ #define LIQUID_RESOURCE_LIMITS
3
+
4
/* Limits and running counters backing a Liquid::C::ResourceLimits object. */
typedef struct resource_limits {
    /* Configured ceilings; the setters store LONG_MAX when given nil. */
    long render_length_limit;
    long render_score_limit;
    long assign_score_limit;

    /* Set when a limit check fails, just before the error is raised. */
    bool reached_limit;

    /* Output length seen at the last write inside a capture; negative when no capture is active. */
    long last_capture_length;

    /* Accumulated scores compared against the limits above. */
    long render_score;
    long assign_score;
} resource_limits_t;
13
+
14
+ extern VALUE cLiquidResourceLimits;
15
+ extern const rb_data_type_t resource_limits_data_type;
16
+ #define ResourceLimits_Get_Struct(obj, sval) TypedData_Get_Struct(obj, resource_limits_t, &resource_limits_data_type, sval)
17
+
18
+ void liquid_define_resource_limits(void);
19
+ void resource_limits_raise_limits_reached(resource_limits_t *resource_limit);
20
+ void resource_limits_increment_render_score(resource_limits_t *resource_limits, long amount);
21
+ void resource_limits_increment_write_score(resource_limits_t *resource_limits, VALUE output);
22
+
23
+ #endif
@@ -0,0 +1,44 @@
1
+ #if !defined(LIQUID_UTIL_H)
2
+ #define LIQUID_UTIL_H
3
+
4
/*
 * Scans forward from `start` and returns a pointer to the first byte in
 * [start, end) for which `func` returns zero, or `end` if every byte matches.
 * Bytes are passed to `func` as unsigned char values.
 */
inline static const char *read_while(const char *start, const char *end, int (func)(int))
{
    const char *pos;

    for (pos = start; pos < end; pos++) {
        if (!func((unsigned char) *pos)) {
            break;
        }
    }
    return pos;
}
9
+
10
/*
 * Scans backward from `end` and returns a pointer just past the last byte in
 * [start, end) for which `func` returns zero, or `start` if every byte
 * matches. Bytes are passed to `func` as unsigned char values.
 *
 * The loop inspects end[-1] instead of decrementing first, so no pointer
 * before `start` is ever formed (that would be undefined behaviour when
 * `start` is the beginning of the underlying buffer).
 */
inline static const char *read_while_reverse(const char *start, const char *end, int (func)(int))
{
    while (end > start && func((unsigned char) end[-1])) {
        end--;
    }
    return end;
}
17
+
18
/* Returns the number of '\n' bytes in the half-open range [start, end). */
inline static int count_newlines(const char *start, const char *end)
{
    int total = 0;

    for (const char *p = start; p < end; p++) {
        total += (*p == '\n');
    }
    return total;
}
27
+
28
/* Non-zero for characters that rb_isspace accepts, except '\n'. */
inline static int is_non_newline_space(int c)
{
    if (c == '\n') {
        return 0;
    }
    return rb_isspace(c) ? 1 : 0;
}
32
+
33
/* Predicate for read_while: 1 for any character other than '\n', else 0. */
inline static int not_newline(int c)
{
    return (c == '\n') ? 0 : 1;
}
37
+
38
/* True for an underscore or any character accepted by ISALNUM. */
inline static bool is_word_char(char c)
{
    if (ISALNUM(c)) {
        return true;
    }
    return c == '_';
}
42
+
43
+ #endif
44
+
@@ -1,5 +1,7 @@
1
+ #include <assert.h>
1
2
  #include "liquid.h"
2
3
  #include "tokenizer.h"
4
+ #include "stringutil.h"
3
5
 
4
6
  VALUE cLiquidTokenizer;
5
7
 
@@ -35,40 +37,89 @@ static VALUE tokenizer_allocate(VALUE klass)
35
37
 
36
38
  obj = TypedData_Make_Struct(klass, tokenizer_t, &tokenizer_data_type, tokenizer);
37
39
  tokenizer->source = Qnil;
40
+ tokenizer->bug_compatible_whitespace_trimming = false;
41
+ tokenizer->raw_tag_body = NULL;
42
+ tokenizer->raw_tag_body_len = 0;
38
43
  return obj;
39
44
  }
40
45
 
41
- static VALUE tokenizer_initialize_method(VALUE self, VALUE source, VALUE line_numbers)
46
+ static VALUE tokenizer_initialize_method(VALUE self, VALUE source, VALUE start_line_number, VALUE for_liquid_tag)
42
47
  {
43
48
  tokenizer_t *tokenizer;
44
49
 
45
50
  Check_Type(source, T_STRING);
51
+ check_utf8_encoding(source, "source");
52
+
53
+ #define MAX_SOURCE_CODE_BYTES ((1 << 24) - 1)
54
+ if (RSTRING_LEN(source) > MAX_SOURCE_CODE_BYTES) {
55
+ rb_enc_raise(utf8_encoding, rb_eArgError, "Source too large, max %d bytes", MAX_SOURCE_CODE_BYTES);
56
+ }
57
+ #undef MAX_SOURCE_CODE_BYTES
58
+
46
59
  Tokenizer_Get_Struct(self, tokenizer);
47
60
  source = rb_str_dup_frozen(source);
48
61
  tokenizer->source = source;
49
62
  tokenizer->cursor = RSTRING_PTR(source);
50
- tokenizer->length = RSTRING_LEN(source);
51
- tokenizer->lstrip_flag = 0;
63
+ tokenizer->cursor_end = tokenizer->cursor + RSTRING_LEN(source);
64
+ tokenizer->lstrip_flag = false;
52
65
  // tokenizer->line_number keeps track of the current line number or it is 0
53
66
  // to indicate that line numbers aren't being calculated
54
- tokenizer->line_number = RTEST(line_numbers) ? 1 : 0;
67
+ tokenizer->line_number = FIX2UINT(start_line_number);
68
+ tokenizer->for_liquid_tag = RTEST(for_liquid_tag);
55
69
  return Qnil;
56
70
  }
57
71
 
58
- void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
72
+ // Internal function to setup an existing tokenizer from C for a liquid tag.
73
+ // This overwrites the passed in tokenizer, so a copy of the struct should
74
+ // be used to reset the tokenizer after parsing the liquid tag.
75
+ void tokenizer_setup_for_liquid_tag(tokenizer_t *tokenizer, const char *cursor, const char *cursor_end, int line_number)
59
76
  {
60
- if (tokenizer->length <= 0) {
61
- memset(token, 0, sizeof(*token));
62
- return;
77
+ tokenizer->cursor = cursor;
78
+ tokenizer->cursor_end = cursor_end;
79
+ tokenizer->lstrip_flag = false;
80
+ tokenizer->line_number = line_number;
81
+ tokenizer->for_liquid_tag = true;
82
+ }
83
+
84
+ // Tokenizes contents of {% liquid ... %}
85
+ static void tokenizer_next_for_liquid_tag(tokenizer_t *tokenizer, token_t *token)
86
+ {
87
+ const char *end = tokenizer->cursor_end;
88
+ const char *start = tokenizer->cursor;
89
+ const char *start_trimmed = read_while(start, end, is_non_newline_space);
90
+
91
+ token->str_full = start;
92
+ token->str_trimmed = start_trimmed;
93
+
94
+ const char *end_full = read_while(start_trimmed, end, not_newline);
95
+ if (end_full < end) {
96
+ tokenizer->cursor = end_full + 1;
97
+ if (tokenizer->line_number)
98
+ tokenizer->line_number++;
99
+ } else {
100
+ tokenizer->cursor = end_full;
63
101
  }
64
102
 
103
+ const char *end_trimmed = read_while_reverse(start_trimmed, end_full, rb_isspace);
104
+
105
+ token->len_trimmed = end_trimmed - start_trimmed;
106
+ token->len_full = end_full - token->str_full;
107
+
108
+ if (token->len_trimmed == 0) {
109
+ token->type = TOKEN_BLANK_LIQUID_TAG_LINE;
110
+ } else {
111
+ token->type = TOKEN_TAG;
112
+ }
113
+ }
114
+
115
+ // Tokenizes contents of a full Liquid template
116
+ static void tokenizer_next_for_template(tokenizer_t *tokenizer, token_t *token)
117
+ {
65
118
  const char *cursor = tokenizer->cursor;
66
- const char *last = cursor + tokenizer->length - 1;
119
+ const char *last = tokenizer->cursor_end - 1;
67
120
 
68
- token->str = cursor;
121
+ token->str_full = cursor;
69
122
  token->type = TOKEN_RAW;
70
- token->lstrip = 0;
71
- token->rstrip = 0;
72
123
 
73
124
  while (cursor < last) {
74
125
  if (*cursor++ != '{')
@@ -78,17 +129,17 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
78
129
  if (c != '%' && c != '{')
79
130
  continue;
80
131
  if (cursor <= last && *cursor == '-') {
81
- cursor++;
82
- token->rstrip = 1;
132
+ cursor++;
133
+ token->rstrip = 1;
83
134
  }
84
135
  if (cursor - tokenizer->cursor > (ptrdiff_t)(2 + token->rstrip)) {
85
136
  token->type = TOKEN_RAW;
86
137
  cursor -= 2 + token->rstrip;
87
138
  token->lstrip = tokenizer->lstrip_flag;
88
- tokenizer->lstrip_flag = 0;
139
+ tokenizer->lstrip_flag = false;
89
140
  goto found;
90
141
  }
91
- tokenizer->lstrip_flag = 0;
142
+ tokenizer->lstrip_flag = false;
92
143
  token->type = TOKEN_INVALID;
93
144
  token->lstrip = token->rstrip;
94
145
  token->rstrip = 0;
@@ -103,12 +154,12 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
103
154
  continue;
104
155
  token->type = TOKEN_TAG;
105
156
  if(cursor[-3] == '-')
106
- token->rstrip = tokenizer->lstrip_flag = 1;
157
+ token->rstrip = tokenizer->lstrip_flag = true;
107
158
  goto found;
108
159
  }
109
160
  // unterminated tag
110
161
  cursor = tokenizer->cursor + 2;
111
- tokenizer->lstrip_flag = 0;
162
+ tokenizer->lstrip_flag = false;
112
163
  goto found;
113
164
  } else {
114
165
  while (cursor < last) {
@@ -121,31 +172,51 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
121
172
  }
122
173
  token->type = TOKEN_VARIABLE;
123
174
  if(cursor[-3] == '-')
124
- token->rstrip = tokenizer->lstrip_flag = 1;
175
+ token->rstrip = tokenizer->lstrip_flag = true;
125
176
  goto found;
126
177
  }
127
178
  // unterminated variable
128
179
  cursor = tokenizer->cursor + 2;
129
- tokenizer->lstrip_flag = 0;
180
+ tokenizer->lstrip_flag = false;
130
181
  goto found;
131
182
  }
132
183
  }
133
184
  cursor = last + 1;
134
185
  token->lstrip = tokenizer->lstrip_flag;
135
- tokenizer->lstrip_flag = 0;
186
+ tokenizer->lstrip_flag = false;
136
187
  found:
137
- token->length = cursor - tokenizer->cursor;
138
- tokenizer->cursor += token->length;
139
- tokenizer->length -= token->length;
188
+ token->len_full = cursor - token->str_full;
189
+
190
+ token->str_trimmed = token->str_full;
191
+ token->len_trimmed = token->len_full;
192
+
193
+ if (token->type == TOKEN_VARIABLE || token->type == TOKEN_TAG) {
194
+ token->str_trimmed += 2 + token->lstrip;
195
+ token->len_trimmed -= 2 + token->lstrip + 2;
196
+ if (token->rstrip && token->len_trimmed)
197
+ token->len_trimmed--;
198
+ }
199
+
200
+ assert(token->len_trimmed >= 0);
201
+
202
+ tokenizer->cursor += token->len_full;
140
203
 
141
204
  if (tokenizer->line_number) {
142
- const char *cursor = token->str;
143
- const char *end = token->str + token->length;
144
- while (cursor < end) {
145
- if (*cursor == '\n')
146
- tokenizer->line_number++;
147
- cursor++;
148
- }
205
+ tokenizer->line_number += count_newlines(token->str_full, token->str_full + token->len_full);
206
+ }
207
+ }
208
+
209
+ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
210
+ {
211
+ memset(token, 0, sizeof(*token));
212
+
213
+ if (tokenizer->cursor >= tokenizer->cursor_end) {
214
+ return;
215
+ }
216
+ if (tokenizer->for_liquid_tag) {
217
+ tokenizer_next_for_liquid_tag(tokenizer, token);
218
+ } else {
219
+ tokenizer_next_for_template(tokenizer, token);
149
220
  }
150
221
  }
151
222
 
@@ -159,7 +230,25 @@ static VALUE tokenizer_shift_method(VALUE self)
159
230
  if (!token.type)
160
231
  return Qnil;
161
232
 
162
- return rb_enc_str_new(token.str, token.length, utf8_encoding);
233
+ // When sent back to Ruby, tokens are the raw string including whitespace
234
+ // and tag delimiters. It should be possible to reconstruct the exact
235
+ // template from the tokens.
236
+ return rb_enc_str_new(token.str_full, token.len_full, utf8_encoding);
237
+ }
238
+
239
+ static VALUE tokenizer_shift_trimmed_method(VALUE self)
240
+ {
241
+ tokenizer_t *tokenizer;
242
+ Tokenizer_Get_Struct(self, tokenizer);
243
+
244
+ token_t token;
245
+ tokenizer_next(tokenizer, &token);
246
+ if (!token.type)
247
+ return Qnil;
248
+
249
+ // This method doesn't include whitespace and tag delimiters. It allows for
250
+ // testing the output of tokenizer_next as used by rb_block_parse.
251
+ return rb_enc_str_new(token.str_trimmed, token.len_trimmed, utf8_encoding);
163
252
  }
164
253
 
165
254
  static VALUE tokenizer_line_number_method(VALUE self)
@@ -173,12 +262,37 @@ static VALUE tokenizer_line_number_method(VALUE self)
173
262
  return UINT2NUM(tokenizer->line_number);
174
263
  }
175
264
 
176
- void init_liquid_tokenizer()
265
+ static VALUE tokenizer_for_liquid_tag_method(VALUE self)
266
+ {
267
+ tokenizer_t *tokenizer;
268
+ Tokenizer_Get_Struct(self, tokenizer);
269
+
270
+ return tokenizer->for_liquid_tag ? Qtrue : Qfalse;
271
+ }
272
+
273
+
274
+ // Temporary to test rollout of the fix for this bug
275
+ static VALUE tokenizer_bug_compatible_whitespace_trimming(VALUE self) {
276
+ tokenizer_t *tokenizer;
277
+ Tokenizer_Get_Struct(self, tokenizer);
278
+
279
+ tokenizer->bug_compatible_whitespace_trimming = true;
280
+ return Qnil;
281
+ }
282
+
283
+ void liquid_define_tokenizer(void)
177
284
  {
178
285
  cLiquidTokenizer = rb_define_class_under(mLiquidC, "Tokenizer", rb_cObject);
286
+ rb_global_variable(&cLiquidTokenizer);
287
+
179
288
  rb_define_alloc_func(cLiquidTokenizer, tokenizer_allocate);
180
- rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 2);
181
- rb_define_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
289
+ rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 3);
182
290
  rb_define_method(cLiquidTokenizer, "line_number", tokenizer_line_number_method, 0);
291
+ rb_define_method(cLiquidTokenizer, "for_liquid_tag", tokenizer_for_liquid_tag_method, 0);
292
+ rb_define_method(cLiquidTokenizer, "bug_compatible_whitespace_trimming!", tokenizer_bug_compatible_whitespace_trimming, 0);
293
+ rb_define_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
294
+
295
+ // For testing the internal token representation.
296
+ rb_define_private_method(cLiquidTokenizer, "shift_trimmed", tokenizer_shift_trimmed_method, 0);
183
297
  }
184
298
 
@@ -6,31 +6,42 @@ enum token_type {
6
6
  TOKEN_INVALID,
7
7
  TOKEN_RAW,
8
8
  TOKEN_TAG,
9
- TOKEN_VARIABLE
9
+ TOKEN_VARIABLE,
10
+ TOKEN_BLANK_LIQUID_TAG_LINE
10
11
  };
11
12
 
12
13
  typedef struct token {
13
14
  enum token_type type;
14
- const char *str;
15
- long length;
16
- unsigned int lstrip;
17
- unsigned int rstrip;
15
+
16
+ // str_trimmed contains no tag delimiters
17
+ const char *str_trimmed, *str_full;
18
+ long len_trimmed, len_full;
19
+
20
+ bool lstrip, rstrip;
18
21
  } token_t;
19
22
 
20
23
  typedef struct tokenizer {
21
24
  VALUE source;
22
- const char *cursor;
23
- long length;
25
+ const char *cursor, *cursor_end;
24
26
  unsigned int line_number;
25
- unsigned int lstrip_flag;
27
+ bool lstrip_flag;
28
+ bool for_liquid_tag;
29
+
30
+ // Temporary to test rollout of the fix for this bug
31
+ bool bug_compatible_whitespace_trimming;
32
+
33
+ char *raw_tag_body;
34
+ unsigned int raw_tag_body_len;
26
35
  } tokenizer_t;
27
36
 
28
37
  extern VALUE cLiquidTokenizer;
29
38
  extern const rb_data_type_t tokenizer_data_type;
30
39
  #define Tokenizer_Get_Struct(obj, sval) TypedData_Get_Struct(obj, tokenizer_t, &tokenizer_data_type, sval)
31
40
 
32
- void init_liquid_tokenizer();
41
+ void liquid_define_tokenizer(void);
33
42
  void tokenizer_next(tokenizer_t *tokenizer, token_t *token);
34
43
 
44
+ void tokenizer_setup_for_liquid_tag(tokenizer_t *tokenizer, const char *cursor, const char *cursor_end, int line_number);
45
+
35
46
  #endif
36
47
 
@@ -0,0 +1,18 @@
1
+ #include "usage.h"
2
+
3
+ static VALUE cLiquidUsage;
4
+ static ID id_increment;
5
+
6
+ void usage_increment(const char *name)
7
+ {
8
+ VALUE name_str = rb_str_new_cstr(name);
9
+ rb_funcall(cLiquidUsage, id_increment, 1, name_str);
10
+ }
11
+
12
+ void liquid_define_usage(void)
13
+ {
14
+ cLiquidUsage = rb_const_get(mLiquid, rb_intern("Usage"));
15
+ rb_global_variable(&cLiquidUsage);
16
+
17
+ id_increment = rb_intern("increment");
18
+ }