liquid-c 4.0.1 → 4.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/cla.yml +23 -0
  3. data/.github/workflows/liquid.yml +36 -11
  4. data/.gitignore +4 -0
  5. data/.rubocop.yml +14 -0
  6. data/Gemfile +15 -5
  7. data/README.md +32 -8
  8. data/Rakefile +12 -63
  9. data/ext/liquid_c/block.c +493 -60
  10. data/ext/liquid_c/block.h +28 -2
  11. data/ext/liquid_c/c_buffer.c +42 -0
  12. data/ext/liquid_c/c_buffer.h +76 -0
  13. data/ext/liquid_c/context.c +233 -0
  14. data/ext/liquid_c/context.h +70 -0
  15. data/ext/liquid_c/document_body.c +97 -0
  16. data/ext/liquid_c/document_body.h +59 -0
  17. data/ext/liquid_c/expression.c +116 -0
  18. data/ext/liquid_c/expression.h +24 -0
  19. data/ext/liquid_c/extconf.rb +21 -9
  20. data/ext/liquid_c/intutil.h +22 -0
  21. data/ext/liquid_c/lexer.c +39 -3
  22. data/ext/liquid_c/lexer.h +18 -3
  23. data/ext/liquid_c/liquid.c +76 -6
  24. data/ext/liquid_c/liquid.h +24 -1
  25. data/ext/liquid_c/liquid_vm.c +618 -0
  26. data/ext/liquid_c/liquid_vm.h +25 -0
  27. data/ext/liquid_c/parse_context.c +76 -0
  28. data/ext/liquid_c/parse_context.h +13 -0
  29. data/ext/liquid_c/parser.c +153 -65
  30. data/ext/liquid_c/parser.h +4 -2
  31. data/ext/liquid_c/raw.c +136 -0
  32. data/ext/liquid_c/raw.h +6 -0
  33. data/ext/liquid_c/resource_limits.c +279 -0
  34. data/ext/liquid_c/resource_limits.h +23 -0
  35. data/ext/liquid_c/stringutil.h +44 -0
  36. data/ext/liquid_c/tokenizer.c +149 -35
  37. data/ext/liquid_c/tokenizer.h +20 -9
  38. data/ext/liquid_c/usage.c +18 -0
  39. data/ext/liquid_c/usage.h +9 -0
  40. data/ext/liquid_c/variable.c +196 -20
  41. data/ext/liquid_c/variable.h +18 -1
  42. data/ext/liquid_c/variable_lookup.c +44 -0
  43. data/ext/liquid_c/variable_lookup.h +8 -0
  44. data/ext/liquid_c/vm_assembler.c +491 -0
  45. data/ext/liquid_c/vm_assembler.h +240 -0
  46. data/ext/liquid_c/vm_assembler_pool.c +99 -0
  47. data/ext/liquid_c/vm_assembler_pool.h +26 -0
  48. data/lib/liquid/c/compile_ext.rb +44 -0
  49. data/lib/liquid/c/version.rb +3 -1
  50. data/lib/liquid/c.rb +226 -48
  51. data/liquid-c.gemspec +16 -10
  52. data/performance/c_profile.rb +23 -0
  53. data/performance.rb +6 -4
  54. data/rakelib/compile.rake +15 -0
  55. data/rakelib/integration_test.rake +43 -0
  56. data/rakelib/performance.rake +43 -0
  57. data/rakelib/rubocop.rake +6 -0
  58. data/rakelib/unit_test.rake +14 -0
  59. data/test/integration_test.rb +11 -0
  60. data/test/liquid_test_helper.rb +21 -0
  61. data/test/test_helper.rb +21 -2
  62. data/test/unit/block_test.rb +137 -0
  63. data/test/unit/context_test.rb +85 -0
  64. data/test/unit/expression_test.rb +191 -0
  65. data/test/unit/gc_stress_test.rb +28 -0
  66. data/test/unit/raw_test.rb +93 -0
  67. data/test/unit/resource_limits_test.rb +50 -0
  68. data/test/unit/tokenizer_test.rb +90 -20
  69. data/test/unit/variable_test.rb +279 -60
  70. metadata +60 -11
  71. data/test/liquid_test.rb +0 -11
@@ -0,0 +1,279 @@
1
+ #include "liquid.h"
2
+ #include "resource_limits.h"
3
+
4
+ VALUE cLiquidResourceLimits;
5
+
6
// GC free hook for ResourceLimits objects: releases the wrapped
// resource_limits_t allocation.
static void resource_limits_free(void *ptr)
{
    xfree(ptr);
}
11
+
12
+ static size_t resource_limits_memsize(const void *ptr)
13
+ {
14
+ return sizeof(resource_limits_t);
15
+ }
16
+
17
+ const rb_data_type_t resource_limits_data_type = {
18
+ "liquid_resource_limits",
19
+ { NULL, resource_limits_free, resource_limits_memsize },
20
+ NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
21
+ };
22
+
23
+ static void resource_limits_reset(resource_limits_t *resource_limit)
24
+ {
25
+ resource_limit->reached_limit = true;
26
+ resource_limit->last_capture_length = -1;
27
+ resource_limit->render_score = 0;
28
+ resource_limit->assign_score = 0;
29
+ }
30
+
31
+ static VALUE resource_limits_allocate(VALUE klass)
32
+ {
33
+ resource_limits_t *resource_limits;
34
+
35
+ VALUE obj = TypedData_Make_Struct(klass, resource_limits_t, &resource_limits_data_type, resource_limits);
36
+
37
+ resource_limits_reset(resource_limits);
38
+
39
+ return obj;
40
+ }
41
+
42
+ static VALUE resource_limits_render_length_limit_method(VALUE self)
43
+ {
44
+ resource_limits_t *resource_limits;
45
+ ResourceLimits_Get_Struct(self, resource_limits);
46
+
47
+ return LONG2NUM(resource_limits->render_length_limit);
48
+ }
49
+
50
+ static VALUE resource_limits_set_render_length_limit_method(VALUE self, VALUE render_length_limit)
51
+ {
52
+ resource_limits_t *resource_limits;
53
+ ResourceLimits_Get_Struct(self, resource_limits);
54
+
55
+ if (render_length_limit == Qnil) {
56
+ resource_limits->render_length_limit = LONG_MAX;
57
+ } else {
58
+ resource_limits->render_length_limit = NUM2LONG(render_length_limit);
59
+ }
60
+
61
+ return Qnil;
62
+ }
63
+
64
+ static VALUE resource_limits_render_score_limit_method(VALUE self)
65
+ {
66
+ resource_limits_t *resource_limits;
67
+ ResourceLimits_Get_Struct(self, resource_limits);
68
+
69
+ return LONG2NUM(resource_limits->render_score_limit);
70
+ }
71
+
72
+ static VALUE resource_limits_set_render_score_limit_method(VALUE self, VALUE render_score_limit)
73
+ {
74
+ resource_limits_t *resource_limits;
75
+ ResourceLimits_Get_Struct(self, resource_limits);
76
+
77
+ if (render_score_limit == Qnil) {
78
+ resource_limits->render_score_limit = LONG_MAX;
79
+ } else {
80
+ resource_limits->render_score_limit = NUM2LONG(render_score_limit);
81
+ }
82
+
83
+ return Qnil;
84
+ }
85
+
86
+ static VALUE resource_limits_assign_score_limit_method(VALUE self)
87
+ {
88
+ resource_limits_t *resource_limits;
89
+ ResourceLimits_Get_Struct(self, resource_limits);
90
+
91
+ return LONG2NUM(resource_limits->assign_score_limit);
92
+ }
93
+
94
+ static VALUE resource_limits_set_assign_score_limit_method(VALUE self, VALUE assign_score_limit)
95
+ {
96
+ resource_limits_t *resource_limits;
97
+ ResourceLimits_Get_Struct(self, resource_limits);
98
+
99
+ if (assign_score_limit == Qnil) {
100
+ resource_limits->assign_score_limit = LONG_MAX;
101
+ } else {
102
+ resource_limits->assign_score_limit = NUM2LONG(assign_score_limit);
103
+ }
104
+
105
+ return Qnil;
106
+ }
107
+
108
+ static VALUE resource_limits_render_score_method(VALUE self)
109
+ {
110
+ resource_limits_t *resource_limits;
111
+ ResourceLimits_Get_Struct(self, resource_limits);
112
+
113
+ return LONG2NUM(resource_limits->render_score);
114
+ }
115
+
116
+ static VALUE resource_limits_assign_score_method(VALUE self)
117
+ {
118
+ resource_limits_t *resource_limits;
119
+ ResourceLimits_Get_Struct(self, resource_limits);
120
+
121
+ return LONG2NUM(resource_limits->assign_score);
122
+ }
123
+
124
+ static VALUE resource_limits_initialize_method(VALUE self, VALUE render_length_limit,
125
+ VALUE render_score_limit, VALUE assign_score_limit)
126
+ {
127
+ resource_limits_set_render_length_limit_method(self, render_length_limit);
128
+ resource_limits_set_render_score_limit_method(self, render_score_limit);
129
+ resource_limits_set_assign_score_limit_method(self, assign_score_limit);
130
+
131
+ return Qnil;
132
+ }
133
+
134
+ __attribute__((noreturn))
135
+ void resource_limits_raise_limits_reached(resource_limits_t *resource_limit)
136
+ {
137
+ resource_limit->reached_limit = true;
138
+ rb_raise(cMemoryError, "Memory limits exceeded");
139
+ }
140
+
141
+ void resource_limits_increment_render_score(resource_limits_t *resource_limits, long amount)
142
+ {
143
+ resource_limits->render_score = resource_limits->render_score + amount;
144
+
145
+ if (resource_limits->render_score > resource_limits->render_score_limit) {
146
+ resource_limits_raise_limits_reached(resource_limits);
147
+ }
148
+ }
149
+
150
+ static VALUE resource_limits_increment_render_score_method(VALUE self, VALUE amount)
151
+ {
152
+ resource_limits_t *resource_limits;
153
+ ResourceLimits_Get_Struct(self, resource_limits);
154
+
155
+ resource_limits_increment_render_score(resource_limits, NUM2LONG(amount));
156
+
157
+ return Qnil;
158
+ }
159
+
160
+ static void resource_limits_increment_assign_score(resource_limits_t *resource_limits, long amount)
161
+ {
162
+ resource_limits->assign_score = resource_limits->assign_score + amount;
163
+
164
+ if (resource_limits->assign_score > resource_limits->assign_score_limit) {
165
+ resource_limits_raise_limits_reached(resource_limits);
166
+ }
167
+ }
168
+
169
+ static VALUE resource_limits_increment_assign_score_method(VALUE self, VALUE amount)
170
+ {
171
+ resource_limits_t *resource_limits;
172
+ ResourceLimits_Get_Struct(self, resource_limits);
173
+
174
+ resource_limits_increment_assign_score(resource_limits, NUM2LONG(amount));
175
+
176
+ return Qnil;
177
+ }
178
+
179
+ void resource_limits_increment_write_score(resource_limits_t *resource_limits, VALUE output)
180
+ {
181
+ long captured = RSTRING_LEN(output);
182
+
183
+ if (resource_limits->last_capture_length >= 0) {
184
+ long increment = captured - resource_limits->last_capture_length;
185
+ resource_limits->last_capture_length = captured;
186
+ resource_limits_increment_assign_score(resource_limits, increment);
187
+ } else if (captured > resource_limits->render_length_limit) {
188
+ resource_limits_raise_limits_reached(resource_limits);
189
+ }
190
+ }
191
+
192
+ static VALUE resource_limits_increment_write_score_method(VALUE self, VALUE output)
193
+ {
194
+ Check_Type(output, T_STRING);
195
+
196
+ resource_limits_t *resource_limits;
197
+ ResourceLimits_Get_Struct(self, resource_limits);
198
+
199
+ resource_limits_increment_write_score(resource_limits, output);
200
+
201
+ return Qnil;
202
+ }
203
+
204
+ static VALUE resource_limits_raise_limits_reached_method(VALUE self)
205
+ {
206
+ resource_limits_t *resource_limits;
207
+ ResourceLimits_Get_Struct(self, resource_limits);
208
+
209
+ resource_limits_raise_limits_reached(resource_limits);
210
+ }
211
+
212
+ static VALUE resource_limits_reached_method(VALUE self)
213
+ {
214
+ resource_limits_t *resource_limits;
215
+ ResourceLimits_Get_Struct(self, resource_limits);
216
+
217
+ return resource_limits->reached_limit ? Qtrue : Qfalse;
218
+ }
219
+
220
+ struct capture_ensure_t {
221
+ resource_limits_t *resource_limits;
222
+ long old_capture_length;
223
+ };
224
+
225
+ static VALUE capture_ensure(VALUE data)
226
+ {
227
+ struct capture_ensure_t *ensure_data = (struct capture_ensure_t *)data;
228
+ ensure_data->resource_limits->last_capture_length = ensure_data->old_capture_length;
229
+
230
+ return Qnil;
231
+ }
232
+
233
+ static VALUE resource_limits_with_capture_method(VALUE self)
234
+ {
235
+ resource_limits_t *resource_limits;
236
+ ResourceLimits_Get_Struct(self, resource_limits);
237
+
238
+ struct capture_ensure_t ensure_data = {
239
+ .resource_limits = resource_limits,
240
+ .old_capture_length = resource_limits->last_capture_length
241
+ };
242
+
243
+ resource_limits->last_capture_length = 0;
244
+
245
+ return rb_ensure(rb_yield, Qundef, capture_ensure, (VALUE)&ensure_data);
246
+ }
247
+
248
+
249
+ static VALUE resource_limits_reset_method(VALUE self)
250
+ {
251
+ resource_limits_t *resource_limits;
252
+ ResourceLimits_Get_Struct(self, resource_limits);
253
+ resource_limits_reset(resource_limits);
254
+ return Qnil;
255
+ }
256
+
257
+ void liquid_define_resource_limits(void)
258
+ {
259
+ cLiquidResourceLimits = rb_define_class_under(mLiquidC, "ResourceLimits", rb_cObject);
260
+ rb_global_variable(&cLiquidResourceLimits);
261
+
262
+ rb_define_alloc_func(cLiquidResourceLimits, resource_limits_allocate);
263
+ rb_define_method(cLiquidResourceLimits, "initialize", resource_limits_initialize_method, 3);
264
+ rb_define_method(cLiquidResourceLimits, "render_length_limit", resource_limits_render_length_limit_method, 0);
265
+ rb_define_method(cLiquidResourceLimits, "render_length_limit=", resource_limits_set_render_length_limit_method, 1);
266
+ rb_define_method(cLiquidResourceLimits, "render_score_limit", resource_limits_render_score_limit_method, 0);
267
+ rb_define_method(cLiquidResourceLimits, "render_score_limit=", resource_limits_set_render_score_limit_method, 1);
268
+ rb_define_method(cLiquidResourceLimits, "assign_score_limit", resource_limits_assign_score_limit_method, 0);
269
+ rb_define_method(cLiquidResourceLimits, "assign_score_limit=", resource_limits_set_assign_score_limit_method, 1);
270
+ rb_define_method(cLiquidResourceLimits, "render_score", resource_limits_render_score_method, 0);
271
+ rb_define_method(cLiquidResourceLimits, "assign_score", resource_limits_assign_score_method, 0);
272
+ rb_define_method(cLiquidResourceLimits, "increment_render_score", resource_limits_increment_render_score_method, 1);
273
+ rb_define_method(cLiquidResourceLimits, "increment_assign_score", resource_limits_increment_assign_score_method, 1);
274
+ rb_define_method(cLiquidResourceLimits, "increment_write_score", resource_limits_increment_write_score_method, 1);
275
+ rb_define_method(cLiquidResourceLimits, "raise_limits_reached", resource_limits_raise_limits_reached_method, 0);
276
+ rb_define_method(cLiquidResourceLimits, "reached?", resource_limits_reached_method, 0);
277
+ rb_define_method(cLiquidResourceLimits, "reset", resource_limits_reset_method, 0);
278
+ rb_define_method(cLiquidResourceLimits, "with_capture", resource_limits_with_capture_method, 0);
279
+ }
@@ -0,0 +1,23 @@
1
+ #ifndef LIQUID_RESOURCE_LIMITS
2
+ #define LIQUID_RESOURCE_LIMITS
3
+
4
+ typedef struct resource_limits {
5
+ long render_length_limit;
6
+ long render_score_limit;
7
+ long assign_score_limit;
8
+ bool reached_limit;
9
+ long last_capture_length;
10
+ long render_score;
11
+ long assign_score;
12
+ } resource_limits_t;
13
+
14
+ extern VALUE cLiquidResourceLimits;
15
+ extern const rb_data_type_t resource_limits_data_type;
16
+ #define ResourceLimits_Get_Struct(obj, sval) TypedData_Get_Struct(obj, resource_limits_t, &resource_limits_data_type, sval)
17
+
18
+ void liquid_define_resource_limits(void);
19
+ void resource_limits_raise_limits_reached(resource_limits_t *resource_limit);
20
+ void resource_limits_increment_render_score(resource_limits_t *resource_limits, long amount);
21
+ void resource_limits_increment_write_score(resource_limits_t *resource_limits, VALUE output);
22
+
23
+ #endif
@@ -0,0 +1,44 @@
1
+ #if !defined(LIQUID_UTIL_H)
2
+ #define LIQUID_UTIL_H
3
+
4
// Scans forward from `start` and returns a pointer to the first byte in
// [start, end) for which `func` returns 0, or `end` if every byte matches.
// Bytes are passed to `func` as unsigned char values.
inline static const char *read_while(const char *start, const char *end, int (func)(int))
{
    const char *pos;

    for (pos = start; pos < end; pos++) {
        if (!func((unsigned char) *pos)) {
            break;
        }
    }
    return pos;
}
9
+
10
// Scans backward from `end` and returns a pointer just past the last byte in
// [start, end) for which `func` returns 0. Returns `start` if every byte
// matches, and `end` if the range is empty or its last byte does not match.
//
// The loop inspects end[-1] rather than stepping `end` below `start` first,
// so no pointer before the start of the buffer is ever formed.
inline static const char *read_while_reverse(const char *start, const char *end, int (func)(int))
{
    while (end > start && func((unsigned char) end[-1])) end--;
    return end;
}
17
+
18
// Returns how many '\n' bytes occur in [start, end).
inline static int count_newlines(const char *start, const char *end)
{
    int newlines = 0;

    for (const char *p = start; p < end; p++) {
        newlines += (*p == '\n');
    }
    return newlines;
}
27
+
28
// Predicate for read_while-style scans: 1 for whitespace (per rb_isspace)
// other than '\n', 0 for everything else.
inline static int is_non_newline_space(int c)
{
    if (c == '\n') {
        return 0;
    }
    return rb_isspace(c) != 0;
}
32
+
33
// Predicate for read_while-style scans: 1 for any value except '\n'.
inline static int not_newline(int c)
{
    return ('\n' == c) ? 0 : 1;
}
37
+
38
// True for characters accepted by ISALNUM and for the underscore.
inline static bool is_word_char(char c)
{
    if (c == '_') {
        return true;
    }
    return ISALNUM(c);
}
42
+
43
+ #endif
44
+
@@ -1,5 +1,7 @@
1
+ #include <assert.h>
1
2
  #include "liquid.h"
2
3
  #include "tokenizer.h"
4
+ #include "stringutil.h"
3
5
 
4
6
  VALUE cLiquidTokenizer;
5
7
 
@@ -35,40 +37,89 @@ static VALUE tokenizer_allocate(VALUE klass)
35
37
 
36
38
  obj = TypedData_Make_Struct(klass, tokenizer_t, &tokenizer_data_type, tokenizer);
37
39
  tokenizer->source = Qnil;
40
+ tokenizer->bug_compatible_whitespace_trimming = false;
41
+ tokenizer->raw_tag_body = NULL;
42
+ tokenizer->raw_tag_body_len = 0;
38
43
  return obj;
39
44
  }
40
45
 
41
- static VALUE tokenizer_initialize_method(VALUE self, VALUE source, VALUE line_numbers)
46
+ static VALUE tokenizer_initialize_method(VALUE self, VALUE source, VALUE start_line_number, VALUE for_liquid_tag)
42
47
  {
43
48
  tokenizer_t *tokenizer;
44
49
 
45
50
  Check_Type(source, T_STRING);
51
+ check_utf8_encoding(source, "source");
52
+
53
+ #define MAX_SOURCE_CODE_BYTES ((1 << 24) - 1)
54
+ if (RSTRING_LEN(source) > MAX_SOURCE_CODE_BYTES) {
55
+ rb_enc_raise(utf8_encoding, rb_eArgError, "Source too large, max %d bytes", MAX_SOURCE_CODE_BYTES);
56
+ }
57
+ #undef MAX_SOURCE_CODE_BYTES
58
+
46
59
  Tokenizer_Get_Struct(self, tokenizer);
47
60
  source = rb_str_dup_frozen(source);
48
61
  tokenizer->source = source;
49
62
  tokenizer->cursor = RSTRING_PTR(source);
50
- tokenizer->length = RSTRING_LEN(source);
51
- tokenizer->lstrip_flag = 0;
63
+ tokenizer->cursor_end = tokenizer->cursor + RSTRING_LEN(source);
64
+ tokenizer->lstrip_flag = false;
52
65
  // tokenizer->line_number keeps track of the current line number or it is 0
53
66
  // to indicate that line numbers aren't being calculated
54
- tokenizer->line_number = RTEST(line_numbers) ? 1 : 0;
67
+ tokenizer->line_number = FIX2UINT(start_line_number);
68
+ tokenizer->for_liquid_tag = RTEST(for_liquid_tag);
55
69
  return Qnil;
56
70
  }
57
71
 
58
- void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
72
+ // Internal function to setup an existing tokenizer from C for a liquid tag.
73
+ // This overwrites the passed in tokenizer, so a copy of the struct should
74
+ // be used to reset the tokenizer after parsing the liquid tag.
75
+ void tokenizer_setup_for_liquid_tag(tokenizer_t *tokenizer, const char *cursor, const char *cursor_end, int line_number)
59
76
  {
60
- if (tokenizer->length <= 0) {
61
- memset(token, 0, sizeof(*token));
62
- return;
77
+ tokenizer->cursor = cursor;
78
+ tokenizer->cursor_end = cursor_end;
79
+ tokenizer->lstrip_flag = false;
80
+ tokenizer->line_number = line_number;
81
+ tokenizer->for_liquid_tag = true;
82
+ }
83
+
84
+ // Tokenizes contents of {% liquid ... %}
85
+ static void tokenizer_next_for_liquid_tag(tokenizer_t *tokenizer, token_t *token)
86
+ {
87
+ const char *end = tokenizer->cursor_end;
88
+ const char *start = tokenizer->cursor;
89
+ const char *start_trimmed = read_while(start, end, is_non_newline_space);
90
+
91
+ token->str_full = start;
92
+ token->str_trimmed = start_trimmed;
93
+
94
+ const char *end_full = read_while(start_trimmed, end, not_newline);
95
+ if (end_full < end) {
96
+ tokenizer->cursor = end_full + 1;
97
+ if (tokenizer->line_number)
98
+ tokenizer->line_number++;
99
+ } else {
100
+ tokenizer->cursor = end_full;
63
101
  }
64
102
 
103
+ const char *end_trimmed = read_while_reverse(start_trimmed, end_full, rb_isspace);
104
+
105
+ token->len_trimmed = end_trimmed - start_trimmed;
106
+ token->len_full = end_full - token->str_full;
107
+
108
+ if (token->len_trimmed == 0) {
109
+ token->type = TOKEN_BLANK_LIQUID_TAG_LINE;
110
+ } else {
111
+ token->type = TOKEN_TAG;
112
+ }
113
+ }
114
+
115
+ // Tokenizes contents of a full Liquid template
116
+ static void tokenizer_next_for_template(tokenizer_t *tokenizer, token_t *token)
117
+ {
65
118
  const char *cursor = tokenizer->cursor;
66
- const char *last = cursor + tokenizer->length - 1;
119
+ const char *last = tokenizer->cursor_end - 1;
67
120
 
68
- token->str = cursor;
121
+ token->str_full = cursor;
69
122
  token->type = TOKEN_RAW;
70
- token->lstrip = 0;
71
- token->rstrip = 0;
72
123
 
73
124
  while (cursor < last) {
74
125
  if (*cursor++ != '{')
@@ -78,17 +129,17 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
78
129
  if (c != '%' && c != '{')
79
130
  continue;
80
131
  if (cursor <= last && *cursor == '-') {
81
- cursor++;
82
- token->rstrip = 1;
132
+ cursor++;
133
+ token->rstrip = 1;
83
134
  }
84
135
  if (cursor - tokenizer->cursor > (ptrdiff_t)(2 + token->rstrip)) {
85
136
  token->type = TOKEN_RAW;
86
137
  cursor -= 2 + token->rstrip;
87
138
  token->lstrip = tokenizer->lstrip_flag;
88
- tokenizer->lstrip_flag = 0;
139
+ tokenizer->lstrip_flag = false;
89
140
  goto found;
90
141
  }
91
- tokenizer->lstrip_flag = 0;
142
+ tokenizer->lstrip_flag = false;
92
143
  token->type = TOKEN_INVALID;
93
144
  token->lstrip = token->rstrip;
94
145
  token->rstrip = 0;
@@ -103,12 +154,12 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
103
154
  continue;
104
155
  token->type = TOKEN_TAG;
105
156
  if(cursor[-3] == '-')
106
- token->rstrip = tokenizer->lstrip_flag = 1;
157
+ token->rstrip = tokenizer->lstrip_flag = true;
107
158
  goto found;
108
159
  }
109
160
  // unterminated tag
110
161
  cursor = tokenizer->cursor + 2;
111
- tokenizer->lstrip_flag = 0;
162
+ tokenizer->lstrip_flag = false;
112
163
  goto found;
113
164
  } else {
114
165
  while (cursor < last) {
@@ -121,31 +172,51 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
121
172
  }
122
173
  token->type = TOKEN_VARIABLE;
123
174
  if(cursor[-3] == '-')
124
- token->rstrip = tokenizer->lstrip_flag = 1;
175
+ token->rstrip = tokenizer->lstrip_flag = true;
125
176
  goto found;
126
177
  }
127
178
  // unterminated variable
128
179
  cursor = tokenizer->cursor + 2;
129
- tokenizer->lstrip_flag = 0;
180
+ tokenizer->lstrip_flag = false;
130
181
  goto found;
131
182
  }
132
183
  }
133
184
  cursor = last + 1;
134
185
  token->lstrip = tokenizer->lstrip_flag;
135
- tokenizer->lstrip_flag = 0;
186
+ tokenizer->lstrip_flag = false;
136
187
  found:
137
- token->length = cursor - tokenizer->cursor;
138
- tokenizer->cursor += token->length;
139
- tokenizer->length -= token->length;
188
+ token->len_full = cursor - token->str_full;
189
+
190
+ token->str_trimmed = token->str_full;
191
+ token->len_trimmed = token->len_full;
192
+
193
+ if (token->type == TOKEN_VARIABLE || token->type == TOKEN_TAG) {
194
+ token->str_trimmed += 2 + token->lstrip;
195
+ token->len_trimmed -= 2 + token->lstrip + 2;
196
+ if (token->rstrip && token->len_trimmed)
197
+ token->len_trimmed--;
198
+ }
199
+
200
+ assert(token->len_trimmed >= 0);
201
+
202
+ tokenizer->cursor += token->len_full;
140
203
 
141
204
  if (tokenizer->line_number) {
142
- const char *cursor = token->str;
143
- const char *end = token->str + token->length;
144
- while (cursor < end) {
145
- if (*cursor == '\n')
146
- tokenizer->line_number++;
147
- cursor++;
148
- }
205
+ tokenizer->line_number += count_newlines(token->str_full, token->str_full + token->len_full);
206
+ }
207
+ }
208
+
209
+ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
210
+ {
211
+ memset(token, 0, sizeof(*token));
212
+
213
+ if (tokenizer->cursor >= tokenizer->cursor_end) {
214
+ return;
215
+ }
216
+ if (tokenizer->for_liquid_tag) {
217
+ tokenizer_next_for_liquid_tag(tokenizer, token);
218
+ } else {
219
+ tokenizer_next_for_template(tokenizer, token);
149
220
  }
150
221
  }
151
222
 
@@ -159,7 +230,25 @@ static VALUE tokenizer_shift_method(VALUE self)
159
230
  if (!token.type)
160
231
  return Qnil;
161
232
 
162
- return rb_enc_str_new(token.str, token.length, utf8_encoding);
233
+ // When sent back to Ruby, tokens are the raw string including whitespace
234
+ // and tag delimiters. It should be possible to reconstruct the exact
235
+ // template from the tokens.
236
+ return rb_enc_str_new(token.str_full, token.len_full, utf8_encoding);
237
+ }
238
+
239
+ static VALUE tokenizer_shift_trimmed_method(VALUE self)
240
+ {
241
+ tokenizer_t *tokenizer;
242
+ Tokenizer_Get_Struct(self, tokenizer);
243
+
244
+ token_t token;
245
+ tokenizer_next(tokenizer, &token);
246
+ if (!token.type)
247
+ return Qnil;
248
+
249
+ // This method doesn't include whitespace and tag delimiters. It allows for
250
+ // testing the output of tokenizer_next as used by rb_block_parse.
251
+ return rb_enc_str_new(token.str_trimmed, token.len_trimmed, utf8_encoding);
163
252
  }
164
253
 
165
254
  static VALUE tokenizer_line_number_method(VALUE self)
@@ -173,12 +262,37 @@ static VALUE tokenizer_line_number_method(VALUE self)
173
262
  return UINT2NUM(tokenizer->line_number);
174
263
  }
175
264
 
176
- void init_liquid_tokenizer()
265
+ static VALUE tokenizer_for_liquid_tag_method(VALUE self)
266
+ {
267
+ tokenizer_t *tokenizer;
268
+ Tokenizer_Get_Struct(self, tokenizer);
269
+
270
+ return tokenizer->for_liquid_tag ? Qtrue : Qfalse;
271
+ }
272
+
273
+
274
+ // Temporary to test rollout of the fix for this bug
275
+ static VALUE tokenizer_bug_compatible_whitespace_trimming(VALUE self) {
276
+ tokenizer_t *tokenizer;
277
+ Tokenizer_Get_Struct(self, tokenizer);
278
+
279
+ tokenizer->bug_compatible_whitespace_trimming = true;
280
+ return Qnil;
281
+ }
282
+
283
+ void liquid_define_tokenizer(void)
177
284
  {
178
285
  cLiquidTokenizer = rb_define_class_under(mLiquidC, "Tokenizer", rb_cObject);
286
+ rb_global_variable(&cLiquidTokenizer);
287
+
179
288
  rb_define_alloc_func(cLiquidTokenizer, tokenizer_allocate);
180
- rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 2);
181
- rb_define_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
289
+ rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 3);
182
290
  rb_define_method(cLiquidTokenizer, "line_number", tokenizer_line_number_method, 0);
291
+ rb_define_method(cLiquidTokenizer, "for_liquid_tag", tokenizer_for_liquid_tag_method, 0);
292
+ rb_define_method(cLiquidTokenizer, "bug_compatible_whitespace_trimming!", tokenizer_bug_compatible_whitespace_trimming, 0);
293
+ rb_define_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
294
+
295
+ // For testing the internal token representation.
296
+ rb_define_private_method(cLiquidTokenizer, "shift_trimmed", tokenizer_shift_trimmed_method, 0);
183
297
  }
184
298
 
@@ -6,31 +6,42 @@ enum token_type {
6
6
  TOKEN_INVALID,
7
7
  TOKEN_RAW,
8
8
  TOKEN_TAG,
9
- TOKEN_VARIABLE
9
+ TOKEN_VARIABLE,
10
+ TOKEN_BLANK_LIQUID_TAG_LINE
10
11
  };
11
12
 
12
13
  typedef struct token {
13
14
  enum token_type type;
14
- const char *str;
15
- long length;
16
- unsigned int lstrip;
17
- unsigned int rstrip;
15
+
16
+ // str_trimmed contains no tag delimiters
17
+ const char *str_trimmed, *str_full;
18
+ long len_trimmed, len_full;
19
+
20
+ bool lstrip, rstrip;
18
21
  } token_t;
19
22
 
20
23
  typedef struct tokenizer {
21
24
  VALUE source;
22
- const char *cursor;
23
- long length;
25
+ const char *cursor, *cursor_end;
24
26
  unsigned int line_number;
25
- unsigned int lstrip_flag;
27
+ bool lstrip_flag;
28
+ bool for_liquid_tag;
29
+
30
+ // Temporary to test rollout of the fix for this bug
31
+ bool bug_compatible_whitespace_trimming;
32
+
33
+ char *raw_tag_body;
34
+ unsigned int raw_tag_body_len;
26
35
  } tokenizer_t;
27
36
 
28
37
  extern VALUE cLiquidTokenizer;
29
38
  extern const rb_data_type_t tokenizer_data_type;
30
39
  #define Tokenizer_Get_Struct(obj, sval) TypedData_Get_Struct(obj, tokenizer_t, &tokenizer_data_type, sval)
31
40
 
32
- void init_liquid_tokenizer();
41
+ void liquid_define_tokenizer(void);
33
42
  void tokenizer_next(tokenizer_t *tokenizer, token_t *token);
34
43
 
44
+ void tokenizer_setup_for_liquid_tag(tokenizer_t *tokenizer, const char *cursor, const char *cursor_end, int line_number);
45
+
35
46
  #endif
36
47
 
@@ -0,0 +1,18 @@
1
+ #include "usage.h"
2
+
3
+ static VALUE cLiquidUsage;
4
+ static ID id_increment;
5
+
6
+ void usage_increment(const char *name)
7
+ {
8
+ VALUE name_str = rb_str_new_cstr(name);
9
+ rb_funcall(cLiquidUsage, id_increment, 1, name_str);
10
+ }
11
+
12
+ void liquid_define_usage(void)
13
+ {
14
+ cLiquidUsage = rb_const_get(mLiquid, rb_intern("Usage"));
15
+ rb_global_variable(&cLiquidUsage);
16
+
17
+ id_increment = rb_intern("increment");
18
+ }