liquid-c 4.0.1 → 4.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/cla.yml +23 -0
- data/.github/workflows/liquid.yml +36 -11
- data/.gitignore +4 -0
- data/.rubocop.yml +14 -0
- data/Gemfile +15 -5
- data/README.md +32 -8
- data/Rakefile +12 -63
- data/ext/liquid_c/block.c +493 -60
- data/ext/liquid_c/block.h +28 -2
- data/ext/liquid_c/c_buffer.c +42 -0
- data/ext/liquid_c/c_buffer.h +76 -0
- data/ext/liquid_c/context.c +233 -0
- data/ext/liquid_c/context.h +70 -0
- data/ext/liquid_c/document_body.c +97 -0
- data/ext/liquid_c/document_body.h +59 -0
- data/ext/liquid_c/expression.c +116 -0
- data/ext/liquid_c/expression.h +24 -0
- data/ext/liquid_c/extconf.rb +21 -9
- data/ext/liquid_c/intutil.h +22 -0
- data/ext/liquid_c/lexer.c +39 -3
- data/ext/liquid_c/lexer.h +18 -3
- data/ext/liquid_c/liquid.c +76 -6
- data/ext/liquid_c/liquid.h +24 -1
- data/ext/liquid_c/liquid_vm.c +618 -0
- data/ext/liquid_c/liquid_vm.h +25 -0
- data/ext/liquid_c/parse_context.c +76 -0
- data/ext/liquid_c/parse_context.h +13 -0
- data/ext/liquid_c/parser.c +153 -65
- data/ext/liquid_c/parser.h +4 -2
- data/ext/liquid_c/raw.c +136 -0
- data/ext/liquid_c/raw.h +6 -0
- data/ext/liquid_c/resource_limits.c +279 -0
- data/ext/liquid_c/resource_limits.h +23 -0
- data/ext/liquid_c/stringutil.h +44 -0
- data/ext/liquid_c/tokenizer.c +149 -35
- data/ext/liquid_c/tokenizer.h +20 -9
- data/ext/liquid_c/usage.c +18 -0
- data/ext/liquid_c/usage.h +9 -0
- data/ext/liquid_c/variable.c +196 -20
- data/ext/liquid_c/variable.h +18 -1
- data/ext/liquid_c/variable_lookup.c +44 -0
- data/ext/liquid_c/variable_lookup.h +8 -0
- data/ext/liquid_c/vm_assembler.c +491 -0
- data/ext/liquid_c/vm_assembler.h +240 -0
- data/ext/liquid_c/vm_assembler_pool.c +99 -0
- data/ext/liquid_c/vm_assembler_pool.h +26 -0
- data/lib/liquid/c/compile_ext.rb +44 -0
- data/lib/liquid/c/version.rb +3 -1
- data/lib/liquid/c.rb +226 -48
- data/liquid-c.gemspec +16 -10
- data/performance/c_profile.rb +23 -0
- data/performance.rb +6 -4
- data/rakelib/compile.rake +15 -0
- data/rakelib/integration_test.rake +43 -0
- data/rakelib/performance.rake +43 -0
- data/rakelib/rubocop.rake +6 -0
- data/rakelib/unit_test.rake +14 -0
- data/test/integration_test.rb +11 -0
- data/test/liquid_test_helper.rb +21 -0
- data/test/test_helper.rb +21 -2
- data/test/unit/block_test.rb +137 -0
- data/test/unit/context_test.rb +85 -0
- data/test/unit/expression_test.rb +191 -0
- data/test/unit/gc_stress_test.rb +28 -0
- data/test/unit/raw_test.rb +93 -0
- data/test/unit/resource_limits_test.rb +50 -0
- data/test/unit/tokenizer_test.rb +90 -20
- data/test/unit/variable_test.rb +279 -60
- metadata +60 -11
- data/test/liquid_test.rb +0 -11
@@ -0,0 +1,279 @@
|
|
1
|
+
#include "liquid.h"
|
2
|
+
#include "resource_limits.h"
|
3
|
+
|
4
|
+
VALUE cLiquidResourceLimits;
|
5
|
+
|
6
|
+
// Free callback listed in resource_limits_data_type: releases the
// resource_limits_t that backs a ResourceLimits object.
static void resource_limits_free(void *ptr)
{
    xfree(ptr);
}
|
11
|
+
|
12
|
+
static size_t resource_limits_memsize(const void *ptr)
|
13
|
+
{
|
14
|
+
return sizeof(resource_limits_t);
|
15
|
+
}
|
16
|
+
|
17
|
+
const rb_data_type_t resource_limits_data_type = {
|
18
|
+
"liquid_resource_limits",
|
19
|
+
{ NULL, resource_limits_free, resource_limits_memsize },
|
20
|
+
NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
|
21
|
+
};
|
22
|
+
|
23
|
+
// Restores the per-render state to its initial values. The configured
// limits (render_length_limit, render_score_limit, assign_score_limit) are
// left untouched.
static void resource_limits_reset(resource_limits_t *resource_limit)
{
    // A fresh or reset object has not hit any limit yet. The flag is only
    // raised by resource_limits_raise_limits_reached().
    resource_limit->reached_limit = false;
    // A negative length means "no capture in progress"; see
    // resource_limits_increment_write_score().
    resource_limit->last_capture_length = -1;
    resource_limit->render_score = 0;
    resource_limit->assign_score = 0;
}
|
30
|
+
|
31
|
+
// Allocator for Liquid::C::ResourceLimits: wraps a new resource_limits_t in
// a Ruby object and puts its scores and flags into the reset state. The
// limits themselves are assigned later by `initialize`.
static VALUE resource_limits_allocate(VALUE klass)
{
    resource_limits_t *limits;
    VALUE wrapper = TypedData_Make_Struct(klass, resource_limits_t, &resource_limits_data_type, limits);

    resource_limits_reset(limits);
    return wrapper;
}
|
41
|
+
|
42
|
+
// ResourceLimits#render_length_limit: returns the stored limit as a Ruby
// Integer.
static VALUE resource_limits_render_length_limit_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    const long value = limits->render_length_limit;
    return LONG2NUM(value);
}
|
49
|
+
|
50
|
+
// ResourceLimits#render_length_limit=: stores the new limit. nil is stored
// as LONG_MAX; any other value is converted with NUM2LONG.
static VALUE resource_limits_set_render_length_limit_method(VALUE self, VALUE render_length_limit)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    limits->render_length_limit =
        (render_length_limit == Qnil) ? LONG_MAX : NUM2LONG(render_length_limit);

    return Qnil;
}
|
63
|
+
|
64
|
+
// ResourceLimits#render_score_limit: returns the stored limit as a Ruby
// Integer.
static VALUE resource_limits_render_score_limit_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    const long value = limits->render_score_limit;
    return LONG2NUM(value);
}
|
71
|
+
|
72
|
+
// ResourceLimits#render_score_limit=: stores the new limit. nil is stored
// as LONG_MAX; any other value is converted with NUM2LONG.
static VALUE resource_limits_set_render_score_limit_method(VALUE self, VALUE render_score_limit)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    limits->render_score_limit =
        (render_score_limit == Qnil) ? LONG_MAX : NUM2LONG(render_score_limit);

    return Qnil;
}
|
85
|
+
|
86
|
+
// ResourceLimits#assign_score_limit: returns the stored limit as a Ruby
// Integer.
static VALUE resource_limits_assign_score_limit_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    const long value = limits->assign_score_limit;
    return LONG2NUM(value);
}
|
93
|
+
|
94
|
+
// ResourceLimits#assign_score_limit=: stores the new limit. nil is stored
// as LONG_MAX; any other value is converted with NUM2LONG.
static VALUE resource_limits_set_assign_score_limit_method(VALUE self, VALUE assign_score_limit)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    limits->assign_score_limit =
        (assign_score_limit == Qnil) ? LONG_MAX : NUM2LONG(assign_score_limit);

    return Qnil;
}
|
107
|
+
|
108
|
+
// ResourceLimits#render_score: returns the accumulated render score as a
// Ruby Integer.
static VALUE resource_limits_render_score_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    const long score = limits->render_score;
    return LONG2NUM(score);
}
|
115
|
+
|
116
|
+
// ResourceLimits#assign_score: returns the accumulated assign score as a
// Ruby Integer.
static VALUE resource_limits_assign_score_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    const long score = limits->assign_score;
    return LONG2NUM(score);
}
|
123
|
+
|
124
|
+
// ResourceLimits#initialize(render_length_limit, render_score_limit,
// assign_score_limit): stores the three limits by delegating to the
// matching setter for each one, in argument order.
static VALUE resource_limits_initialize_method(VALUE self, VALUE render_length_limit,
                                               VALUE render_score_limit, VALUE assign_score_limit)
{
    (void)resource_limits_set_render_length_limit_method(self, render_length_limit);
    (void)resource_limits_set_render_score_limit_method(self, render_score_limit);
    (void)resource_limits_set_assign_score_limit_method(self, assign_score_limit);

    return Qnil;
}
|
133
|
+
|
134
|
+
__attribute__((noreturn))
|
135
|
+
void resource_limits_raise_limits_reached(resource_limits_t *resource_limit)
|
136
|
+
{
|
137
|
+
resource_limit->reached_limit = true;
|
138
|
+
rb_raise(cMemoryError, "Memory limits exceeded");
|
139
|
+
}
|
140
|
+
|
141
|
+
// Adds `amount` to the render score and raises (through
// resource_limits_raise_limits_reached) once the score exceeds
// render_score_limit.
void resource_limits_increment_render_score(resource_limits_t *resource_limits, long amount)
{
    // Adding past LONG_MAX would be signed overflow, which is undefined
    // behavior. A sum that large is above any storable limit, so treat it
    // as the limit being exceeded instead of performing the addition.
    if (amount > 0 && resource_limits->render_score > LONG_MAX - amount) {
        resource_limits_raise_limits_reached(resource_limits);
    }

    resource_limits->render_score += amount;

    if (resource_limits->render_score > resource_limits->render_score_limit) {
        resource_limits_raise_limits_reached(resource_limits);
    }
}
|
149
|
+
|
150
|
+
// ResourceLimits#increment_render_score(amount): converts `amount` with
// NUM2LONG and adds it to the render score. Always returns nil when it
// returns at all; the C helper raises if the limit is exceeded.
static VALUE resource_limits_increment_render_score_method(VALUE self, VALUE amount)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    const long delta = NUM2LONG(amount);
    resource_limits_increment_render_score(limits, delta);

    return Qnil;
}
|
159
|
+
|
160
|
+
// Adds `amount` to the assign score and raises (through
// resource_limits_raise_limits_reached) once the score exceeds
// assign_score_limit. `amount` may be negative when called from
// resource_limits_increment_write_score().
static void resource_limits_increment_assign_score(resource_limits_t *resource_limits, long amount)
{
    // Adding past LONG_MAX would be signed overflow, which is undefined
    // behavior. A sum that large is above any storable limit, so treat it
    // as the limit being exceeded instead of performing the addition.
    if (amount > 0 && resource_limits->assign_score > LONG_MAX - amount) {
        resource_limits_raise_limits_reached(resource_limits);
    }

    resource_limits->assign_score += amount;

    if (resource_limits->assign_score > resource_limits->assign_score_limit) {
        resource_limits_raise_limits_reached(resource_limits);
    }
}
|
168
|
+
|
169
|
+
// ResourceLimits#increment_assign_score(amount): converts `amount` with
// NUM2LONG and adds it to the assign score. Always returns nil when it
// returns at all; the C helper raises if the limit is exceeded.
static VALUE resource_limits_increment_assign_score_method(VALUE self, VALUE amount)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    const long delta = NUM2LONG(amount);
    resource_limits_increment_assign_score(limits, delta);

    return Qnil;
}
|
178
|
+
|
179
|
+
// Accounts for the current byte length of `output`.
//
// While a capture is active (last_capture_length >= 0), the change in length
// since the previous call is charged to the assign score. Otherwise the
// total length is checked against render_length_limit.
void resource_limits_increment_write_score(resource_limits_t *resource_limits, VALUE output)
{
    const long output_len = RSTRING_LEN(output);
    const long previous_len = resource_limits->last_capture_length;

    if (previous_len < 0) {
        // Not capturing: only the overall render length is limited.
        if (output_len > resource_limits->render_length_limit) {
            resource_limits_raise_limits_reached(resource_limits);
        }
        return;
    }

    // Remember the new length before charging, since charging may raise.
    resource_limits->last_capture_length = output_len;
    resource_limits_increment_assign_score(resource_limits, output_len - previous_len);
}
|
191
|
+
|
192
|
+
// ResourceLimits#increment_write_score(output): checks that `output` is a
// String, then accounts for its current length.
static VALUE resource_limits_increment_write_score_method(VALUE self, VALUE output)
{
    resource_limits_t *limits;

    // Validate the argument before touching the wrapped struct.
    Check_Type(output, T_STRING);
    ResourceLimits_Get_Struct(self, limits);

    resource_limits_increment_write_score(limits, output);
    return Qnil;
}
|
203
|
+
|
204
|
+
// ResourceLimits#raise_limits_reached: marks the limit as reached and
// raises. There is no return statement because the helper never returns.
static VALUE resource_limits_raise_limits_reached_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    resource_limits_raise_limits_reached(limits);
}
|
211
|
+
|
212
|
+
// ResourceLimits#reached?: true once a limit has been hit, false otherwise.
static VALUE resource_limits_reached_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    if (limits->reached_limit) {
        return Qtrue;
    }
    return Qfalse;
}
|
219
|
+
|
220
|
+
struct capture_ensure_t {
|
221
|
+
resource_limits_t *resource_limits;
|
222
|
+
long old_capture_length;
|
223
|
+
};
|
224
|
+
|
225
|
+
static VALUE capture_ensure(VALUE data)
|
226
|
+
{
|
227
|
+
struct capture_ensure_t *ensure_data = (struct capture_ensure_t *)data;
|
228
|
+
ensure_data->resource_limits->last_capture_length = ensure_data->old_capture_length;
|
229
|
+
|
230
|
+
return Qnil;
|
231
|
+
}
|
232
|
+
|
233
|
+
// ResourceLimits#with_capture { ... }: runs the block with capture
// accounting enabled (last_capture_length starts at 0) and restores the
// previous capture length afterwards, including when the block raises.
// Returns the block's value.
static VALUE resource_limits_with_capture_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    struct capture_ensure_t restore;
    restore.resource_limits = limits;
    restore.old_capture_length = limits->last_capture_length;

    limits->last_capture_length = 0;

    return rb_ensure(rb_yield, Qundef, capture_ensure, (VALUE)&restore);
}
|
247
|
+
|
248
|
+
|
249
|
+
// ResourceLimits#reset: puts the scores and flags back into their initial
// state. Returns nil.
static VALUE resource_limits_reset_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    resource_limits_reset(limits);

    return Qnil;
}
|
256
|
+
|
257
|
+
void liquid_define_resource_limits(void)
|
258
|
+
{
|
259
|
+
cLiquidResourceLimits = rb_define_class_under(mLiquidC, "ResourceLimits", rb_cObject);
|
260
|
+
rb_global_variable(&cLiquidResourceLimits);
|
261
|
+
|
262
|
+
rb_define_alloc_func(cLiquidResourceLimits, resource_limits_allocate);
|
263
|
+
rb_define_method(cLiquidResourceLimits, "initialize", resource_limits_initialize_method, 3);
|
264
|
+
rb_define_method(cLiquidResourceLimits, "render_length_limit", resource_limits_render_length_limit_method, 0);
|
265
|
+
rb_define_method(cLiquidResourceLimits, "render_length_limit=", resource_limits_set_render_length_limit_method, 1);
|
266
|
+
rb_define_method(cLiquidResourceLimits, "render_score_limit", resource_limits_render_score_limit_method, 0);
|
267
|
+
rb_define_method(cLiquidResourceLimits, "render_score_limit=", resource_limits_set_render_score_limit_method, 1);
|
268
|
+
rb_define_method(cLiquidResourceLimits, "assign_score_limit", resource_limits_assign_score_limit_method, 0);
|
269
|
+
rb_define_method(cLiquidResourceLimits, "assign_score_limit=", resource_limits_set_assign_score_limit_method, 1);
|
270
|
+
rb_define_method(cLiquidResourceLimits, "render_score", resource_limits_render_score_method, 0);
|
271
|
+
rb_define_method(cLiquidResourceLimits, "assign_score", resource_limits_assign_score_method, 0);
|
272
|
+
rb_define_method(cLiquidResourceLimits, "increment_render_score", resource_limits_increment_render_score_method, 1);
|
273
|
+
rb_define_method(cLiquidResourceLimits, "increment_assign_score", resource_limits_increment_assign_score_method, 1);
|
274
|
+
rb_define_method(cLiquidResourceLimits, "increment_write_score", resource_limits_increment_write_score_method, 1);
|
275
|
+
rb_define_method(cLiquidResourceLimits, "raise_limits_reached", resource_limits_raise_limits_reached_method, 0);
|
276
|
+
rb_define_method(cLiquidResourceLimits, "reached?", resource_limits_reached_method, 0);
|
277
|
+
rb_define_method(cLiquidResourceLimits, "reset", resource_limits_reset_method, 0);
|
278
|
+
rb_define_method(cLiquidResourceLimits, "with_capture", resource_limits_with_capture_method, 0);
|
279
|
+
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#ifndef LIQUID_RESOURCE_LIMITS
|
2
|
+
#define LIQUID_RESOURCE_LIMITS
|
3
|
+
|
4
|
+
// Native state behind a Liquid::C::ResourceLimits object.
typedef struct resource_limits {
    // Configured limits. The setters store LONG_MAX when given nil.
    long render_length_limit; // compared with the output length outside a capture
    long render_score_limit;  // compared with render_score
    long assign_score_limit;  // compared with assign_score

    // Set just before the limits-exceeded error is raised.
    bool reached_limit;

    // Output length seen at the previous write while capturing; negative
    // when no capture is active.
    long last_capture_length;

    // Running totals, zeroed by reset.
    long render_score;
    long assign_score;
} resource_limits_t;
|
13
|
+
|
14
|
+
extern VALUE cLiquidResourceLimits;
|
15
|
+
extern const rb_data_type_t resource_limits_data_type;
|
16
|
+
#define ResourceLimits_Get_Struct(obj, sval) TypedData_Get_Struct(obj, resource_limits_t, &resource_limits_data_type, sval)
|
17
|
+
|
18
|
+
void liquid_define_resource_limits(void);
|
19
|
+
void resource_limits_raise_limits_reached(resource_limits_t *resource_limit);
|
20
|
+
void resource_limits_increment_render_score(resource_limits_t *resource_limits, long amount);
|
21
|
+
void resource_limits_increment_write_score(resource_limits_t *resource_limits, VALUE output);
|
22
|
+
|
23
|
+
#endif
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#if !defined(LIQUID_UTIL_H)
|
2
|
+
#define LIQUID_UTIL_H
|
3
|
+
|
4
|
+
// Scans forward from `start` and returns a pointer to the first byte in
// [start, end) for which `func` returns zero, or `end` if every byte
// matches. Bytes are passed to `func` as unsigned char values.
inline static const char *read_while(const char *start, const char *end, int (func)(int))
{
    const char *pos;

    for (pos = start; pos < end; pos++) {
        if (!func((unsigned char) *pos))
            break;
    }
    return pos;
}
|
9
|
+
|
10
|
+
// Scans backward from `end` and returns a pointer just past the last byte
// in [start, end) for which `func` returns zero. Returns `start` if every
// byte matches or the range is empty. Bytes are passed to `func` as
// unsigned char values.
inline static const char *read_while_reverse(const char *start, const char *end, int (func)(int))
{
    // Inspect end[-1] rather than decrementing first, so that no pointer
    // below `start` is ever formed. Forming one is undefined behavior when
    // `start` is the beginning of the buffer.
    while (end > start && func((unsigned char) end[-1]))
        end--;
    return end;
}
|
17
|
+
|
18
|
+
// Returns how many '\n' bytes occur in [start, end).
inline static int count_newlines(const char *start, const char *end)
{
    int newlines = 0;

    for (const char *pos = start; pos < end; pos++) {
        newlines += (*pos == '\n');
    }
    return newlines;
}
|
27
|
+
|
28
|
+
// Predicate for read_while(): 1 for whitespace (per rb_isspace) other than
// '\n', 0 otherwise.
inline static int is_non_newline_space(int c)
{
    if (c == '\n')
        return 0;
    return rb_isspace(c) ? 1 : 0;
}
|
32
|
+
|
33
|
+
// Predicate for read_while(): 1 for any byte except '\n', 0 for '\n'.
inline static int not_newline(int c)
{
    return !(c == '\n');
}
|
37
|
+
|
38
|
+
// True for an underscore or any character ISALNUM accepts.
inline static bool is_word_char(char c)
{
    if (c == '_')
        return true;
    return ISALNUM(c) != 0;
}
|
42
|
+
|
43
|
+
#endif
|
44
|
+
|
data/ext/liquid_c/tokenizer.c
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
+
#include <assert.h>
|
1
2
|
#include "liquid.h"
|
2
3
|
#include "tokenizer.h"
|
4
|
+
#include "stringutil.h"
|
3
5
|
|
4
6
|
VALUE cLiquidTokenizer;
|
5
7
|
|
@@ -35,40 +37,89 @@ static VALUE tokenizer_allocate(VALUE klass)
|
|
35
37
|
|
36
38
|
obj = TypedData_Make_Struct(klass, tokenizer_t, &tokenizer_data_type, tokenizer);
|
37
39
|
tokenizer->source = Qnil;
|
40
|
+
tokenizer->bug_compatible_whitespace_trimming = false;
|
41
|
+
tokenizer->raw_tag_body = NULL;
|
42
|
+
tokenizer->raw_tag_body_len = 0;
|
38
43
|
return obj;
|
39
44
|
}
|
40
45
|
|
41
|
-
static VALUE tokenizer_initialize_method(VALUE self, VALUE source, VALUE
|
46
|
+
static VALUE tokenizer_initialize_method(VALUE self, VALUE source, VALUE start_line_number, VALUE for_liquid_tag)
|
42
47
|
{
|
43
48
|
tokenizer_t *tokenizer;
|
44
49
|
|
45
50
|
Check_Type(source, T_STRING);
|
51
|
+
check_utf8_encoding(source, "source");
|
52
|
+
|
53
|
+
#define MAX_SOURCE_CODE_BYTES ((1 << 24) - 1)
|
54
|
+
if (RSTRING_LEN(source) > MAX_SOURCE_CODE_BYTES) {
|
55
|
+
rb_enc_raise(utf8_encoding, rb_eArgError, "Source too large, max %d bytes", MAX_SOURCE_CODE_BYTES);
|
56
|
+
}
|
57
|
+
#undef MAX_SOURCE_CODE_BYTES
|
58
|
+
|
46
59
|
Tokenizer_Get_Struct(self, tokenizer);
|
47
60
|
source = rb_str_dup_frozen(source);
|
48
61
|
tokenizer->source = source;
|
49
62
|
tokenizer->cursor = RSTRING_PTR(source);
|
50
|
-
tokenizer->
|
51
|
-
tokenizer->lstrip_flag =
|
63
|
+
tokenizer->cursor_end = tokenizer->cursor + RSTRING_LEN(source);
|
64
|
+
tokenizer->lstrip_flag = false;
|
52
65
|
// tokenizer->line_number keeps track of the current line number or it is 0
|
53
66
|
// to indicate that line numbers aren't being calculated
|
54
|
-
tokenizer->line_number =
|
67
|
+
tokenizer->line_number = FIX2UINT(start_line_number);
|
68
|
+
tokenizer->for_liquid_tag = RTEST(for_liquid_tag);
|
55
69
|
return Qnil;
|
56
70
|
}
|
57
71
|
|
58
|
-
|
72
|
+
// Internal function to setup an existing tokenizer from C for a liquid tag.
|
73
|
+
// This overwrites the passed in tokenizer, so a copy of the struct should
|
74
|
+
// be used to reset the tokenizer after parsing the liquid tag.
|
75
|
+
void tokenizer_setup_for_liquid_tag(tokenizer_t *tokenizer, const char *cursor, const char *cursor_end, int line_number)
{
    // Range of bytes to tokenize.
    tokenizer->cursor = cursor;
    tokenizer->cursor_end = cursor_end;

    // Start in line-oriented liquid-tag mode with no pending left-strip.
    tokenizer->for_liquid_tag = true;
    tokenizer->lstrip_flag = false;

    tokenizer->line_number = line_number;
}
|
83
|
+
|
84
|
+
// Tokenizes contents of {% liquid ... %}
|
85
|
+
// Produces one token per line. The full span runs from the cursor to the
// next '\n' (exclusive) or to cursor_end. The trimmed span is that line
// without leading and trailing whitespace. The cursor is advanced past the
// newline when there is one.
static void tokenizer_next_for_liquid_tag(tokenizer_t *tokenizer, token_t *token)
{
    const char *const limit = tokenizer->cursor_end;
    const char *const line_begin = tokenizer->cursor;

    // Skip leading whitespace, but stay on the current line.
    const char *const content_begin = read_while(line_begin, limit, is_non_newline_space);
    // The line stops at the next newline or at the end of the input.
    const char *const line_end = read_while(content_begin, limit, not_newline);

    if (line_end < limit) {
        // Step over the newline. A line number of 0 means line numbers are
        // not being tracked, so it is only bumped when non-zero.
        tokenizer->cursor = line_end + 1;
        if (tokenizer->line_number)
            tokenizer->line_number++;
    } else {
        tokenizer->cursor = line_end;
    }

    const char *const content_end = read_while_reverse(content_begin, line_end, rb_isspace);

    token->str_full = line_begin;
    token->str_trimmed = content_begin;
    token->len_trimmed = content_end - content_begin;
    token->len_full = line_end - line_begin;

    // A line with nothing but whitespace gets its own token type.
    token->type = (token->len_trimmed == 0) ? TOKEN_BLANK_LIQUID_TAG_LINE : TOKEN_TAG;
}
|
114
|
+
|
115
|
+
// Tokenizes contents of a full Liquid template
|
116
|
+
static void tokenizer_next_for_template(tokenizer_t *tokenizer, token_t *token)
|
117
|
+
{
|
65
118
|
const char *cursor = tokenizer->cursor;
|
66
|
-
const char *last =
|
119
|
+
const char *last = tokenizer->cursor_end - 1;
|
67
120
|
|
68
|
-
token->
|
121
|
+
token->str_full = cursor;
|
69
122
|
token->type = TOKEN_RAW;
|
70
|
-
token->lstrip = 0;
|
71
|
-
token->rstrip = 0;
|
72
123
|
|
73
124
|
while (cursor < last) {
|
74
125
|
if (*cursor++ != '{')
|
@@ -78,17 +129,17 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
|
|
78
129
|
if (c != '%' && c != '{')
|
79
130
|
continue;
|
80
131
|
if (cursor <= last && *cursor == '-') {
|
81
|
-
|
82
|
-
|
132
|
+
cursor++;
|
133
|
+
token->rstrip = 1;
|
83
134
|
}
|
84
135
|
if (cursor - tokenizer->cursor > (ptrdiff_t)(2 + token->rstrip)) {
|
85
136
|
token->type = TOKEN_RAW;
|
86
137
|
cursor -= 2 + token->rstrip;
|
87
138
|
token->lstrip = tokenizer->lstrip_flag;
|
88
|
-
tokenizer->lstrip_flag =
|
139
|
+
tokenizer->lstrip_flag = false;
|
89
140
|
goto found;
|
90
141
|
}
|
91
|
-
tokenizer->lstrip_flag =
|
142
|
+
tokenizer->lstrip_flag = false;
|
92
143
|
token->type = TOKEN_INVALID;
|
93
144
|
token->lstrip = token->rstrip;
|
94
145
|
token->rstrip = 0;
|
@@ -103,12 +154,12 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
|
|
103
154
|
continue;
|
104
155
|
token->type = TOKEN_TAG;
|
105
156
|
if(cursor[-3] == '-')
|
106
|
-
token->rstrip = tokenizer->lstrip_flag =
|
157
|
+
token->rstrip = tokenizer->lstrip_flag = true;
|
107
158
|
goto found;
|
108
159
|
}
|
109
160
|
// unterminated tag
|
110
161
|
cursor = tokenizer->cursor + 2;
|
111
|
-
tokenizer->lstrip_flag =
|
162
|
+
tokenizer->lstrip_flag = false;
|
112
163
|
goto found;
|
113
164
|
} else {
|
114
165
|
while (cursor < last) {
|
@@ -121,31 +172,51 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
|
|
121
172
|
}
|
122
173
|
token->type = TOKEN_VARIABLE;
|
123
174
|
if(cursor[-3] == '-')
|
124
|
-
token->rstrip = tokenizer->lstrip_flag =
|
175
|
+
token->rstrip = tokenizer->lstrip_flag = true;
|
125
176
|
goto found;
|
126
177
|
}
|
127
178
|
// unterminated variable
|
128
179
|
cursor = tokenizer->cursor + 2;
|
129
|
-
tokenizer->lstrip_flag =
|
180
|
+
tokenizer->lstrip_flag = false;
|
130
181
|
goto found;
|
131
182
|
}
|
132
183
|
}
|
133
184
|
cursor = last + 1;
|
134
185
|
token->lstrip = tokenizer->lstrip_flag;
|
135
|
-
tokenizer->lstrip_flag =
|
186
|
+
tokenizer->lstrip_flag = false;
|
136
187
|
found:
|
137
|
-
token->
|
138
|
-
|
139
|
-
|
188
|
+
token->len_full = cursor - token->str_full;
|
189
|
+
|
190
|
+
token->str_trimmed = token->str_full;
|
191
|
+
token->len_trimmed = token->len_full;
|
192
|
+
|
193
|
+
if (token->type == TOKEN_VARIABLE || token->type == TOKEN_TAG) {
|
194
|
+
token->str_trimmed += 2 + token->lstrip;
|
195
|
+
token->len_trimmed -= 2 + token->lstrip + 2;
|
196
|
+
if (token->rstrip && token->len_trimmed)
|
197
|
+
token->len_trimmed--;
|
198
|
+
}
|
199
|
+
|
200
|
+
assert(token->len_trimmed >= 0);
|
201
|
+
|
202
|
+
tokenizer->cursor += token->len_full;
|
140
203
|
|
141
204
|
if (tokenizer->line_number) {
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
205
|
+
tokenizer->line_number += count_newlines(token->str_full, token->str_full + token->len_full);
|
206
|
+
}
|
207
|
+
}
|
208
|
+
|
209
|
+
// Reads the next token into `token`. The token is zeroed first, so when the
// input is exhausted it is returned all-zero (callers test `!token.type`).
void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
{
    memset(token, 0, sizeof(*token));

    // Nothing left to tokenize.
    if (tokenizer->cursor >= tokenizer->cursor_end)
        return;

    if (!tokenizer->for_liquid_tag) {
        tokenizer_next_for_template(tokenizer, token);
        return;
    }

    tokenizer_next_for_liquid_tag(tokenizer, token);
}
|
151
222
|
|
@@ -159,7 +230,25 @@ static VALUE tokenizer_shift_method(VALUE self)
|
|
159
230
|
if (!token.type)
|
160
231
|
return Qnil;
|
161
232
|
|
162
|
-
|
233
|
+
// When sent back to Ruby, tokens are the raw string including whitespace
|
234
|
+
// and tag delimiters. It should be possible to reconstruct the exact
|
235
|
+
// template from the tokens.
|
236
|
+
return rb_enc_str_new(token.str_full, token.len_full, utf8_encoding);
|
237
|
+
}
|
238
|
+
|
239
|
+
// Tokenizer#shift_trimmed: returns the next token's trimmed text (without
// whitespace and tag delimiters) as a UTF-8 string, or nil when there are no
// more tokens. It exposes the output of tokenizer_next for tests.
static VALUE tokenizer_shift_trimmed_method(VALUE self)
{
    tokenizer_t *tokenizer;
    token_t next;

    Tokenizer_Get_Struct(self, tokenizer);
    tokenizer_next(tokenizer, &next);

    // tokenizer_next leaves the token zeroed when the input is exhausted.
    if (next.type == 0)
        return Qnil;

    return rb_enc_str_new(next.str_trimmed, next.len_trimmed, utf8_encoding);
}
|
164
253
|
|
165
254
|
static VALUE tokenizer_line_number_method(VALUE self)
|
@@ -173,12 +262,37 @@ static VALUE tokenizer_line_number_method(VALUE self)
|
|
173
262
|
return UINT2NUM(tokenizer->line_number);
|
174
263
|
}
|
175
264
|
|
176
|
-
|
265
|
+
// Tokenizer#for_liquid_tag: true when the tokenizer is in liquid-tag
// (line-oriented) mode, false otherwise.
static VALUE tokenizer_for_liquid_tag_method(VALUE self)
{
    tokenizer_t *tokenizer;
    Tokenizer_Get_Struct(self, tokenizer);

    if (tokenizer->for_liquid_tag) {
        return Qtrue;
    }
    return Qfalse;
}
|
272
|
+
|
273
|
+
|
274
|
+
// Temporary to test rollout of the fix for this bug
|
275
|
+
// Tokenizer#bug_compatible_whitespace_trimming!: turns on the
// bug_compatible_whitespace_trimming flag for this tokenizer. Returns nil.
static VALUE tokenizer_bug_compatible_whitespace_trimming(VALUE self)
{
    tokenizer_t *tokenizer;

    Tokenizer_Get_Struct(self, tokenizer);
    tokenizer->bug_compatible_whitespace_trimming = true;

    return Qnil;
}
|
282
|
+
|
283
|
+
void liquid_define_tokenizer(void)
|
177
284
|
{
|
178
285
|
cLiquidTokenizer = rb_define_class_under(mLiquidC, "Tokenizer", rb_cObject);
|
286
|
+
rb_global_variable(&cLiquidTokenizer);
|
287
|
+
|
179
288
|
rb_define_alloc_func(cLiquidTokenizer, tokenizer_allocate);
|
180
|
-
rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method,
|
181
|
-
rb_define_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
|
289
|
+
rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 3);
|
182
290
|
rb_define_method(cLiquidTokenizer, "line_number", tokenizer_line_number_method, 0);
|
291
|
+
rb_define_method(cLiquidTokenizer, "for_liquid_tag", tokenizer_for_liquid_tag_method, 0);
|
292
|
+
rb_define_method(cLiquidTokenizer, "bug_compatible_whitespace_trimming!", tokenizer_bug_compatible_whitespace_trimming, 0);
|
293
|
+
rb_define_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
|
294
|
+
|
295
|
+
// For testing the internal token representation.
|
296
|
+
rb_define_private_method(cLiquidTokenizer, "shift_trimmed", tokenizer_shift_trimmed_method, 0);
|
183
297
|
}
|
184
298
|
|
data/ext/liquid_c/tokenizer.h
CHANGED
@@ -6,31 +6,42 @@ enum token_type {
|
|
6
6
|
TOKEN_INVALID,
|
7
7
|
TOKEN_RAW,
|
8
8
|
TOKEN_TAG,
|
9
|
-
TOKEN_VARIABLE
|
9
|
+
TOKEN_VARIABLE,
|
10
|
+
TOKEN_BLANK_LIQUID_TAG_LINE
|
10
11
|
};
|
11
12
|
|
12
13
|
typedef struct token {
|
13
14
|
enum token_type type;
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
|
16
|
+
// str_trimmed contains no tag delimiters
|
17
|
+
const char *str_trimmed, *str_full;
|
18
|
+
long len_trimmed, len_full;
|
19
|
+
|
20
|
+
bool lstrip, rstrip;
|
18
21
|
} token_t;
|
19
22
|
|
20
23
|
typedef struct tokenizer {
|
21
24
|
VALUE source;
|
22
|
-
const char *cursor;
|
23
|
-
long length;
|
25
|
+
const char *cursor, *cursor_end;
|
24
26
|
unsigned int line_number;
|
25
|
-
|
27
|
+
bool lstrip_flag;
|
28
|
+
bool for_liquid_tag;
|
29
|
+
|
30
|
+
// Temporary to test rollout of the fix for this bug
|
31
|
+
bool bug_compatible_whitespace_trimming;
|
32
|
+
|
33
|
+
char *raw_tag_body;
|
34
|
+
unsigned int raw_tag_body_len;
|
26
35
|
} tokenizer_t;
|
27
36
|
|
28
37
|
extern VALUE cLiquidTokenizer;
|
29
38
|
extern const rb_data_type_t tokenizer_data_type;
|
30
39
|
#define Tokenizer_Get_Struct(obj, sval) TypedData_Get_Struct(obj, tokenizer_t, &tokenizer_data_type, sval)
|
31
40
|
|
32
|
-
void
|
41
|
+
void liquid_define_tokenizer(void);
|
33
42
|
void tokenizer_next(tokenizer_t *tokenizer, token_t *token);
|
34
43
|
|
44
|
+
void tokenizer_setup_for_liquid_tag(tokenizer_t *tokenizer, const char *cursor, const char *cursor_end, int line_number);
|
45
|
+
|
35
46
|
#endif
|
36
47
|
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#include "usage.h"
|
2
|
+
|
3
|
+
static VALUE cLiquidUsage;
|
4
|
+
static ID id_increment;
|
5
|
+
|
6
|
+
void usage_increment(const char *name)
|
7
|
+
{
|
8
|
+
VALUE name_str = rb_str_new_cstr(name);
|
9
|
+
rb_funcall(cLiquidUsage, id_increment, 1, name_str);
|
10
|
+
}
|
11
|
+
|
12
|
+
void liquid_define_usage(void)
|
13
|
+
{
|
14
|
+
cLiquidUsage = rb_const_get(mLiquid, rb_intern("Usage"));
|
15
|
+
rb_global_variable(&cLiquidUsage);
|
16
|
+
|
17
|
+
id_increment = rb_intern("increment");
|
18
|
+
}
|