liquid-c 4.0.1 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/cla.yml +23 -0
- data/.github/workflows/liquid.yml +36 -11
- data/.gitignore +4 -0
- data/.rubocop.yml +14 -0
- data/Gemfile +15 -5
- data/README.md +32 -8
- data/Rakefile +12 -63
- data/ext/liquid_c/block.c +493 -60
- data/ext/liquid_c/block.h +28 -2
- data/ext/liquid_c/c_buffer.c +42 -0
- data/ext/liquid_c/c_buffer.h +76 -0
- data/ext/liquid_c/context.c +233 -0
- data/ext/liquid_c/context.h +70 -0
- data/ext/liquid_c/document_body.c +97 -0
- data/ext/liquid_c/document_body.h +59 -0
- data/ext/liquid_c/expression.c +116 -0
- data/ext/liquid_c/expression.h +24 -0
- data/ext/liquid_c/extconf.rb +21 -9
- data/ext/liquid_c/intutil.h +22 -0
- data/ext/liquid_c/lexer.c +39 -3
- data/ext/liquid_c/lexer.h +18 -3
- data/ext/liquid_c/liquid.c +76 -6
- data/ext/liquid_c/liquid.h +24 -1
- data/ext/liquid_c/liquid_vm.c +618 -0
- data/ext/liquid_c/liquid_vm.h +25 -0
- data/ext/liquid_c/parse_context.c +76 -0
- data/ext/liquid_c/parse_context.h +13 -0
- data/ext/liquid_c/parser.c +153 -65
- data/ext/liquid_c/parser.h +4 -2
- data/ext/liquid_c/raw.c +136 -0
- data/ext/liquid_c/raw.h +6 -0
- data/ext/liquid_c/resource_limits.c +279 -0
- data/ext/liquid_c/resource_limits.h +23 -0
- data/ext/liquid_c/stringutil.h +44 -0
- data/ext/liquid_c/tokenizer.c +149 -35
- data/ext/liquid_c/tokenizer.h +20 -9
- data/ext/liquid_c/usage.c +18 -0
- data/ext/liquid_c/usage.h +9 -0
- data/ext/liquid_c/variable.c +196 -20
- data/ext/liquid_c/variable.h +18 -1
- data/ext/liquid_c/variable_lookup.c +44 -0
- data/ext/liquid_c/variable_lookup.h +8 -0
- data/ext/liquid_c/vm_assembler.c +491 -0
- data/ext/liquid_c/vm_assembler.h +240 -0
- data/ext/liquid_c/vm_assembler_pool.c +99 -0
- data/ext/liquid_c/vm_assembler_pool.h +26 -0
- data/lib/liquid/c/compile_ext.rb +44 -0
- data/lib/liquid/c/version.rb +3 -1
- data/lib/liquid/c.rb +226 -48
- data/liquid-c.gemspec +16 -10
- data/performance/c_profile.rb +23 -0
- data/performance.rb +6 -4
- data/rakelib/compile.rake +15 -0
- data/rakelib/integration_test.rake +43 -0
- data/rakelib/performance.rake +43 -0
- data/rakelib/rubocop.rake +6 -0
- data/rakelib/unit_test.rake +14 -0
- data/test/integration_test.rb +11 -0
- data/test/liquid_test_helper.rb +21 -0
- data/test/test_helper.rb +21 -2
- data/test/unit/block_test.rb +137 -0
- data/test/unit/context_test.rb +85 -0
- data/test/unit/expression_test.rb +191 -0
- data/test/unit/gc_stress_test.rb +28 -0
- data/test/unit/raw_test.rb +93 -0
- data/test/unit/resource_limits_test.rb +50 -0
- data/test/unit/tokenizer_test.rb +90 -20
- data/test/unit/variable_test.rb +279 -60
- metadata +60 -11
- data/test/liquid_test.rb +0 -11
@@ -0,0 +1,279 @@
|
|
1
|
+
#include "liquid.h"
|
2
|
+
#include "resource_limits.h"
|
3
|
+
|
4
|
+
VALUE cLiquidResourceLimits;
|
5
|
+
|
6
|
+
// dfree callback registered in resource_limits_data_type: releases the
// resource_limits_t that backs a ResourceLimits object.
static void resource_limits_free(void *ptr)
{
    xfree(ptr);
}
|
11
|
+
|
12
|
+
static size_t resource_limits_memsize(const void *ptr)
|
13
|
+
{
|
14
|
+
return sizeof(resource_limits_t);
|
15
|
+
}
|
16
|
+
|
17
|
+
const rb_data_type_t resource_limits_data_type = {
|
18
|
+
"liquid_resource_limits",
|
19
|
+
{ NULL, resource_limits_free, resource_limits_memsize },
|
20
|
+
NULL, NULL, RUBY_TYPED_FREE_IMMEDIATELY
|
21
|
+
};
|
22
|
+
|
23
|
+
// Restores the per-render counters to their initial state. The configured
// limits (render_length_limit, render_score_limit, assign_score_limit) are
// left untouched.
//
// reached_limit is cleared here: it is only set to true by
// resource_limits_raise_limits_reached(), so a freshly allocated or reset
// object must not report that a limit was already reached.
static void resource_limits_reset(resource_limits_t *resource_limit)
{
    resource_limit->reached_limit = false;
    // A negative capture length means "no capture in progress"; see
    // resource_limits_increment_write_score().
    resource_limit->last_capture_length = -1;
    resource_limit->render_score = 0;
    resource_limit->assign_score = 0;
}
|
30
|
+
|
31
|
+
// Allocation function for Liquid::C::ResourceLimits: wraps a new
// resource_limits_t in a typed-data object and puts its counters into the
// reset state before handing the object back.
static VALUE resource_limits_allocate(VALUE klass)
{
    resource_limits_t *limits;
    VALUE wrapper = TypedData_Make_Struct(klass, resource_limits_t, &resource_limits_data_type, limits);

    resource_limits_reset(limits);
    return wrapper;
}
|
41
|
+
|
42
|
+
// ResourceLimits#render_length_limit: returns the stored limit as a Ruby Integer.
static VALUE resource_limits_render_length_limit_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    long limit = limits->render_length_limit;
    return LONG2NUM(limit);
}
|
49
|
+
|
50
|
+
// ResourceLimits#render_length_limit=: stores the new limit. nil is stored as
// LONG_MAX; any other value is converted with NUM2LONG. Always returns nil.
static VALUE resource_limits_set_render_length_limit_method(VALUE self, VALUE render_length_limit)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    limits->render_length_limit =
        (render_length_limit == Qnil) ? LONG_MAX : NUM2LONG(render_length_limit);

    return Qnil;
}
|
63
|
+
|
64
|
+
// ResourceLimits#render_score_limit: returns the stored limit as a Ruby Integer.
static VALUE resource_limits_render_score_limit_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    long limit = limits->render_score_limit;
    return LONG2NUM(limit);
}
|
71
|
+
|
72
|
+
// ResourceLimits#render_score_limit=: stores the new limit. nil is stored as
// LONG_MAX; any other value is converted with NUM2LONG. Always returns nil.
static VALUE resource_limits_set_render_score_limit_method(VALUE self, VALUE render_score_limit)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    limits->render_score_limit =
        (render_score_limit == Qnil) ? LONG_MAX : NUM2LONG(render_score_limit);

    return Qnil;
}
|
85
|
+
|
86
|
+
// ResourceLimits#assign_score_limit: returns the stored limit as a Ruby Integer.
static VALUE resource_limits_assign_score_limit_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    long limit = limits->assign_score_limit;
    return LONG2NUM(limit);
}
|
93
|
+
|
94
|
+
// ResourceLimits#assign_score_limit=: stores the new limit. nil is stored as
// LONG_MAX; any other value is converted with NUM2LONG. Always returns nil.
static VALUE resource_limits_set_assign_score_limit_method(VALUE self, VALUE assign_score_limit)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    limits->assign_score_limit =
        (assign_score_limit == Qnil) ? LONG_MAX : NUM2LONG(assign_score_limit);

    return Qnil;
}
|
107
|
+
|
108
|
+
// ResourceLimits#render_score: returns the accumulated render score.
static VALUE resource_limits_render_score_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    long score = limits->render_score;
    return LONG2NUM(score);
}
|
115
|
+
|
116
|
+
// ResourceLimits#assign_score: returns the accumulated assign score.
static VALUE resource_limits_assign_score_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    long score = limits->assign_score;
    return LONG2NUM(score);
}
|
123
|
+
|
124
|
+
// ResourceLimits#initialize(render_length_limit, render_score_limit,
// assign_score_limit): delegates each argument to the matching setter, so nil
// handling and integer conversion are the same as for the `...=` methods.
// The setters run in argument order; their nil results are discarded.
static VALUE resource_limits_initialize_method(VALUE self, VALUE render_length_limit,
                                               VALUE render_score_limit, VALUE assign_score_limit)
{
    (void)resource_limits_set_render_length_limit_method(self, render_length_limit);
    (void)resource_limits_set_render_score_limit_method(self, render_score_limit);
    (void)resource_limits_set_assign_score_limit_method(self, assign_score_limit);

    return Qnil;
}
|
133
|
+
|
134
|
+
__attribute__((noreturn))
|
135
|
+
void resource_limits_raise_limits_reached(resource_limits_t *resource_limit)
|
136
|
+
{
|
137
|
+
resource_limit->reached_limit = true;
|
138
|
+
rb_raise(cMemoryError, "Memory limits exceeded");
|
139
|
+
}
|
140
|
+
|
141
|
+
// Adds `amount` to the render score and raises (via
// resource_limits_raise_limits_reached) once the score exceeds
// render_score_limit.
//
// The addition saturates at LONG_MAX / LONG_MIN instead of overflowing:
// signed overflow is undefined behavior in C, and `amount` can be an
// arbitrary long supplied from Ruby. A saturated score still compares
// correctly against any limit below LONG_MAX, and never exceeds a limit of
// LONG_MAX (the value stored when the limit is set to nil).
void resource_limits_increment_render_score(resource_limits_t *resource_limits, long amount)
{
    long score = resource_limits->render_score;

    if (amount > 0 && score > LONG_MAX - amount) {
        score = LONG_MAX;
    } else if (amount < 0 && score < LONG_MIN - amount) {
        score = LONG_MIN;
    } else {
        score += amount;
    }
    resource_limits->render_score = score;

    if (score > resource_limits->render_score_limit) {
        resource_limits_raise_limits_reached(resource_limits);
    }
}
|
149
|
+
|
150
|
+
// ResourceLimits#increment_render_score(amount): converts `amount` with
// NUM2LONG and forwards to resource_limits_increment_render_score, which
// raises when the limit is exceeded. Returns nil otherwise.
static VALUE resource_limits_increment_render_score_method(VALUE self, VALUE amount)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    long delta = NUM2LONG(amount);
    resource_limits_increment_render_score(limits, delta);

    return Qnil;
}
|
159
|
+
|
160
|
+
// Adds `amount` to the assign score and raises (via
// resource_limits_raise_limits_reached) once the score exceeds
// assign_score_limit.
//
// The addition saturates at LONG_MAX / LONG_MIN instead of overflowing:
// signed overflow is undefined behavior in C, and `amount` can be an
// arbitrary long supplied from Ruby. A saturated score still compares
// correctly against any limit below LONG_MAX, and never exceeds a limit of
// LONG_MAX (the value stored when the limit is set to nil).
static void resource_limits_increment_assign_score(resource_limits_t *resource_limits, long amount)
{
    long score = resource_limits->assign_score;

    if (amount > 0 && score > LONG_MAX - amount) {
        score = LONG_MAX;
    } else if (amount < 0 && score < LONG_MIN - amount) {
        score = LONG_MIN;
    } else {
        score += amount;
    }
    resource_limits->assign_score = score;

    if (score > resource_limits->assign_score_limit) {
        resource_limits_raise_limits_reached(resource_limits);
    }
}
|
168
|
+
|
169
|
+
// ResourceLimits#increment_assign_score(amount): converts `amount` with
// NUM2LONG and forwards to resource_limits_increment_assign_score, which
// raises when the limit is exceeded. Returns nil otherwise.
static VALUE resource_limits_increment_assign_score_method(VALUE self, VALUE amount)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    long delta = NUM2LONG(amount);
    resource_limits_increment_assign_score(limits, delta);

    return Qnil;
}
|
178
|
+
|
179
|
+
// Accounts for the current byte length of `output` (read with RSTRING_LEN).
//
// - No capture in progress (last_capture_length is negative): the length is
//   checked against render_length_limit, and the limit error is raised if it
//   is exceeded.
// - Capture in progress: the growth since the previous call is added to the
//   assign score, and the new length is remembered for the next call.
void resource_limits_increment_write_score(resource_limits_t *resource_limits, VALUE output)
{
    long captured = RSTRING_LEN(output);
    long previous = resource_limits->last_capture_length;

    if (previous < 0) {
        if (captured > resource_limits->render_length_limit) {
            resource_limits_raise_limits_reached(resource_limits);
        }
        return;
    }

    // Remember the new length before incrementing, since the increment may raise.
    resource_limits->last_capture_length = captured;
    resource_limits_increment_assign_score(resource_limits, captured - previous);
}
|
191
|
+
|
192
|
+
// ResourceLimits#increment_write_score(output): requires `output` to be a
// String (Check_Type raises otherwise), then forwards to
// resource_limits_increment_write_score. Returns nil.
static VALUE resource_limits_increment_write_score_method(VALUE self, VALUE output)
{
    resource_limits_t *limits;

    // Validate the argument before unwrapping self, as RSTRING_LEN is applied to it later.
    Check_Type(output, T_STRING);
    ResourceLimits_Get_Struct(self, limits);

    resource_limits_increment_write_score(limits, output);

    return Qnil;
}
|
203
|
+
|
204
|
+
// ResourceLimits#raise_limits_reached: marks the limit as reached and raises.
static VALUE resource_limits_raise_limits_reached_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    // The callee is declared noreturn and always raises, so no value is returned here.
    resource_limits_raise_limits_reached(limits);
}
|
211
|
+
|
212
|
+
// ResourceLimits#reached?: true when the reached_limit flag is set.
static VALUE resource_limits_reached_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    if (limits->reached_limit) {
        return Qtrue;
    }
    return Qfalse;
}
|
219
|
+
|
220
|
+
struct capture_ensure_t {
|
221
|
+
resource_limits_t *resource_limits;
|
222
|
+
long old_capture_length;
|
223
|
+
};
|
224
|
+
|
225
|
+
// Ensure callback for with_capture: `data` is a pointer to a
// struct capture_ensure_t smuggled through a VALUE. Puts the saved capture
// length back on the limits object.
static VALUE capture_ensure(VALUE data)
{
    struct capture_ensure_t *saved = (struct capture_ensure_t *)data;
    resource_limits_t *limits = saved->resource_limits;

    limits->last_capture_length = saved->old_capture_length;

    return Qnil;
}
|
232
|
+
|
233
|
+
// ResourceLimits#with_capture { ... }: yields with last_capture_length set to
// 0, and restores the previous value afterwards through rb_ensure, so the
// restore also runs if the block raises. Returns the value of rb_ensure,
// i.e. the block's result.
static VALUE resource_limits_with_capture_method(VALUE self)
{
    resource_limits_t *limits;
    ResourceLimits_Get_Struct(self, limits);

    // Lives on this stack frame; only used by capture_ensure during the rb_ensure call below.
    struct capture_ensure_t saved;
    saved.resource_limits = limits;
    saved.old_capture_length = limits->last_capture_length;

    limits->last_capture_length = 0;

    return rb_ensure(rb_yield, Qundef, capture_ensure, (VALUE)&saved);
}
|
247
|
+
|
248
|
+
|
249
|
+
// ResourceLimits#reset: reinitializes the counters via resource_limits_reset.
// Returns nil.
static VALUE resource_limits_reset_method(VALUE self)
{
    resource_limits_t *limits;

    ResourceLimits_Get_Struct(self, limits);
    resource_limits_reset(limits);

    return Qnil;
}
|
256
|
+
|
257
|
+
void liquid_define_resource_limits(void)
|
258
|
+
{
|
259
|
+
cLiquidResourceLimits = rb_define_class_under(mLiquidC, "ResourceLimits", rb_cObject);
|
260
|
+
rb_global_variable(&cLiquidResourceLimits);
|
261
|
+
|
262
|
+
rb_define_alloc_func(cLiquidResourceLimits, resource_limits_allocate);
|
263
|
+
rb_define_method(cLiquidResourceLimits, "initialize", resource_limits_initialize_method, 3);
|
264
|
+
rb_define_method(cLiquidResourceLimits, "render_length_limit", resource_limits_render_length_limit_method, 0);
|
265
|
+
rb_define_method(cLiquidResourceLimits, "render_length_limit=", resource_limits_set_render_length_limit_method, 1);
|
266
|
+
rb_define_method(cLiquidResourceLimits, "render_score_limit", resource_limits_render_score_limit_method, 0);
|
267
|
+
rb_define_method(cLiquidResourceLimits, "render_score_limit=", resource_limits_set_render_score_limit_method, 1);
|
268
|
+
rb_define_method(cLiquidResourceLimits, "assign_score_limit", resource_limits_assign_score_limit_method, 0);
|
269
|
+
rb_define_method(cLiquidResourceLimits, "assign_score_limit=", resource_limits_set_assign_score_limit_method, 1);
|
270
|
+
rb_define_method(cLiquidResourceLimits, "render_score", resource_limits_render_score_method, 0);
|
271
|
+
rb_define_method(cLiquidResourceLimits, "assign_score", resource_limits_assign_score_method, 0);
|
272
|
+
rb_define_method(cLiquidResourceLimits, "increment_render_score", resource_limits_increment_render_score_method, 1);
|
273
|
+
rb_define_method(cLiquidResourceLimits, "increment_assign_score", resource_limits_increment_assign_score_method, 1);
|
274
|
+
rb_define_method(cLiquidResourceLimits, "increment_write_score", resource_limits_increment_write_score_method, 1);
|
275
|
+
rb_define_method(cLiquidResourceLimits, "raise_limits_reached", resource_limits_raise_limits_reached_method, 0);
|
276
|
+
rb_define_method(cLiquidResourceLimits, "reached?", resource_limits_reached_method, 0);
|
277
|
+
rb_define_method(cLiquidResourceLimits, "reset", resource_limits_reset_method, 0);
|
278
|
+
rb_define_method(cLiquidResourceLimits, "with_capture", resource_limits_with_capture_method, 0);
|
279
|
+
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#ifndef LIQUID_RESOURCE_LIMITS
|
2
|
+
#define LIQUID_RESOURCE_LIMITS
|
3
|
+
|
4
|
+
// Native state behind a ResourceLimits object.
typedef struct resource_limits {
    // Configured limits; LONG_MAX is stored when the Ruby-side value is nil.
    long render_length_limit;
    long render_score_limit;
    long assign_score_limit;

    // Set when the limit error is raised; exposed as `reached?`.
    bool reached_limit;

    // Output length seen at the last write while capturing; negative when no
    // capture is in progress.
    long last_capture_length;

    // Running totals compared against the limits above.
    long render_score;
    long assign_score;
} resource_limits_t;
|
13
|
+
|
14
|
+
extern VALUE cLiquidResourceLimits;
|
15
|
+
extern const rb_data_type_t resource_limits_data_type;
|
16
|
+
#define ResourceLimits_Get_Struct(obj, sval) TypedData_Get_Struct(obj, resource_limits_t, &resource_limits_data_type, sval)
|
17
|
+
|
18
|
+
void liquid_define_resource_limits(void);
|
19
|
+
void resource_limits_raise_limits_reached(resource_limits_t *resource_limit);
|
20
|
+
void resource_limits_increment_render_score(resource_limits_t *resource_limits, long amount);
|
21
|
+
void resource_limits_increment_write_score(resource_limits_t *resource_limits, VALUE output);
|
22
|
+
|
23
|
+
#endif
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#if !defined(LIQUID_UTIL_H)
|
2
|
+
#define LIQUID_UTIL_H
|
3
|
+
|
4
|
+
// Scans forward from `start` (never past `end`) while `func` returns nonzero
// for the current byte, passed as an unsigned char value. Returns a pointer to
// the first byte that fails the predicate, or `end` if every byte matches.
inline static const char *read_while(const char *start, const char *end, int (func)(int))
{
    const char *cursor;

    for (cursor = start; cursor < end; cursor++) {
        if (!func((unsigned char) *cursor))
            break;
    }
    return cursor;
}
|
9
|
+
|
10
|
+
// Scans backward from `end` (exclusive) toward `start` while `func` returns
// nonzero for the preceding byte, passed as an unsigned char value. Returns a
// pointer one past the last byte that fails the predicate, or `start` if
// every byte in [start, end) matches or the range is empty.
//
// The byte before `end` is inspected with end[-1] rather than by decrementing
// first, so no pointer before `start` is ever formed (that would be undefined
// behavior when `start` is the beginning of the underlying buffer).
inline static const char *read_while_reverse(const char *start, const char *end, int (func)(int))
{
    while (start < end && func((unsigned char) end[-1])) end--;
    return end;
}
|
17
|
+
|
18
|
+
// Returns the number of '\n' bytes in the half-open range [start, end).
inline static int count_newlines(const char *start, const char *end)
{
    int newlines = 0;

    for (const char *p = start; p < end; p++) {
        newlines += (*p == '\n');
    }
    return newlines;
}
|
27
|
+
|
28
|
+
// Predicate for read_while-style scanners: nonzero for characters that
// rb_isspace accepts, except '\n'.
inline static int is_non_newline_space(int c)
{
    if (c == '\n')
        return 0;
    return rb_isspace(c) ? 1 : 0;
}
|
32
|
+
|
33
|
+
// Predicate for read_while-style scanners: 1 for any character other than
// '\n', 0 for '\n'.
inline static int not_newline(int c)
{
    return (c == '\n') ? 0 : 1;
}
|
37
|
+
|
38
|
+
// True for an underscore or any character accepted by ISALNUM.
inline static bool is_word_char(char c)
{
    if (c == '_')
        return true;
    return ISALNUM(c);
}
|
42
|
+
|
43
|
+
#endif
|
44
|
+
|
data/ext/liquid_c/tokenizer.c
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
+
#include <assert.h>
|
1
2
|
#include "liquid.h"
|
2
3
|
#include "tokenizer.h"
|
4
|
+
#include "stringutil.h"
|
3
5
|
|
4
6
|
VALUE cLiquidTokenizer;
|
5
7
|
|
@@ -35,40 +37,89 @@ static VALUE tokenizer_allocate(VALUE klass)
|
|
35
37
|
|
36
38
|
obj = TypedData_Make_Struct(klass, tokenizer_t, &tokenizer_data_type, tokenizer);
|
37
39
|
tokenizer->source = Qnil;
|
40
|
+
tokenizer->bug_compatible_whitespace_trimming = false;
|
41
|
+
tokenizer->raw_tag_body = NULL;
|
42
|
+
tokenizer->raw_tag_body_len = 0;
|
38
43
|
return obj;
|
39
44
|
}
|
40
45
|
|
41
|
-
static VALUE tokenizer_initialize_method(VALUE self, VALUE source, VALUE
|
46
|
+
static VALUE tokenizer_initialize_method(VALUE self, VALUE source, VALUE start_line_number, VALUE for_liquid_tag)
|
42
47
|
{
|
43
48
|
tokenizer_t *tokenizer;
|
44
49
|
|
45
50
|
Check_Type(source, T_STRING);
|
51
|
+
check_utf8_encoding(source, "source");
|
52
|
+
|
53
|
+
#define MAX_SOURCE_CODE_BYTES ((1 << 24) - 1)
|
54
|
+
if (RSTRING_LEN(source) > MAX_SOURCE_CODE_BYTES) {
|
55
|
+
rb_enc_raise(utf8_encoding, rb_eArgError, "Source too large, max %d bytes", MAX_SOURCE_CODE_BYTES);
|
56
|
+
}
|
57
|
+
#undef MAX_SOURCE_CODE_BYTES
|
58
|
+
|
46
59
|
Tokenizer_Get_Struct(self, tokenizer);
|
47
60
|
source = rb_str_dup_frozen(source);
|
48
61
|
tokenizer->source = source;
|
49
62
|
tokenizer->cursor = RSTRING_PTR(source);
|
50
|
-
tokenizer->
|
51
|
-
tokenizer->lstrip_flag =
|
63
|
+
tokenizer->cursor_end = tokenizer->cursor + RSTRING_LEN(source);
|
64
|
+
tokenizer->lstrip_flag = false;
|
52
65
|
// tokenizer->line_number keeps track of the current line number or it is 0
|
53
66
|
// to indicate that line numbers aren't being calculated
|
54
|
-
tokenizer->line_number =
|
67
|
+
tokenizer->line_number = FIX2UINT(start_line_number);
|
68
|
+
tokenizer->for_liquid_tag = RTEST(for_liquid_tag);
|
55
69
|
return Qnil;
|
56
70
|
}
|
57
71
|
|
58
|
-
|
72
|
+
// Internal function to setup an existing tokenizer from C for a liquid tag.
|
73
|
+
// This overwrites the passed in tokenizer, so a copy of the struct should
|
74
|
+
// be used to reset the tokenizer after parsing the liquid tag.
|
75
|
+
void tokenizer_setup_for_liquid_tag(tokenizer_t *tokenizer, const char *cursor, const char *cursor_end, int line_number)
|
59
76
|
{
|
60
|
-
|
61
|
-
|
62
|
-
|
77
|
+
tokenizer->cursor = cursor;
|
78
|
+
tokenizer->cursor_end = cursor_end;
|
79
|
+
tokenizer->lstrip_flag = false;
|
80
|
+
tokenizer->line_number = line_number;
|
81
|
+
tokenizer->for_liquid_tag = true;
|
82
|
+
}
|
83
|
+
|
84
|
+
// Tokenizes contents of {% liquid ... %}
|
85
|
+
static void tokenizer_next_for_liquid_tag(tokenizer_t *tokenizer, token_t *token)
|
86
|
+
{
|
87
|
+
const char *end = tokenizer->cursor_end;
|
88
|
+
const char *start = tokenizer->cursor;
|
89
|
+
const char *start_trimmed = read_while(start, end, is_non_newline_space);
|
90
|
+
|
91
|
+
token->str_full = start;
|
92
|
+
token->str_trimmed = start_trimmed;
|
93
|
+
|
94
|
+
const char *end_full = read_while(start_trimmed, end, not_newline);
|
95
|
+
if (end_full < end) {
|
96
|
+
tokenizer->cursor = end_full + 1;
|
97
|
+
if (tokenizer->line_number)
|
98
|
+
tokenizer->line_number++;
|
99
|
+
} else {
|
100
|
+
tokenizer->cursor = end_full;
|
63
101
|
}
|
64
102
|
|
103
|
+
const char *end_trimmed = read_while_reverse(start_trimmed, end_full, rb_isspace);
|
104
|
+
|
105
|
+
token->len_trimmed = end_trimmed - start_trimmed;
|
106
|
+
token->len_full = end_full - token->str_full;
|
107
|
+
|
108
|
+
if (token->len_trimmed == 0) {
|
109
|
+
token->type = TOKEN_BLANK_LIQUID_TAG_LINE;
|
110
|
+
} else {
|
111
|
+
token->type = TOKEN_TAG;
|
112
|
+
}
|
113
|
+
}
|
114
|
+
|
115
|
+
// Tokenizes contents of a full Liquid template
|
116
|
+
static void tokenizer_next_for_template(tokenizer_t *tokenizer, token_t *token)
|
117
|
+
{
|
65
118
|
const char *cursor = tokenizer->cursor;
|
66
|
-
const char *last =
|
119
|
+
const char *last = tokenizer->cursor_end - 1;
|
67
120
|
|
68
|
-
token->
|
121
|
+
token->str_full = cursor;
|
69
122
|
token->type = TOKEN_RAW;
|
70
|
-
token->lstrip = 0;
|
71
|
-
token->rstrip = 0;
|
72
123
|
|
73
124
|
while (cursor < last) {
|
74
125
|
if (*cursor++ != '{')
|
@@ -78,17 +129,17 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
|
|
78
129
|
if (c != '%' && c != '{')
|
79
130
|
continue;
|
80
131
|
if (cursor <= last && *cursor == '-') {
|
81
|
-
|
82
|
-
|
132
|
+
cursor++;
|
133
|
+
token->rstrip = 1;
|
83
134
|
}
|
84
135
|
if (cursor - tokenizer->cursor > (ptrdiff_t)(2 + token->rstrip)) {
|
85
136
|
token->type = TOKEN_RAW;
|
86
137
|
cursor -= 2 + token->rstrip;
|
87
138
|
token->lstrip = tokenizer->lstrip_flag;
|
88
|
-
tokenizer->lstrip_flag =
|
139
|
+
tokenizer->lstrip_flag = false;
|
89
140
|
goto found;
|
90
141
|
}
|
91
|
-
tokenizer->lstrip_flag =
|
142
|
+
tokenizer->lstrip_flag = false;
|
92
143
|
token->type = TOKEN_INVALID;
|
93
144
|
token->lstrip = token->rstrip;
|
94
145
|
token->rstrip = 0;
|
@@ -103,12 +154,12 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
|
|
103
154
|
continue;
|
104
155
|
token->type = TOKEN_TAG;
|
105
156
|
if(cursor[-3] == '-')
|
106
|
-
token->rstrip = tokenizer->lstrip_flag =
|
157
|
+
token->rstrip = tokenizer->lstrip_flag = true;
|
107
158
|
goto found;
|
108
159
|
}
|
109
160
|
// unterminated tag
|
110
161
|
cursor = tokenizer->cursor + 2;
|
111
|
-
tokenizer->lstrip_flag =
|
162
|
+
tokenizer->lstrip_flag = false;
|
112
163
|
goto found;
|
113
164
|
} else {
|
114
165
|
while (cursor < last) {
|
@@ -121,31 +172,51 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
|
|
121
172
|
}
|
122
173
|
token->type = TOKEN_VARIABLE;
|
123
174
|
if(cursor[-3] == '-')
|
124
|
-
token->rstrip = tokenizer->lstrip_flag =
|
175
|
+
token->rstrip = tokenizer->lstrip_flag = true;
|
125
176
|
goto found;
|
126
177
|
}
|
127
178
|
// unterminated variable
|
128
179
|
cursor = tokenizer->cursor + 2;
|
129
|
-
tokenizer->lstrip_flag =
|
180
|
+
tokenizer->lstrip_flag = false;
|
130
181
|
goto found;
|
131
182
|
}
|
132
183
|
}
|
133
184
|
cursor = last + 1;
|
134
185
|
token->lstrip = tokenizer->lstrip_flag;
|
135
|
-
tokenizer->lstrip_flag =
|
186
|
+
tokenizer->lstrip_flag = false;
|
136
187
|
found:
|
137
|
-
token->
|
138
|
-
|
139
|
-
|
188
|
+
token->len_full = cursor - token->str_full;
|
189
|
+
|
190
|
+
token->str_trimmed = token->str_full;
|
191
|
+
token->len_trimmed = token->len_full;
|
192
|
+
|
193
|
+
if (token->type == TOKEN_VARIABLE || token->type == TOKEN_TAG) {
|
194
|
+
token->str_trimmed += 2 + token->lstrip;
|
195
|
+
token->len_trimmed -= 2 + token->lstrip + 2;
|
196
|
+
if (token->rstrip && token->len_trimmed)
|
197
|
+
token->len_trimmed--;
|
198
|
+
}
|
199
|
+
|
200
|
+
assert(token->len_trimmed >= 0);
|
201
|
+
|
202
|
+
tokenizer->cursor += token->len_full;
|
140
203
|
|
141
204
|
if (tokenizer->line_number) {
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
205
|
+
tokenizer->line_number += count_newlines(token->str_full, token->str_full + token->len_full);
|
206
|
+
}
|
207
|
+
}
|
208
|
+
|
209
|
+
void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
|
210
|
+
{
|
211
|
+
memset(token, 0, sizeof(*token));
|
212
|
+
|
213
|
+
if (tokenizer->cursor >= tokenizer->cursor_end) {
|
214
|
+
return;
|
215
|
+
}
|
216
|
+
if (tokenizer->for_liquid_tag) {
|
217
|
+
tokenizer_next_for_liquid_tag(tokenizer, token);
|
218
|
+
} else {
|
219
|
+
tokenizer_next_for_template(tokenizer, token);
|
149
220
|
}
|
150
221
|
}
|
151
222
|
|
@@ -159,7 +230,25 @@ static VALUE tokenizer_shift_method(VALUE self)
|
|
159
230
|
if (!token.type)
|
160
231
|
return Qnil;
|
161
232
|
|
162
|
-
|
233
|
+
// When sent back to Ruby, tokens are the raw string including whitespace
|
234
|
+
// and tag delimiters. It should be possible to reconstruct the exact
|
235
|
+
// template from the tokens.
|
236
|
+
return rb_enc_str_new(token.str_full, token.len_full, utf8_encoding);
|
237
|
+
}
|
238
|
+
|
239
|
+
static VALUE tokenizer_shift_trimmed_method(VALUE self)
|
240
|
+
{
|
241
|
+
tokenizer_t *tokenizer;
|
242
|
+
Tokenizer_Get_Struct(self, tokenizer);
|
243
|
+
|
244
|
+
token_t token;
|
245
|
+
tokenizer_next(tokenizer, &token);
|
246
|
+
if (!token.type)
|
247
|
+
return Qnil;
|
248
|
+
|
249
|
+
// This method doesn't include whitespace and tag delimiters. It allows for
|
250
|
+
// testing the output of tokenizer_next as used by rb_block_parse.
|
251
|
+
return rb_enc_str_new(token.str_trimmed, token.len_trimmed, utf8_encoding);
|
163
252
|
}
|
164
253
|
|
165
254
|
static VALUE tokenizer_line_number_method(VALUE self)
|
@@ -173,12 +262,37 @@ static VALUE tokenizer_line_number_method(VALUE self)
|
|
173
262
|
return UINT2NUM(tokenizer->line_number);
|
174
263
|
}
|
175
264
|
|
176
|
-
|
265
|
+
static VALUE tokenizer_for_liquid_tag_method(VALUE self)
|
266
|
+
{
|
267
|
+
tokenizer_t *tokenizer;
|
268
|
+
Tokenizer_Get_Struct(self, tokenizer);
|
269
|
+
|
270
|
+
return tokenizer->for_liquid_tag ? Qtrue : Qfalse;
|
271
|
+
}
|
272
|
+
|
273
|
+
|
274
|
+
// Temporary to test rollout of the fix for this bug
|
275
|
+
static VALUE tokenizer_bug_compatible_whitespace_trimming(VALUE self) {
|
276
|
+
tokenizer_t *tokenizer;
|
277
|
+
Tokenizer_Get_Struct(self, tokenizer);
|
278
|
+
|
279
|
+
tokenizer->bug_compatible_whitespace_trimming = true;
|
280
|
+
return Qnil;
|
281
|
+
}
|
282
|
+
|
283
|
+
void liquid_define_tokenizer(void)
|
177
284
|
{
|
178
285
|
cLiquidTokenizer = rb_define_class_under(mLiquidC, "Tokenizer", rb_cObject);
|
286
|
+
rb_global_variable(&cLiquidTokenizer);
|
287
|
+
|
179
288
|
rb_define_alloc_func(cLiquidTokenizer, tokenizer_allocate);
|
180
|
-
rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method,
|
181
|
-
rb_define_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
|
289
|
+
rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 3);
|
182
290
|
rb_define_method(cLiquidTokenizer, "line_number", tokenizer_line_number_method, 0);
|
291
|
+
rb_define_method(cLiquidTokenizer, "for_liquid_tag", tokenizer_for_liquid_tag_method, 0);
|
292
|
+
rb_define_method(cLiquidTokenizer, "bug_compatible_whitespace_trimming!", tokenizer_bug_compatible_whitespace_trimming, 0);
|
293
|
+
rb_define_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
|
294
|
+
|
295
|
+
// For testing the internal token representation.
|
296
|
+
rb_define_private_method(cLiquidTokenizer, "shift_trimmed", tokenizer_shift_trimmed_method, 0);
|
183
297
|
}
|
184
298
|
|
data/ext/liquid_c/tokenizer.h
CHANGED
@@ -6,31 +6,42 @@ enum token_type {
|
|
6
6
|
TOKEN_INVALID,
|
7
7
|
TOKEN_RAW,
|
8
8
|
TOKEN_TAG,
|
9
|
-
TOKEN_VARIABLE
|
9
|
+
TOKEN_VARIABLE,
|
10
|
+
TOKEN_BLANK_LIQUID_TAG_LINE
|
10
11
|
};
|
11
12
|
|
12
13
|
typedef struct token {
|
13
14
|
enum token_type type;
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
|
16
|
+
// str_trimmed contains no tag delimiters
|
17
|
+
const char *str_trimmed, *str_full;
|
18
|
+
long len_trimmed, len_full;
|
19
|
+
|
20
|
+
bool lstrip, rstrip;
|
18
21
|
} token_t;
|
19
22
|
|
20
23
|
typedef struct tokenizer {
|
21
24
|
VALUE source;
|
22
|
-
const char *cursor;
|
23
|
-
long length;
|
25
|
+
const char *cursor, *cursor_end;
|
24
26
|
unsigned int line_number;
|
25
|
-
|
27
|
+
bool lstrip_flag;
|
28
|
+
bool for_liquid_tag;
|
29
|
+
|
30
|
+
// Temporary to test rollout of the fix for this bug
|
31
|
+
bool bug_compatible_whitespace_trimming;
|
32
|
+
|
33
|
+
char *raw_tag_body;
|
34
|
+
unsigned int raw_tag_body_len;
|
26
35
|
} tokenizer_t;
|
27
36
|
|
28
37
|
extern VALUE cLiquidTokenizer;
|
29
38
|
extern const rb_data_type_t tokenizer_data_type;
|
30
39
|
#define Tokenizer_Get_Struct(obj, sval) TypedData_Get_Struct(obj, tokenizer_t, &tokenizer_data_type, sval)
|
31
40
|
|
32
|
-
void
|
41
|
+
void liquid_define_tokenizer(void);
|
33
42
|
void tokenizer_next(tokenizer_t *tokenizer, token_t *token);
|
34
43
|
|
44
|
+
void tokenizer_setup_for_liquid_tag(tokenizer_t *tokenizer, const char *cursor, const char *cursor_end, int line_number);
|
45
|
+
|
35
46
|
#endif
|
36
47
|
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#include "usage.h"
|
2
|
+
|
3
|
+
static VALUE cLiquidUsage;
|
4
|
+
static ID id_increment;
|
5
|
+
|
6
|
+
void usage_increment(const char *name)
|
7
|
+
{
|
8
|
+
VALUE name_str = rb_str_new_cstr(name);
|
9
|
+
rb_funcall(cLiquidUsage, id_increment, 1, name_str);
|
10
|
+
}
|
11
|
+
|
12
|
+
void liquid_define_usage(void)
|
13
|
+
{
|
14
|
+
cLiquidUsage = rb_const_get(mLiquid, rb_intern("Usage"));
|
15
|
+
rb_global_variable(&cLiquidUsage);
|
16
|
+
|
17
|
+
id_increment = rb_intern("increment");
|
18
|
+
}
|