liquid-c 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1f1e4b7a91ef500fc48563ecdff875658c045b0e
4
- data.tar.gz: 460cbca81b9ff70a9c20e4051cdc62a26a1e2c7b
3
+ metadata.gz: ceaa0d7db7673b4b6e73fb7e669a2728dbafb1b3
4
+ data.tar.gz: 018116a6bedfc9305a3e64aaa6624c4dfb52d7b3
5
5
  SHA512:
6
- metadata.gz: 1ecf742609e18e26a982897562f3ff0ef0c06e77045d12b98ad36fda3081e4350faffb9bae860ce1ecc060d8c6138b7fd482c323d763538dc38b13bc5738d1f8
7
- data.tar.gz: 817638febdcb9a54399ab8aec528091b1c24f66510816e455f09b85bce9b2c1f7a5e559a89ab5611327b1d0f780c9d62fcb36ad0b231eaba35554f551bcdc80e
6
+ metadata.gz: 8a415cb28d0ad601e63e5ae6e979e762caee118f8019063383da5e24a34034d6b22e0973f0efa16f747b858ea2520299681ae9b32010c547e79877227a3b9cdd
7
+ data.tar.gz: 1913e1d901f48241a36d25b5eb4a72c6c1e61ccc98f17494d7d75d4e613bac0dc251b49957472abe1f410ce62575817906023182e3c01d64dc7af0ae5a934bb3
data/Gemfile CHANGED
@@ -6,6 +6,7 @@ gem 'liquid', github: 'Shopify/liquid', branch: 'master'
6
6
 
7
7
  group :test do
8
8
  gem 'spy', '0.4.1'
9
+ gem 'benchmark-ips'
9
10
  end
10
11
 
11
12
  group :development do
data/README.md CHANGED
@@ -1,5 +1,5 @@
1
1
  # Liquid::C
2
- [![Build Status](https://api.travis-ci.org/Shopify/liquid-c.png?branch=master)](https://travis-ci.org/Shopify/liquid-c)
2
+ [![Build Status](https://travis-ci.org/Shopify/liquid-c.svg?branch=master)](https://travis-ci.org/Shopify/liquid-c)
3
3
 
4
4
  Partial native implementation of the liquid ruby gem in C.
5
5
 
data/Rakefile CHANGED
@@ -59,18 +59,11 @@ namespace :profile do
59
59
  end
60
60
 
61
61
  namespace :compare do
62
- include Benchmark
63
- desc "Compare Liquid to Liquid + Liquid-C"
64
- task :run do
65
- bare = Benchmark.measure do
66
- ruby "./performance.rb bare profile lax"
67
- end
68
- liquid_c = Benchmark.measure do
69
- ruby "./performance.rb c profile lax"
70
- end
71
- Benchmark.benchmark(CAPTION, 10, FORMAT, "Liquid:", "Liquid-C:") do |x|
72
- [bare, liquid_c]
62
+ %w(lax warn strict).each do |type|
63
+ desc "Compare Liquid to Liquid-C in #{type} mode"
64
+ task type.to_sym do
65
+ ruby "./performance.rb bare benchmark #{type}"
66
+ ruby "./performance.rb c benchmark #{type}"
73
67
  end
74
- puts "Ratio: #{liquid_c.real / bare.real * 100}%"
75
68
  end
76
69
  end
@@ -0,0 +1,122 @@
1
+ #include "liquid.h"
2
+ #include "tokenizer.h"
3
+ #include <stdio.h>
4
+
5
+ static ID
6
+ intern_raise_missing_variable_terminator,
7
+ intern_raise_missing_tag_terminator,
8
+ intern_nodelist,
9
+ intern_blank,
10
+ intern_is_blank,
11
+ intern_clear,
12
+ intern_tags,
13
+ intern_parse,
14
+ intern_square_brackets;
15
+
16
// Tag-name character test: accepts an underscore or anything rb_isalnum accepts.
// Returns 1 when the character qualifies, 0 otherwise.
static int is_id(int c)
{
    return c == '_' || rb_isalnum(c);
}
20
+
21
// Walks forward from `start` for as long as `func` accepts the current byte,
// never moving past `end`. Returns the first rejected position, or `end` when
// every byte was accepted.
inline static const char *read_while(const char *start, const char *end, int (func)(int))
{
    const char *pos;

    for (pos = start; pos < end; pos++) {
        // Bytes are widened through unsigned char so high bytes stay non-negative.
        if (!func((unsigned char) *pos))
            break;
    }
    return pos;
}
26
+
27
+ static VALUE rb_block_parse(VALUE self, VALUE tokens, VALUE options)
28
+ {
29
+ tokenizer_t *tokenizer;
30
+ Tokenizer_Get_Struct(tokens, tokenizer);
31
+
32
+ token_t token;
33
+ VALUE tags = Qnil;
34
+ VALUE nodelist = rb_ivar_get(self, intern_nodelist);
35
+
36
+ while (true) {
37
+ tokenizer_next(tokenizer, &token);
38
+
39
+ switch (token.type) {
40
+ case TOKENIZER_TOKEN_NONE:
41
+ return rb_yield_values(2, Qnil, Qnil);
42
+
43
+ case TOKEN_INVALID:
44
+ {
45
+ VALUE str = rb_enc_str_new(token.str, token.length, utf8_encoding);
46
+
47
+ ID raise_method_id = intern_raise_missing_variable_terminator;
48
+ if (token.str[1] == '%') raise_method_id = intern_raise_missing_tag_terminator;
49
+
50
+ return rb_funcall(self, raise_method_id, 2, str, options);
51
+ }
52
+ case TOKEN_RAW:
53
+ {
54
+ VALUE str = rb_enc_str_new(token.str, token.length, utf8_encoding);
55
+ rb_ary_push(nodelist, str);
56
+
57
+ if (rb_ivar_get(self, intern_blank) == Qtrue) {
58
+ const char *end = token.str + token.length;
59
+
60
+ if (read_while(token.str, end, rb_isspace) < end)
61
+ rb_ivar_set(self, intern_blank, Qfalse);
62
+ }
63
+ break;
64
+ }
65
+ case TOKEN_VARIABLE:
66
+ {
67
+ VALUE args[2] = {rb_enc_str_new(token.str + 2, token.length - 4, utf8_encoding), options};
68
+ VALUE var = rb_class_new_instance(2, args, cLiquidVariable);
69
+ rb_ary_push(nodelist, var);
70
+ rb_ivar_set(self, intern_blank, Qfalse);
71
+ break;
72
+ }
73
+ case TOKEN_TAG:
74
+ {
75
+ const char *start = token.str + 2, *end = token.str + token.length - 2;
76
+
77
+ // Imitate \s*(\w+)\s*(.*)? regex
78
+ const char *name_start = read_while(start, end, rb_isspace);
79
+ const char *name_end = read_while(name_start, end, is_id);
80
+
81
+ VALUE tag_name = rb_enc_str_new(name_start, name_end - name_start, utf8_encoding);
82
+
83
+ if (tags == Qnil)
84
+ tags = rb_funcall(cLiquidTemplate, intern_tags, 0);
85
+
86
+ VALUE tag_class = rb_funcall(tags, intern_square_brackets, 1, tag_name);
87
+
88
+ const char *markup_start = read_while(name_end, end, rb_isspace);
89
+ VALUE markup = rb_enc_str_new(markup_start, end - markup_start, utf8_encoding);
90
+
91
+ if (tag_class == Qnil)
92
+ return rb_yield_values(2, tag_name, markup);
93
+
94
+ VALUE new_tag = rb_funcall(tag_class, intern_parse, 4, tag_name, markup, tokens, options);
95
+
96
+ if (rb_ivar_get(self, intern_blank) == Qtrue && !RTEST(rb_funcall(new_tag, intern_is_blank, 0)))
97
+ rb_ivar_set(self, intern_blank, Qfalse);
98
+
99
+ rb_ary_push(nodelist, new_tag);
100
+ break;
101
+ }
102
+ }
103
+ }
104
+ return Qnil;
105
+ }
106
+
107
+ void init_liquid_block()
108
+ {
109
+ intern_raise_missing_variable_terminator = rb_intern("raise_missing_variable_terminator");
110
+ intern_raise_missing_tag_terminator = rb_intern("raise_missing_tag_terminator");
111
+ intern_nodelist = rb_intern("@nodelist");
112
+ intern_blank = rb_intern("@blank");
113
+ intern_is_blank = rb_intern("blank?");
114
+ intern_clear = rb_intern("clear");
115
+ intern_tags = rb_intern("tags");
116
+ intern_parse = rb_intern("parse");
117
+ intern_square_brackets = rb_intern("[]");
118
+
119
+ VALUE cLiquidBlockBody = rb_const_get(mLiquid, rb_intern("BlockBody"));
120
+ rb_define_method(cLiquidBlockBody, "c_parse", rb_block_parse, 2);
121
+ }
122
+
@@ -0,0 +1,7 @@
1
+ #if !defined(LIQUID_BLOCK_H)
2
+ #define LIQUID_BLOCK_H
3
+
4
+ void init_liquid_block();
5
+
6
+ #endif
7
+
@@ -0,0 +1,148 @@
1
+ #include "liquid.h"
2
+ #include "lexer.h"
3
+ #include <stdio.h>
4
+
5
+ const char *symbol_names[TOKEN_END] = {
6
+ [TOKEN_NONE] = "none",
7
+ [TOKEN_COMPARISON] = "comparison",
8
+ [TOKEN_STRING] = "string",
9
+ [TOKEN_NUMBER] = "number",
10
+ [TOKEN_IDENTIFIER] = "id",
11
+ [TOKEN_DOTDOT] = "dotdot",
12
+ [TOKEN_EOS] = "end_of_string",
13
+ [TOKEN_PIPE] = "pipe",
14
+ [TOKEN_DOT] = "dot",
15
+ [TOKEN_COLON] = "colon",
16
+ [TOKEN_COMMA] = "comma",
17
+ [TOKEN_OPEN_SQUARE] = "open_square",
18
+ [TOKEN_CLOSE_SQUARE] = "close_square",
19
+ [TOKEN_OPEN_ROUND] = "open_round",
20
+ [TOKEN_CLOSE_ROUND] = "close_round",
21
+ [TOKEN_QUESTION] = "question",
22
+ [TOKEN_DASH] = "dash"
23
+ };
24
+
25
// True for bytes allowed after the first character of an identifier:
// underscore, dash, or anything ISALNUM accepts.
inline static int is_identifier(char c)
{
    return c == '_' || c == '-' || ISALNUM(c);
}
29
+
30
// Returns 1 when `c` is one of the single-character punctuation tokens
// | . : , [ ] ( ) ? - and 0 for every other byte.
inline static int is_special(char c)
{
    return c == '|' || c == '.' || c == ':' || c == ',' ||
           c == '[' || c == ']' || c == '(' || c == ')' ||
           c == '?' || c == '-';
}
40
+
41
// Checks whether the bytes in [cur, end) begin with the NUL-terminated `pattern`.
// Returns the position just past the matched prefix, or NULL when the input is
// too short or the bytes differ.
inline static const char *prefix_end(const char *cur, const char *end, const char *pattern)
{
    const size_t needed = strlen(pattern);
    const size_t available = (size_t)(end - cur);

    if (needed <= available && memcmp(cur, pattern, needed) == 0)
        return cur + needed;

    return NULL;
}
51
+
52
// Searches for `target` in the bytes after `cur` (the byte at `cur` itself is
// skipped) and before `end`. Returns the position just past the first match,
// or NULL when there is no match.
//
// Ranges shorter than two bytes cannot contain a match and return NULL; this
// also keeps the length handed to memchr from wrapping around when cur == end.
inline static const char *scan_past(const char *cur, const char *end, char target)
{
    if (end - cur < 2) return NULL;

    const char *match = memchr(cur + 1, target, (size_t)(end - cur - 1));
    return match ? match + 1 : NULL;
}
57
+
58
+ #define RETURN_TOKEN(t, n) { \
59
+ const char *tok_end = str + (n); \
60
+ token->type = (t); \
61
+ token->val = str; \
62
+ if (str != start) token->flags |= TOKEN_SPACE_PREFIX; \
63
+ if (tok_end < end && ISSPACE(*tok_end)) token->flags |= TOKEN_SPACE_SUFFIX; \
64
+ return (token->val_end = tok_end); \
65
+ }
66
+
67
+ // Reads one token from start, and fills it into the token argument.
68
+ // Returns the start of the next token if any, otherwise the end of the string.
69
+ const char *lex_one(const char *start, const char *end, lexer_token_t *token)
70
+ {
71
+ // str references the start of the token, after whitespace is skipped.
72
+ // cur references the currently processing character during iterative lexing.
73
+ const char *str = start, *cur;
74
+
75
+ while (str < end && ISSPACE(*str)) ++str;
76
+
77
+ token->val = token->val_end = NULL;
78
+ token->flags = 0;
79
+
80
+ if (str >= end) return str;
81
+
82
+ char c = *str; // First character of the token.
83
+ char cn = '\0'; // Second character if available, for lookahead.
84
+ if (str + 1 < end) cn = str[1];
85
+
86
+ switch (c) {
87
+ case '<':
88
+ RETURN_TOKEN(TOKEN_COMPARISON, cn == '>' || cn == '=' ? 2 : 1);
89
+ case '>':
90
+ RETURN_TOKEN(TOKEN_COMPARISON, cn == '=' ? 2 : 1);
91
+ case '=':
92
+ case '!':
93
+ if (cn == '=') RETURN_TOKEN(TOKEN_COMPARISON, 2);
94
+ break;
95
+ case '.':
96
+ if (cn == '.') RETURN_TOKEN(TOKEN_DOTDOT, 2);
97
+ break;
98
+ }
99
+
100
+ if ((cur = prefix_end(str, end, "contains")))
101
+ RETURN_TOKEN(TOKEN_COMPARISON, cur - str);
102
+
103
+ if (c == '\'' || c == '"') {
104
+ cur = scan_past(str, end, c);
105
+
106
+ if (cur) {
107
+ // Quote was properly terminated.
108
+ RETURN_TOKEN(TOKEN_STRING, cur - str);
109
+ }
110
+ }
111
+
112
+ if (ISDIGIT(c) || c == '-') {
113
+ int has_dot = 0;
114
+ cur = str;
115
+ while (++cur < end) {
116
+ if (!has_dot && *cur == '.') {
117
+ has_dot = 1;
118
+ } else if (!ISDIGIT(*cur)) {
119
+ break;
120
+ }
121
+ }
122
+ cur--; // Point to last digit (or dot).
123
+
124
+ if (*cur == '.') {
125
+ cur--; // Ignore any trailing dot.
126
+ has_dot = 0;
127
+ }
128
+ if (*cur != '-') {
129
+ if (has_dot) token->flags |= TOKEN_FLOAT_NUMBER;
130
+ RETURN_TOKEN(TOKEN_NUMBER, cur + 1 - str);
131
+ }
132
+ }
133
+
134
+ if (ISALPHA(c) || c == '_') {
135
+ cur = str;
136
+ while (++cur < end && is_identifier(*cur)) {}
137
+ if (cur < end && *cur == '?') cur++;
138
+ RETURN_TOKEN(TOKEN_IDENTIFIER, cur - str);
139
+ }
140
+
141
+ if (is_special(c)) RETURN_TOKEN(c, 1);
142
+
143
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected character %c", c);
144
+ return NULL;
145
+ }
146
+
147
+ #undef RETURN_TOKEN
148
+
@@ -0,0 +1,46 @@
1
+ #if !defined(LIQUID_LEXER_H)
2
+ #define LIQUID_LEXER_H
3
+
4
+ enum lexer_token_type {
5
+ TOKEN_NONE,
6
+ TOKEN_COMPARISON,
7
+ TOKEN_STRING,
8
+ TOKEN_NUMBER,
9
+ TOKEN_IDENTIFIER,
10
+ TOKEN_DOTDOT,
11
+ TOKEN_EOS,
12
+
13
+ TOKEN_PIPE = '|',
14
+ TOKEN_DOT = '.',
15
+ TOKEN_COLON = ':',
16
+ TOKEN_COMMA = ',',
17
+ TOKEN_OPEN_SQUARE = '[',
18
+ TOKEN_CLOSE_SQUARE = ']',
19
+ TOKEN_OPEN_ROUND = '(',
20
+ TOKEN_CLOSE_ROUND = ')',
21
+ TOKEN_QUESTION = '?',
22
+ TOKEN_DASH = '-',
23
+
24
+ TOKEN_END = 256
25
+ };
26
+
27
+ #define TOKEN_SPACE_PREFIX 0x1
28
+ #define TOKEN_SPACE_SUFFIX 0x2
29
+ #define TOKEN_SPACE_AFFIX (TOKEN_SPACE_PREFIX | TOKEN_SPACE_SUFFIX)
30
+ #define TOKEN_FLOAT_NUMBER 0x4
31
+
32
+ typedef struct lexer_token {
33
+ unsigned char type, flags;
34
+ const char *val, *val_end;
35
+ } lexer_token_t;
36
+
37
+ extern const char *symbol_names[TOKEN_END];
38
+
39
+ const char *lex_one(const char *str, const char *end, lexer_token_t *token);
40
+
41
+ inline static VALUE token_to_rstr(lexer_token_t token) {
42
+ return rb_enc_str_new(token.val, token.val_end - token.val, utf8_encoding);
43
+ }
44
+
45
+ #endif
46
+
@@ -1,12 +1,24 @@
1
1
  #include "liquid.h"
2
2
  #include "tokenizer.h"
3
+ #include "variable.h"
4
+ #include "lexer.h"
5
+ #include "parser.h"
6
+ #include "block.h"
3
7
 
4
- VALUE mLiquid;
8
+ VALUE mLiquid, cLiquidSyntaxError, cLiquidVariable, cLiquidTemplate;
5
9
  rb_encoding *utf8_encoding;
6
10
 
7
11
  void Init_liquid_c(void)
8
12
  {
9
13
  utf8_encoding = rb_utf8_encoding();
10
14
  mLiquid = rb_define_module("Liquid");
15
+ cLiquidSyntaxError = rb_const_get(mLiquid, rb_intern("SyntaxError"));
16
+ cLiquidVariable = rb_const_get(mLiquid, rb_intern("Variable"));
17
+ cLiquidTemplate = rb_const_get(mLiquid, rb_intern("Template"));
18
+
11
19
  init_liquid_tokenizer();
20
+ init_liquid_parser();
21
+ init_liquid_variable();
22
+ init_liquid_block();
12
23
  }
24
+
@@ -5,7 +5,8 @@
5
5
  #include <ruby/encoding.h>
6
6
  #include <stdbool.h>
7
7
 
8
- extern VALUE mLiquid;
8
+ extern VALUE mLiquid, cLiquidSyntaxError, cLiquidVariable, cLiquidTemplate;
9
9
  extern rb_encoding *utf8_encoding;
10
10
 
11
11
  #endif
12
+
@@ -0,0 +1,198 @@
1
+ #include "liquid.h"
2
+ #include "parser.h"
3
+ #include "lexer.h"
4
+
5
+ static VALUE cLiquidRangeLookup, cLiquidVariableLookup, cRange, symBlank, symEmpty;
6
+ static ID idToI, idEvaluate;
7
+
8
+ void init_parser(parser_t *p, const char *str, const char *end)
9
+ {
10
+ p->str_end = end;
11
+ p->cur.type = p->next.type = TOKEN_EOS;
12
+ p->str = lex_one(str, end, &p->cur);
13
+ p->str = lex_one(p->str, end, &p->next);
14
+ }
15
+
16
+ lexer_token_t parser_consume_any(parser_t *p)
17
+ {
18
+ lexer_token_t cur = p->cur;
19
+ p->cur = p->next;
20
+ p->next.type = TOKEN_EOS;
21
+ p->str = lex_one(p->str, p->str_end, &p->next);
22
+ return cur;
23
+ }
24
+
25
+ lexer_token_t parser_must_consume(parser_t *p, unsigned char type)
26
+ {
27
+ if (p->cur.type != type) {
28
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Expected %s but found %s",
29
+ symbol_names[type], symbol_names[p->cur.type]);
30
+ }
31
+ return parser_consume_any(p);
32
+ }
33
+
34
+ lexer_token_t parser_consume(parser_t *p, unsigned char type)
35
+ {
36
+ if (p->cur.type != type) {
37
+ lexer_token_t zero = {0};
38
+ return zero;
39
+ }
40
+ return parser_consume_any(p);
41
+ }
42
+
43
+ inline static int rstring_eq(VALUE rstr, const char *str) {
44
+ size_t str_len = strlen(str);
45
+
46
+ return TYPE(rstr) == T_STRING &&
47
+ str_len == (size_t)RSTRING_LEN(rstr) &&
48
+ memcmp(RSTRING_PTR(rstr), str, str_len) == 0;
49
+ }
50
+
51
+ static VALUE parse_number(parser_t *p)
52
+ {
53
+ VALUE out;
54
+ lexer_token_t token = parser_must_consume(p, TOKEN_NUMBER);
55
+
56
+ // Set up sentinel for rb_cstr operations.
57
+ char tmp = *token.val_end;
58
+ *(char *)token.val_end = '\0';
59
+
60
+ if (token.flags & TOKEN_FLOAT_NUMBER) {
61
+ out = DBL2NUM(rb_cstr_to_dbl(token.val, 1));
62
+ } else {
63
+ out = rb_cstr_to_inum(token.val, 10, 1);
64
+ }
65
+
66
+ *(char *)token.val_end = tmp;
67
+ return out;
68
+ }
69
+
70
+ static VALUE parse_range(parser_t *p)
71
+ {
72
+ parser_must_consume(p, TOKEN_OPEN_ROUND);
73
+
74
+ VALUE args[2];
75
+ args[0] = parse_expression(p);
76
+ parser_must_consume(p, TOKEN_DOTDOT);
77
+
78
+ args[1] = parse_expression(p);
79
+ parser_must_consume(p, TOKEN_CLOSE_ROUND);
80
+
81
+ if (rb_respond_to(args[0], idEvaluate) || rb_respond_to(args[1], idEvaluate))
82
+ return rb_class_new_instance(2, args, cLiquidRangeLookup);
83
+
84
+ return rb_class_new_instance(2, args, cRange);
85
+ }
86
+
87
+ static VALUE parse_variable(parser_t *p)
88
+ {
89
+ VALUE name, lookups = rb_ary_new(), lookup;
90
+ unsigned long long command_flags = 0;
91
+
92
+ if (parser_consume(p, TOKEN_OPEN_SQUARE).type) {
93
+ name = parse_expression(p);
94
+ parser_must_consume(p, TOKEN_CLOSE_SQUARE);
95
+ } else {
96
+ name = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER));
97
+ }
98
+
99
+ while (true) {
100
+ if (p->cur.type == TOKEN_OPEN_SQUARE) {
101
+ parser_consume_any(p);
102
+ lookup = parse_expression(p);
103
+ parser_must_consume(p, TOKEN_CLOSE_SQUARE);
104
+
105
+ rb_ary_push(lookups, lookup);
106
+ } else if (p->cur.type == TOKEN_DOT) {
107
+ int has_space_affix = parser_consume_any(p).flags & TOKEN_SPACE_AFFIX;
108
+ lookup = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER));
109
+
110
+ if (has_space_affix)
111
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected dot");
112
+
113
+ if (rstring_eq(lookup, "size") || rstring_eq(lookup, "first") || rstring_eq(lookup, "last"))
114
+ command_flags |= 1 << RARRAY_LEN(lookups);
115
+
116
+ rb_ary_push(lookups, lookup);
117
+ } else {
118
+ break;
119
+ }
120
+ }
121
+
122
+ if (RARRAY_LEN(lookups) == 0 && TYPE(name) == T_STRING) {
123
+ if (rstring_eq(name, "nil") || rstring_eq(name, "null")) return Qnil;
124
+ if (rstring_eq(name, "true")) return Qtrue;
125
+ if (rstring_eq(name, "false")) return Qfalse;
126
+ if (rstring_eq(name, "blank")) return symBlank;
127
+ if (rstring_eq(name, "empty")) return symEmpty;
128
+ }
129
+
130
+ VALUE args[4] = {Qfalse, name, lookups, INT2FIX(command_flags)};
131
+ return rb_class_new_instance(4, args, cLiquidVariableLookup);
132
+ }
133
+
134
+ VALUE parse_expression(parser_t *p)
135
+ {
136
+ switch (p->cur.type) {
137
+ case TOKEN_IDENTIFIER:
138
+ case TOKEN_OPEN_SQUARE:
139
+ return parse_variable(p);
140
+
141
+ case TOKEN_NUMBER:
142
+ return parse_number(p);
143
+
144
+ case TOKEN_OPEN_ROUND:
145
+ return parse_range(p);
146
+
147
+ case TOKEN_STRING:
148
+ {
149
+ lexer_token_t token = parser_consume_any(p);
150
+ token.val++;
151
+ token.val_end--;
152
+ return token_to_rstr(token);
153
+ }
154
+ }
155
+
156
+ if (p->cur.type == TOKEN_EOS) {
157
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p->cur.type]);
158
+ } else {
159
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s, \"%.*s\"] is not a valid expression",
160
+ symbol_names[p->cur.type], (int)(p->cur.val_end - p->cur.val), p->cur.val);
161
+ }
162
+ return Qnil;
163
+ }
164
+
165
+ static VALUE rb_parse_expression(VALUE self, VALUE markup)
166
+ {
167
+ StringValue(markup);
168
+ char *start = RSTRING_PTR(markup);
169
+
170
+ parser_t p;
171
+ init_parser(&p, start, start + RSTRING_LEN(markup));
172
+
173
+ if (p.cur.type == TOKEN_EOS)
174
+ return Qnil;
175
+
176
+ VALUE expr = parse_expression(&p);
177
+
178
+ if (p.cur.type != TOKEN_EOS)
179
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p.cur.type]);
180
+
181
+ return expr;
182
+ }
183
+
184
+ void init_liquid_parser(void)
185
+ {
186
+ idToI = rb_intern("to_i");
187
+ idEvaluate = rb_intern("evaluate");
188
+ symBlank = ID2SYM(rb_intern("blank?"));
189
+ symEmpty = ID2SYM(rb_intern("empty?"));
190
+
191
+ cLiquidRangeLookup = rb_const_get(mLiquid, rb_intern("RangeLookup"));
192
+ cRange = rb_const_get(rb_cObject, rb_intern("Range"));
193
+ cLiquidVariableLookup = rb_const_get(mLiquid, rb_intern("VariableLookup"));
194
+
195
+ VALUE cLiquidExpression = rb_const_get(mLiquid, rb_intern("Expression"));
196
+ rb_define_singleton_method(cLiquidExpression, "c_parse", rb_parse_expression, 1);
197
+ }
198
+
@@ -0,0 +1,22 @@
1
+ #if !defined(LIQUID_PARSER_H)
2
+ #define LIQUID_PARSER_H
3
+
4
+ #include "lexer.h"
5
+
6
+ typedef struct parser {
7
+ lexer_token_t cur, next;
8
+ const char *str, *str_end;
9
+ } parser_t;
10
+
11
+ void init_parser(parser_t *parser, const char *str, const char *end);
12
+
13
+ lexer_token_t parser_must_consume(parser_t *parser, unsigned char type);
14
+ lexer_token_t parser_consume(parser_t *parser, unsigned char type);
15
+ lexer_token_t parser_consume_any(parser_t *parser);
16
+
17
+ VALUE parse_expression(parser_t *parser);
18
+
19
+ void init_liquid_parser(void);
20
+
21
+ #endif
22
+
@@ -62,7 +62,7 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
62
62
  const char *last = cursor + tokenizer->length - 1;
63
63
 
64
64
  token->str = cursor;
65
- token->type = TOKEN_STRING;
65
+ token->type = TOKEN_RAW;
66
66
 
67
67
  while (cursor < last) {
68
68
  if (*cursor++ != '{')
@@ -72,7 +72,7 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
72
72
  if (c != '%' && c != '{')
73
73
  continue;
74
74
  if (cursor - tokenizer->cursor > 2) {
75
- token->type = TOKEN_STRING;
75
+ token->type = TOKEN_RAW;
76
76
  cursor -= 2;
77
77
  goto found;
78
78
  }
@@ -123,7 +123,7 @@ static VALUE tokenizer_shift_method(VALUE self)
123
123
 
124
124
  token_t token;
125
125
  tokenizer_next(tokenizer, &token);
126
- if (token.type == TOKEN_NONE)
126
+ if (!token.type)
127
127
  return Qnil;
128
128
 
129
129
  return rb_enc_str_new(token.str, token.length, utf8_encoding);
@@ -136,3 +136,4 @@ void init_liquid_tokenizer()
136
136
  rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 1);
137
137
  rb_define_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
138
138
  }
139
+
@@ -2,9 +2,9 @@
2
2
  #define LIQUID_TOKENIZER_H
3
3
 
4
4
  enum token_type {
5
- TOKEN_NONE,
5
+ TOKENIZER_TOKEN_NONE = 0,
6
6
  TOKEN_INVALID,
7
- TOKEN_STRING,
7
+ TOKEN_RAW,
8
8
  TOKEN_TAG,
9
9
  TOKEN_VARIABLE
10
10
  };
@@ -29,3 +29,4 @@ void init_liquid_tokenizer();
29
29
  void tokenizer_next(tokenizer_t *tokenizer, token_t *token);
30
30
 
31
31
  #endif
32
+
@@ -0,0 +1,54 @@
1
+ #include "liquid.h"
2
+ #include "variable.h"
3
+ #include "parser.h"
4
+ #include <stdio.h>
5
+
6
+ static VALUE rb_variable_parse(VALUE self, VALUE markup, VALUE filters)
7
+ {
8
+ StringValue(markup);
9
+ char *start = RSTRING_PTR(markup);
10
+
11
+ parser_t p;
12
+ init_parser(&p, start, start + RSTRING_LEN(markup));
13
+
14
+ if (p.cur.type == TOKEN_EOS)
15
+ return Qnil;
16
+
17
+ VALUE name = parse_expression(&p);
18
+
19
+ while (parser_consume(&p, TOKEN_PIPE).type) {
20
+ lexer_token_t filter_name = parser_must_consume(&p, TOKEN_IDENTIFIER);
21
+
22
+ VALUE filter_args = rb_ary_new(), keyword_args = Qnil, filter;
23
+
24
+ if (parser_consume(&p, TOKEN_COLON).type) {
25
+ do {
26
+ if (p.cur.type == TOKEN_IDENTIFIER && p.next.type == TOKEN_COLON) {
27
+ VALUE key = token_to_rstr(parser_consume_any(&p));
28
+ parser_consume_any(&p);
29
+
30
+ if (keyword_args == Qnil) keyword_args = rb_hash_new();
31
+ rb_hash_aset(keyword_args, key, parse_expression(&p));
32
+ } else {
33
+ rb_ary_push(filter_args, parse_expression(&p));
34
+ }
35
+ } while (parser_consume(&p, TOKEN_COMMA).type);
36
+ }
37
+
38
+ if (keyword_args == Qnil) {
39
+ filter = rb_ary_new3(2, token_to_rstr(filter_name), filter_args);
40
+ } else {
41
+ filter = rb_ary_new3(3, token_to_rstr(filter_name), filter_args, keyword_args);
42
+ }
43
+ rb_ary_push(filters, filter);
44
+ }
45
+
46
+ parser_must_consume(&p, TOKEN_EOS);
47
+ return name;
48
+ }
49
+
50
+ void init_liquid_variable(void)
51
+ {
52
+ rb_define_singleton_method(cLiquidVariable, "c_strict_parse", rb_variable_parse, 2);
53
+ }
54
+
@@ -0,0 +1,7 @@
1
+ #if !defined(LIQUID_VARIABLE_H)
2
+ #define LIQUID_VARIABLE_H
3
+
4
+ void init_liquid_variable(void);
5
+
6
+ #endif
7
+
@@ -2,17 +2,98 @@ require 'liquid/c/version'
2
2
  require 'liquid'
3
3
  require 'liquid_c'
4
4
 
5
module Liquid
  module C
    # Switch consulted by the patched Liquid methods in this file; they use
    # the C implementations only while it is truthy. On by default.
    @enabled = true

    # Current value of the switch.
    def self.enabled
      @enabled
    end

    # Turns the C implementations on or off.
    def self.enabled=(value)
      @enabled = value
    end
  end
end
14
+
5
15
  Liquid::Template.class_eval do
6
16
  private
7
17
 
8
18
  alias_method :ruby_tokenize, :tokenize
9
19
 
10
20
  def tokenize(source)
11
- if @line_numbers
21
+ if Liquid::C.enabled && !@line_numbers
22
+ Liquid::Tokenizer.new(source.to_s)
23
+ else
12
24
  ruby_tokenize(source)
25
+ end
26
+ end
27
+ end
28
+
29
+ Liquid::BlockBody.class_eval do
30
+ alias_method :ruby_parse, :parse
31
+
32
+ def parse(tokens, options)
33
+ if Liquid::C.enabled && !options[:line_numbers] && !options[:profile]
34
+ c_parse(tokens, options) { |t, m| yield t, m }
13
35
  else
14
- Liquid::Tokenizer.new(source.to_s)
36
+ ruby_parse(tokens, options) { |t, m| yield t, m }
15
37
  end
16
38
  end
17
39
  end
18
40
 
41
Liquid::Variable.class_eval do
  alias_method :ruby_lax_parse, :lax_parse
  alias_method :ruby_strict_parse, :strict_parse

  # Lax parsing first tries the strict parser when the C extension is enabled
  # and only falls back to the original Ruby lax parser on a syntax error.
  # Optional :stats_callbacks in @options are notified of every parse
  # (:variable_parse) and of every fallback (:variable_fallback).
  def lax_parse(markup)
    callbacks = @options[:stats_callbacks]
    callbacks[:variable_parse].call if callbacks

    if Liquid::C.enabled
      begin
        return strict_parse(markup)
      rescue Liquid::SyntaxError
        callbacks[:variable_fallback].call if callbacks
      end
    end

    ruby_lax_parse(markup)
  end

  # Strict parsing is done in C when enabled: @filters is reset to an empty
  # Array that the C parser fills in, and @name receives the parsed expression.
  def strict_parse(markup)
    return ruby_strict_parse(markup) unless Liquid::C.enabled

    @filters = []
    @name = Liquid::Variable.c_strict_parse(markup, @filters)
  end
end
68
+
69
Liquid::VariableLookup.class_eval do
  alias_method :ruby_initialize, :initialize

  # Passing `false` as markup while the C extension is enabled stores the
  # already-parsed name, lookups and command flags directly; any other call
  # goes through the original Ruby initializer.
  def initialize(markup, name = nil, lookups = nil, command_flags = nil)
    unless Liquid::C.enabled && markup == false
      ruby_initialize(markup)
      return
    end

    @name = name
    @lookups = lookups
    @command_flags = command_flags
  end
end
82
+
83
Liquid::Expression.class_eval do
  class << self
    alias_method :ruby_parse, :parse

    # Parses markup with the C parser when enabled, falling back to the
    # original Ruby parser if the C parser raises a syntax error.
    # Returns nil for nil/false markup.
    def parse(markup)
      return nil unless markup

      if Liquid::C.enabled
        begin
          return c_parse(markup)
        rescue Liquid::SyntaxError
          # Fall through to the Ruby parser below.
        end
      end

      ruby_parse(markup)
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  module Liquid
2
2
  module C
3
- VERSION = "0.0.2"
3
+ VERSION = "0.0.3"
4
4
  end
5
5
  end
@@ -0,0 +1,106 @@
1
+ # encoding: utf-8
2
+ require 'test_helper'
3
+
4
+ class VariableTest < MiniTest::Unit::TestCase
5
+ def test_variable_parse
6
+ assert_equal [lookup('hello'), []], variable_parse('hello')
7
+ assert_equal ['world', []], variable_parse(' "world" ')
8
+ assert_equal [lookup('hello["world"]'), []], variable_parse(' hello["world"] ')
9
+ assert_equal [nil, []], variable_parse('')
10
+ assert_equal [lookup('question?'), []], variable_parse('question?')
11
+ assert_equal [lookup('[meta]'), []], variable_parse('[meta]')
12
+ assert_equal [lookup('a-b'), []], variable_parse('a-b')
13
+ assert_equal [lookup('a-2'), []], variable_parse('a-2')
14
+ end
15
+
16
+ def test_strictness
17
+ assert_raises(Liquid::SyntaxError) { variable_parse(' hello["world\']" ') }
18
+ assert_raises(Liquid::SyntaxError) { variable_parse('-..') }
19
+ assert_raises(Liquid::SyntaxError) { variable_parse('question?mark') }
20
+ assert_raises(Liquid::SyntaxError) { variable_parse('123.foo') }
21
+ assert_raises(Liquid::SyntaxError) { variable_parse(' | nothing') }
22
+
23
+ ['a .b', 'a. b', 'a . b'].each do |var|
24
+ assert_raises(Liquid::SyntaxError) { variable_parse(var) }
25
+ end
26
+
27
+ ['a -b', 'a- b', 'a - b'].each do |var|
28
+ assert_raises(Liquid::SyntaxError) { variable_parse(var) }
29
+ end
30
+ end
31
+
32
+ def test_literals
33
+ assert_equal [true, []], variable_parse('true')
34
+ assert_equal [nil, []], variable_parse('nil')
35
+ assert_equal [123.4, []], variable_parse('123.4')
36
+
37
+ assert_equal [lookup('[blank]'), []], variable_parse('[blank]')
38
+ assert_equal [lookup(false, true, [:blank?], 0), []], variable_parse('[true][blank]')
39
+ assert_equal [lookup('[true][blank]'), []], variable_parse('[true][blank]')
40
+ assert_equal [lookup('x["size"]'), []], variable_parse('x["size"]')
41
+ end
42
+
43
+ def test_variable_filter
44
+ name = lookup('name')
45
+ assert_equal [name, [['filter', []]]], variable_parse(' name | filter ')
46
+ assert_equal [name, [['filter1', []], ['filter2', []]]], variable_parse(' name | filter1 | filter2 ')
47
+ end
48
+
49
+ def test_variable_filter_args
50
+ name = lookup('name')
51
+ abc = lookup('abc')
52
+
53
+ assert_equal [name, [['filter', [abc]]]], variable_parse(' name | filter: abc ')
54
+
55
+ assert_equal [name, [['filter1', [abc]], ['filter2', [abc]]]],
56
+ variable_parse(' name | filter1: abc | filter2: abc ')
57
+
58
+ assert_equal [name, [['filter', [lookup('a')], {'b' => lookup('c'), 'd' => lookup('e')}]]],
59
+ variable_parse('name | filter : a , b : c , d : e')
60
+
61
+ assert_raises Liquid::SyntaxError do
62
+ variable_parse('name | filter : a : b : c : d : e')
63
+ end
64
+ end
65
+
66
+ def test_unicode_strings
67
+ assert_equal ['å߀êùidhtлsԁѵ߀ráƙìstɦeƅêstpcmáѕterrãcêcհèrr', []],
68
+ variable_parse('"å߀êùidhtлsԁѵ߀ráƙìstɦeƅêstpcmáѕterrãcêcհèrr"')
69
+ end
70
+
71
+ def test_broken_unicode_errors
72
+ err = assert_raises(Liquid::SyntaxError) do
73
+ Liquid::Template.parse("test {{ \xC2\xA0 test }}", error_mode: :strict)
74
+ end
75
+ assert err.message
76
+ end
77
+
78
+ def test_callbacks
79
+ variable_parses = 0
80
+ variable_fallbacks = 0
81
+
82
+ callbacks = {
83
+ variable_parse: lambda { variable_parses += 1 },
84
+ variable_fallback: lambda { variable_fallbacks += 1 }
85
+ }
86
+
87
+ Liquid::Variable.new('abc', error_mode: :lax, stats_callbacks: callbacks)
88
+ assert_equal 1, variable_parses
89
+ assert_equal 0, variable_fallbacks
90
+
91
+ Liquid::Variable.new('@!#', error_mode: :lax, stats_callbacks: callbacks)
92
+ assert_equal 2, variable_parses
93
+ assert_equal 1, variable_fallbacks
94
+ end
95
+
96
+ private
97
+
98
+ def variable_parse(markup)
99
+ name = Liquid::Variable.c_strict_parse(markup, filters = [])
100
+ [name, filters]
101
+ end
102
+
103
+ def lookup(*args)
104
+ Liquid::VariableLookup.new(*args)
105
+ end
106
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: liquid-c
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dylan Thacker-Smith
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-08 00:00:00.000000000 Z
11
+ date: 2014-12-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: liquid
@@ -94,11 +94,19 @@ files:
94
94
  - LICENSE.txt
95
95
  - README.md
96
96
  - Rakefile
97
+ - ext/liquid_c/block.c
98
+ - ext/liquid_c/block.h
97
99
  - ext/liquid_c/extconf.rb
100
+ - ext/liquid_c/lexer.c
101
+ - ext/liquid_c/lexer.h
98
102
  - ext/liquid_c/liquid.c
99
103
  - ext/liquid_c/liquid.h
104
+ - ext/liquid_c/parser.c
105
+ - ext/liquid_c/parser.h
100
106
  - ext/liquid_c/tokenizer.c
101
107
  - ext/liquid_c/tokenizer.h
108
+ - ext/liquid_c/variable.c
109
+ - ext/liquid_c/variable.h
102
110
  - lib/liquid/c.rb
103
111
  - lib/liquid/c/version.rb
104
112
  - liquid-c.gemspec
@@ -106,6 +114,7 @@ files:
106
114
  - test/liquid_test.rb
107
115
  - test/test_helper.rb
108
116
  - test/unit/tokenizer_test.rb
117
+ - test/unit/variable_test.rb
109
118
  homepage: ''
110
119
  licenses:
111
120
  - MIT
@@ -126,7 +135,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
126
135
  version: '0'
127
136
  requirements: []
128
137
  rubyforge_project:
129
- rubygems_version: 2.2.0
138
+ rubygems_version: 2.2.2
130
139
  signing_key:
131
140
  specification_version: 4
132
141
  summary: Liquid performance extension in C
@@ -134,3 +143,4 @@ test_files:
134
143
  - test/liquid_test.rb
135
144
  - test/test_helper.rb
136
145
  - test/unit/tokenizer_test.rb
146
+ - test/unit/variable_test.rb