liquid-c 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1f1e4b7a91ef500fc48563ecdff875658c045b0e
4
- data.tar.gz: 460cbca81b9ff70a9c20e4051cdc62a26a1e2c7b
3
+ metadata.gz: ceaa0d7db7673b4b6e73fb7e669a2728dbafb1b3
4
+ data.tar.gz: 018116a6bedfc9305a3e64aaa6624c4dfb52d7b3
5
5
  SHA512:
6
- metadata.gz: 1ecf742609e18e26a982897562f3ff0ef0c06e77045d12b98ad36fda3081e4350faffb9bae860ce1ecc060d8c6138b7fd482c323d763538dc38b13bc5738d1f8
7
- data.tar.gz: 817638febdcb9a54399ab8aec528091b1c24f66510816e455f09b85bce9b2c1f7a5e559a89ab5611327b1d0f780c9d62fcb36ad0b231eaba35554f551bcdc80e
6
+ metadata.gz: 8a415cb28d0ad601e63e5ae6e979e762caee118f8019063383da5e24a34034d6b22e0973f0efa16f747b858ea2520299681ae9b32010c547e79877227a3b9cdd
7
+ data.tar.gz: 1913e1d901f48241a36d25b5eb4a72c6c1e61ccc98f17494d7d75d4e613bac0dc251b49957472abe1f410ce62575817906023182e3c01d64dc7af0ae5a934bb3
data/Gemfile CHANGED
@@ -6,6 +6,7 @@ gem 'liquid', github: 'Shopify/liquid', branch: 'master'
6
6
 
7
7
  group :test do
8
8
  gem 'spy', '0.4.1'
9
+ gem 'benchmark-ips'
9
10
  end
10
11
 
11
12
  group :development do
data/README.md CHANGED
@@ -1,5 +1,5 @@
1
1
  # Liquid::C
2
- [![Build Status](https://api.travis-ci.org/Shopify/liquid-c.png?branch=master)](https://travis-ci.org/Shopify/liquid-c)
2
+ [![Build Status](https://travis-ci.org/Shopify/liquid-c.svg?branch=master)](https://travis-ci.org/Shopify/liquid-c)
3
3
 
4
4
  Partial native implementation of the liquid ruby gem in C.
5
5
 
data/Rakefile CHANGED
@@ -59,18 +59,11 @@ namespace :profile do
59
59
  end
60
60
 
61
61
  namespace :compare do
62
- include Benchmark
63
- desc "Compare Liquid to Liquid + Liquid-C"
64
- task :run do
65
- bare = Benchmark.measure do
66
- ruby "./performance.rb bare profile lax"
67
- end
68
- liquid_c = Benchmark.measure do
69
- ruby "./performance.rb c profile lax"
70
- end
71
- Benchmark.benchmark(CAPTION, 10, FORMAT, "Liquid:", "Liquid-C:") do |x|
72
- [bare, liquid_c]
62
+ %w(lax warn strict).each do |type|
63
+ desc "Compare Liquid to Liquid-C in #{type} mode"
64
+ task type.to_sym do
65
+ ruby "./performance.rb bare benchmark #{type}"
66
+ ruby "./performance.rb c benchmark #{type}"
73
67
  end
74
- puts "Ratio: #{liquid_c.real / bare.real * 100}%"
75
68
  end
76
69
  end
@@ -0,0 +1,122 @@
1
+ #include "liquid.h"
2
+ #include "tokenizer.h"
3
+ #include <stdio.h>
4
+
5
+ static ID
6
+ intern_raise_missing_variable_terminator,
7
+ intern_raise_missing_tag_terminator,
8
+ intern_nodelist,
9
+ intern_blank,
10
+ intern_is_blank,
11
+ intern_clear,
12
+ intern_tags,
13
+ intern_parse,
14
+ intern_square_brackets;
15
+
16
/* Returns 1 when c may be part of a tag name (underscore, or alphanumeric per rb_isalnum), else 0. */
static int is_id(int c)
{
    if (c == '_') return 1;
    return rb_isalnum(c) ? 1 : 0;
}
20
+
21
/*
 * Walks forward from start for as long as func accepts the current byte.
 * Returns a pointer to the first rejected byte, or end when every byte in
 * [start, end) was accepted.
 */
inline static const char *read_while(const char *start, const char *end, int (func)(int))
{
    const char *pos;

    for (pos = start; pos < end; pos++) {
        if (!func((unsigned char) *pos)) break;
    }
    return pos;
}
26
+
27
+ static VALUE rb_block_parse(VALUE self, VALUE tokens, VALUE options)
28
+ {
29
+ tokenizer_t *tokenizer;
30
+ Tokenizer_Get_Struct(tokens, tokenizer);
31
+
32
+ token_t token;
33
+ VALUE tags = Qnil;
34
+ VALUE nodelist = rb_ivar_get(self, intern_nodelist);
35
+
36
+ while (true) {
37
+ tokenizer_next(tokenizer, &token);
38
+
39
+ switch (token.type) {
40
+ case TOKENIZER_TOKEN_NONE:
41
+ return rb_yield_values(2, Qnil, Qnil);
42
+
43
+ case TOKEN_INVALID:
44
+ {
45
+ VALUE str = rb_enc_str_new(token.str, token.length, utf8_encoding);
46
+
47
+ ID raise_method_id = intern_raise_missing_variable_terminator;
48
+ if (token.str[1] == '%') raise_method_id = intern_raise_missing_tag_terminator;
49
+
50
+ return rb_funcall(self, raise_method_id, 2, str, options);
51
+ }
52
+ case TOKEN_RAW:
53
+ {
54
+ VALUE str = rb_enc_str_new(token.str, token.length, utf8_encoding);
55
+ rb_ary_push(nodelist, str);
56
+
57
+ if (rb_ivar_get(self, intern_blank) == Qtrue) {
58
+ const char *end = token.str + token.length;
59
+
60
+ if (read_while(token.str, end, rb_isspace) < end)
61
+ rb_ivar_set(self, intern_blank, Qfalse);
62
+ }
63
+ break;
64
+ }
65
+ case TOKEN_VARIABLE:
66
+ {
67
+ VALUE args[2] = {rb_enc_str_new(token.str + 2, token.length - 4, utf8_encoding), options};
68
+ VALUE var = rb_class_new_instance(2, args, cLiquidVariable);
69
+ rb_ary_push(nodelist, var);
70
+ rb_ivar_set(self, intern_blank, Qfalse);
71
+ break;
72
+ }
73
+ case TOKEN_TAG:
74
+ {
75
+ const char *start = token.str + 2, *end = token.str + token.length - 2;
76
+
77
+ // Imitate \s*(\w+)\s*(.*)? regex
78
+ const char *name_start = read_while(start, end, rb_isspace);
79
+ const char *name_end = read_while(name_start, end, is_id);
80
+
81
+ VALUE tag_name = rb_enc_str_new(name_start, name_end - name_start, utf8_encoding);
82
+
83
+ if (tags == Qnil)
84
+ tags = rb_funcall(cLiquidTemplate, intern_tags, 0);
85
+
86
+ VALUE tag_class = rb_funcall(tags, intern_square_brackets, 1, tag_name);
87
+
88
+ const char *markup_start = read_while(name_end, end, rb_isspace);
89
+ VALUE markup = rb_enc_str_new(markup_start, end - markup_start, utf8_encoding);
90
+
91
+ if (tag_class == Qnil)
92
+ return rb_yield_values(2, tag_name, markup);
93
+
94
+ VALUE new_tag = rb_funcall(tag_class, intern_parse, 4, tag_name, markup, tokens, options);
95
+
96
+ if (rb_ivar_get(self, intern_blank) == Qtrue && !RTEST(rb_funcall(new_tag, intern_is_blank, 0)))
97
+ rb_ivar_set(self, intern_blank, Qfalse);
98
+
99
+ rb_ary_push(nodelist, new_tag);
100
+ break;
101
+ }
102
+ }
103
+ }
104
+ return Qnil;
105
+ }
106
+
107
+ void init_liquid_block()
108
+ {
109
+ intern_raise_missing_variable_terminator = rb_intern("raise_missing_variable_terminator");
110
+ intern_raise_missing_tag_terminator = rb_intern("raise_missing_tag_terminator");
111
+ intern_nodelist = rb_intern("@nodelist");
112
+ intern_blank = rb_intern("@blank");
113
+ intern_is_blank = rb_intern("blank?");
114
+ intern_clear = rb_intern("clear");
115
+ intern_tags = rb_intern("tags");
116
+ intern_parse = rb_intern("parse");
117
+ intern_square_brackets = rb_intern("[]");
118
+
119
+ VALUE cLiquidBlockBody = rb_const_get(mLiquid, rb_intern("BlockBody"));
120
+ rb_define_method(cLiquidBlockBody, "c_parse", rb_block_parse, 2);
121
+ }
122
+
@@ -0,0 +1,7 @@
1
+ #if !defined(LIQUID_BLOCK_H)
2
+ #define LIQUID_BLOCK_H
3
+
4
+ void init_liquid_block();
5
+
6
+ #endif
7
+
@@ -0,0 +1,148 @@
1
+ #include "liquid.h"
2
+ #include "lexer.h"
3
+ #include <stdio.h>
4
+
5
+ const char *symbol_names[TOKEN_END] = {
6
+ [TOKEN_NONE] = "none",
7
+ [TOKEN_COMPARISON] = "comparison",
8
+ [TOKEN_STRING] = "string",
9
+ [TOKEN_NUMBER] = "number",
10
+ [TOKEN_IDENTIFIER] = "id",
11
+ [TOKEN_DOTDOT] = "dotdot",
12
+ [TOKEN_EOS] = "end_of_string",
13
+ [TOKEN_PIPE] = "pipe",
14
+ [TOKEN_DOT] = "dot",
15
+ [TOKEN_COLON] = "colon",
16
+ [TOKEN_COMMA] = "comma",
17
+ [TOKEN_OPEN_SQUARE] = "open_square",
18
+ [TOKEN_CLOSE_SQUARE] = "close_square",
19
+ [TOKEN_OPEN_ROUND] = "open_round",
20
+ [TOKEN_CLOSE_ROUND] = "close_round",
21
+ [TOKEN_QUESTION] = "question",
22
+ [TOKEN_DASH] = "dash"
23
+ };
24
+
25
// Returns 1 when c may continue an identifier: '_', '-' or anything ISALNUM accepts.
inline static int is_identifier(char c)
{
    if (c == '_' || c == '-') return 1;
    return ISALNUM(c) ? 1 : 0;
}
29
+
30
// Returns 1 when c is one of the single-character tokens | . : , [ ] ( ) ? - and 0 otherwise.
inline static int is_special(char c)
{
    static const char specials[] = "|.:,[]()?-";

    // strchr would also "find" the terminating NUL, so rule it out first.
    if (c == '\0') return 0;
    return strchr(specials, c) != NULL;
}
40
+
41
// Tests whether the bytes in [cur, end) begin with the NUL-terminated pattern.
// Returns a pointer to the character after the end of the match, or NULL when
// the pattern does not fit or does not match.
inline static const char *prefix_end(const char *cur, const char *end, const char *pattern)
{
    size_t available = (size_t)(end - cur);
    size_t needed = strlen(pattern);

    if (needed <= available && memcmp(cur, pattern, needed) == 0)
        return cur + needed;

    return NULL;
}
51
+
52
// Looks for target in (cur, end), skipping the byte at cur itself.
// Returns a pointer just past the first occurrence, or NULL when there is none.
inline static const char *scan_past(const char *cur, const char *end, char target)
{
    const char *from = cur + 1;
    const char *hit = memchr(from, target, end - from);

    if (hit == NULL) return NULL;
    return hit + 1;
}
57
+
58
+ #define RETURN_TOKEN(t, n) { \
59
+ const char *tok_end = str + (n); \
60
+ token->type = (t); \
61
+ token->val = str; \
62
+ if (str != start) token->flags |= TOKEN_SPACE_PREFIX; \
63
+ if (tok_end < end && ISSPACE(*tok_end)) token->flags |= TOKEN_SPACE_SUFFIX; \
64
+ return (token->val_end = tok_end); \
65
+ }
66
+
67
+ // Reads one token from start, and fills it into the token argument.
68
+ // Returns the start of the next token if any, otherwise the end of the string.
69
+ const char *lex_one(const char *start, const char *end, lexer_token_t *token)
70
+ {
71
+ // str references the start of the token, after whitespace is skipped.
72
+ // cur references the currently processing character during iterative lexing.
73
+ const char *str = start, *cur;
74
+
75
+ while (str < end && ISSPACE(*str)) ++str;
76
+
77
+ token->val = token->val_end = NULL;
78
+ token->flags = 0;
79
+
80
+ if (str >= end) return str;
81
+
82
+ char c = *str; // First character of the token.
83
+ char cn = '\0'; // Second character if available, for lookahead.
84
+ if (str + 1 < end) cn = str[1];
85
+
86
+ switch (c) {
87
+ case '<':
88
+ RETURN_TOKEN(TOKEN_COMPARISON, cn == '>' || cn == '=' ? 2 : 1);
89
+ case '>':
90
+ RETURN_TOKEN(TOKEN_COMPARISON, cn == '=' ? 2 : 1);
91
+ case '=':
92
+ case '!':
93
+ if (cn == '=') RETURN_TOKEN(TOKEN_COMPARISON, 2);
94
+ break;
95
+ case '.':
96
+ if (cn == '.') RETURN_TOKEN(TOKEN_DOTDOT, 2);
97
+ break;
98
+ }
99
+
100
+ if ((cur = prefix_end(str, end, "contains")))
101
+ RETURN_TOKEN(TOKEN_COMPARISON, cur - str);
102
+
103
+ if (c == '\'' || c == '"') {
104
+ cur = scan_past(str, end, c);
105
+
106
+ if (cur) {
107
+ // Quote was properly terminated.
108
+ RETURN_TOKEN(TOKEN_STRING, cur - str);
109
+ }
110
+ }
111
+
112
+ if (ISDIGIT(c) || c == '-') {
113
+ int has_dot = 0;
114
+ cur = str;
115
+ while (++cur < end) {
116
+ if (!has_dot && *cur == '.') {
117
+ has_dot = 1;
118
+ } else if (!ISDIGIT(*cur)) {
119
+ break;
120
+ }
121
+ }
122
+ cur--; // Point to last digit (or dot).
123
+
124
+ if (*cur == '.') {
125
+ cur--; // Ignore any trailing dot.
126
+ has_dot = 0;
127
+ }
128
+ if (*cur != '-') {
129
+ if (has_dot) token->flags |= TOKEN_FLOAT_NUMBER;
130
+ RETURN_TOKEN(TOKEN_NUMBER, cur + 1 - str);
131
+ }
132
+ }
133
+
134
+ if (ISALPHA(c) || c == '_') {
135
+ cur = str;
136
+ while (++cur < end && is_identifier(*cur)) {}
137
+ if (cur < end && *cur == '?') cur++;
138
+ RETURN_TOKEN(TOKEN_IDENTIFIER, cur - str);
139
+ }
140
+
141
+ if (is_special(c)) RETURN_TOKEN(c, 1);
142
+
143
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected character %c", c);
144
+ return NULL;
145
+ }
146
+
147
+ #undef RETURN_TOKEN
148
+
@@ -0,0 +1,46 @@
1
+ #if !defined(LIQUID_LEXER_H)
2
+ #define LIQUID_LEXER_H
3
+
4
+ enum lexer_token_type {
5
+ TOKEN_NONE,
6
+ TOKEN_COMPARISON,
7
+ TOKEN_STRING,
8
+ TOKEN_NUMBER,
9
+ TOKEN_IDENTIFIER,
10
+ TOKEN_DOTDOT,
11
+ TOKEN_EOS,
12
+
13
+ TOKEN_PIPE = '|',
14
+ TOKEN_DOT = '.',
15
+ TOKEN_COLON = ':',
16
+ TOKEN_COMMA = ',',
17
+ TOKEN_OPEN_SQUARE = '[',
18
+ TOKEN_CLOSE_SQUARE = ']',
19
+ TOKEN_OPEN_ROUND = '(',
20
+ TOKEN_CLOSE_ROUND = ')',
21
+ TOKEN_QUESTION = '?',
22
+ TOKEN_DASH = '-',
23
+
24
+ TOKEN_END = 256
25
+ };
26
+
27
+ #define TOKEN_SPACE_PREFIX 0x1
28
+ #define TOKEN_SPACE_SUFFIX 0x2
29
+ #define TOKEN_SPACE_AFFIX (TOKEN_SPACE_PREFIX | TOKEN_SPACE_SUFFIX)
30
+ #define TOKEN_FLOAT_NUMBER 0x4
31
+
32
+ typedef struct lexer_token {
33
+ unsigned char type, flags;
34
+ const char *val, *val_end;
35
+ } lexer_token_t;
36
+
37
+ extern const char *symbol_names[TOKEN_END];
38
+
39
+ const char *lex_one(const char *str, const char *end, lexer_token_t *token);
40
+
41
+ inline static VALUE token_to_rstr(lexer_token_t token) {
42
+ return rb_enc_str_new(token.val, token.val_end - token.val, utf8_encoding);
43
+ }
44
+
45
+ #endif
46
+
@@ -1,12 +1,24 @@
1
1
  #include "liquid.h"
2
2
  #include "tokenizer.h"
3
+ #include "variable.h"
4
+ #include "lexer.h"
5
+ #include "parser.h"
6
+ #include "block.h"
3
7
 
4
- VALUE mLiquid;
8
+ VALUE mLiquid, cLiquidSyntaxError, cLiquidVariable, cLiquidTemplate;
5
9
  rb_encoding *utf8_encoding;
6
10
 
7
11
  void Init_liquid_c(void)
8
12
  {
9
13
  utf8_encoding = rb_utf8_encoding();
10
14
  mLiquid = rb_define_module("Liquid");
15
+ cLiquidSyntaxError = rb_const_get(mLiquid, rb_intern("SyntaxError"));
16
+ cLiquidVariable = rb_const_get(mLiquid, rb_intern("Variable"));
17
+ cLiquidTemplate = rb_const_get(mLiquid, rb_intern("Template"));
18
+
11
19
  init_liquid_tokenizer();
20
+ init_liquid_parser();
21
+ init_liquid_variable();
22
+ init_liquid_block();
12
23
  }
24
+
@@ -5,7 +5,8 @@
5
5
  #include <ruby/encoding.h>
6
6
  #include <stdbool.h>
7
7
 
8
- extern VALUE mLiquid;
8
+ extern VALUE mLiquid, cLiquidSyntaxError, cLiquidVariable, cLiquidTemplate;
9
9
  extern rb_encoding *utf8_encoding;
10
10
 
11
11
  #endif
12
+
@@ -0,0 +1,198 @@
1
+ #include "liquid.h"
2
+ #include "parser.h"
3
+ #include "lexer.h"
4
+
5
+ static VALUE cLiquidRangeLookup, cLiquidVariableLookup, cRange, symBlank, symEmpty;
6
+ static ID idToI, idEvaluate;
7
+
8
+ void init_parser(parser_t *p, const char *str, const char *end)
9
+ {
10
+ p->str_end = end;
11
+ p->cur.type = p->next.type = TOKEN_EOS;
12
+ p->str = lex_one(str, end, &p->cur);
13
+ p->str = lex_one(p->str, end, &p->next);
14
+ }
15
+
16
+ lexer_token_t parser_consume_any(parser_t *p)
17
+ {
18
+ lexer_token_t cur = p->cur;
19
+ p->cur = p->next;
20
+ p->next.type = TOKEN_EOS;
21
+ p->str = lex_one(p->str, p->str_end, &p->next);
22
+ return cur;
23
+ }
24
+
25
+ lexer_token_t parser_must_consume(parser_t *p, unsigned char type)
26
+ {
27
+ if (p->cur.type != type) {
28
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Expected %s but found %s",
29
+ symbol_names[type], symbol_names[p->cur.type]);
30
+ }
31
+ return parser_consume_any(p);
32
+ }
33
+
34
+ lexer_token_t parser_consume(parser_t *p, unsigned char type)
35
+ {
36
+ if (p->cur.type != type) {
37
+ lexer_token_t zero = {0};
38
+ return zero;
39
+ }
40
+ return parser_consume_any(p);
41
+ }
42
+
43
+ inline static int rstring_eq(VALUE rstr, const char *str) {
44
+ size_t str_len = strlen(str);
45
+
46
+ return TYPE(rstr) == T_STRING &&
47
+ str_len == (size_t)RSTRING_LEN(rstr) &&
48
+ memcmp(RSTRING_PTR(rstr), str, str_len) == 0;
49
+ }
50
+
51
+ static VALUE parse_number(parser_t *p)
52
+ {
53
+ VALUE out;
54
+ lexer_token_t token = parser_must_consume(p, TOKEN_NUMBER);
55
+
56
+ // Set up sentinel for rb_cstr operations.
57
+ char tmp = *token.val_end;
58
+ *(char *)token.val_end = '\0';
59
+
60
+ if (token.flags & TOKEN_FLOAT_NUMBER) {
61
+ out = DBL2NUM(rb_cstr_to_dbl(token.val, 1));
62
+ } else {
63
+ out = rb_cstr_to_inum(token.val, 10, 1);
64
+ }
65
+
66
+ *(char *)token.val_end = tmp;
67
+ return out;
68
+ }
69
+
70
+ static VALUE parse_range(parser_t *p)
71
+ {
72
+ parser_must_consume(p, TOKEN_OPEN_ROUND);
73
+
74
+ VALUE args[2];
75
+ args[0] = parse_expression(p);
76
+ parser_must_consume(p, TOKEN_DOTDOT);
77
+
78
+ args[1] = parse_expression(p);
79
+ parser_must_consume(p, TOKEN_CLOSE_ROUND);
80
+
81
+ if (rb_respond_to(args[0], idEvaluate) || rb_respond_to(args[1], idEvaluate))
82
+ return rb_class_new_instance(2, args, cLiquidRangeLookup);
83
+
84
+ return rb_class_new_instance(2, args, cRange);
85
+ }
86
+
87
+ static VALUE parse_variable(parser_t *p)
88
+ {
89
+ VALUE name, lookups = rb_ary_new(), lookup;
90
+ unsigned long long command_flags = 0;
91
+
92
+ if (parser_consume(p, TOKEN_OPEN_SQUARE).type) {
93
+ name = parse_expression(p);
94
+ parser_must_consume(p, TOKEN_CLOSE_SQUARE);
95
+ } else {
96
+ name = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER));
97
+ }
98
+
99
+ while (true) {
100
+ if (p->cur.type == TOKEN_OPEN_SQUARE) {
101
+ parser_consume_any(p);
102
+ lookup = parse_expression(p);
103
+ parser_must_consume(p, TOKEN_CLOSE_SQUARE);
104
+
105
+ rb_ary_push(lookups, lookup);
106
+ } else if (p->cur.type == TOKEN_DOT) {
107
+ int has_space_affix = parser_consume_any(p).flags & TOKEN_SPACE_AFFIX;
108
+ lookup = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER));
109
+
110
+ if (has_space_affix)
111
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected dot");
112
+
113
+ if (rstring_eq(lookup, "size") || rstring_eq(lookup, "first") || rstring_eq(lookup, "last"))
114
+ command_flags |= 1 << RARRAY_LEN(lookups);
115
+
116
+ rb_ary_push(lookups, lookup);
117
+ } else {
118
+ break;
119
+ }
120
+ }
121
+
122
+ if (RARRAY_LEN(lookups) == 0 && TYPE(name) == T_STRING) {
123
+ if (rstring_eq(name, "nil") || rstring_eq(name, "null")) return Qnil;
124
+ if (rstring_eq(name, "true")) return Qtrue;
125
+ if (rstring_eq(name, "false")) return Qfalse;
126
+ if (rstring_eq(name, "blank")) return symBlank;
127
+ if (rstring_eq(name, "empty")) return symEmpty;
128
+ }
129
+
130
+ VALUE args[4] = {Qfalse, name, lookups, INT2FIX(command_flags)};
131
+ return rb_class_new_instance(4, args, cLiquidVariableLookup);
132
+ }
133
+
134
+ VALUE parse_expression(parser_t *p)
135
+ {
136
+ switch (p->cur.type) {
137
+ case TOKEN_IDENTIFIER:
138
+ case TOKEN_OPEN_SQUARE:
139
+ return parse_variable(p);
140
+
141
+ case TOKEN_NUMBER:
142
+ return parse_number(p);
143
+
144
+ case TOKEN_OPEN_ROUND:
145
+ return parse_range(p);
146
+
147
+ case TOKEN_STRING:
148
+ {
149
+ lexer_token_t token = parser_consume_any(p);
150
+ token.val++;
151
+ token.val_end--;
152
+ return token_to_rstr(token);
153
+ }
154
+ }
155
+
156
+ if (p->cur.type == TOKEN_EOS) {
157
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p->cur.type]);
158
+ } else {
159
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s, \"%.*s\"] is not a valid expression",
160
+ symbol_names[p->cur.type], (int)(p->cur.val_end - p->cur.val), p->cur.val);
161
+ }
162
+ return Qnil;
163
+ }
164
+
165
+ static VALUE rb_parse_expression(VALUE self, VALUE markup)
166
+ {
167
+ StringValue(markup);
168
+ char *start = RSTRING_PTR(markup);
169
+
170
+ parser_t p;
171
+ init_parser(&p, start, start + RSTRING_LEN(markup));
172
+
173
+ if (p.cur.type == TOKEN_EOS)
174
+ return Qnil;
175
+
176
+ VALUE expr = parse_expression(&p);
177
+
178
+ if (p.cur.type != TOKEN_EOS)
179
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p.cur.type]);
180
+
181
+ return expr;
182
+ }
183
+
184
+ void init_liquid_parser(void)
185
+ {
186
+ idToI = rb_intern("to_i");
187
+ idEvaluate = rb_intern("evaluate");
188
+ symBlank = ID2SYM(rb_intern("blank?"));
189
+ symEmpty = ID2SYM(rb_intern("empty?"));
190
+
191
+ cLiquidRangeLookup = rb_const_get(mLiquid, rb_intern("RangeLookup"));
192
+ cRange = rb_const_get(rb_cObject, rb_intern("Range"));
193
+ cLiquidVariableLookup = rb_const_get(mLiquid, rb_intern("VariableLookup"));
194
+
195
+ VALUE cLiquidExpression = rb_const_get(mLiquid, rb_intern("Expression"));
196
+ rb_define_singleton_method(cLiquidExpression, "c_parse", rb_parse_expression, 1);
197
+ }
198
+
@@ -0,0 +1,22 @@
1
+ #if !defined(LIQUID_PARSER_H)
2
+ #define LIQUID_PARSER_H
3
+
4
+ #include "lexer.h"
5
+
6
+ typedef struct parser {
7
+ lexer_token_t cur, next;
8
+ const char *str, *str_end;
9
+ } parser_t;
10
+
11
+ void init_parser(parser_t *parser, const char *str, const char *end);
12
+
13
+ lexer_token_t parser_must_consume(parser_t *parser, unsigned char type);
14
+ lexer_token_t parser_consume(parser_t *parser, unsigned char type);
15
+ lexer_token_t parser_consume_any(parser_t *parser);
16
+
17
+ VALUE parse_expression(parser_t *parser);
18
+
19
+ void init_liquid_parser(void);
20
+
21
+ #endif
22
+
@@ -62,7 +62,7 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
62
62
  const char *last = cursor + tokenizer->length - 1;
63
63
 
64
64
  token->str = cursor;
65
- token->type = TOKEN_STRING;
65
+ token->type = TOKEN_RAW;
66
66
 
67
67
  while (cursor < last) {
68
68
  if (*cursor++ != '{')
@@ -72,7 +72,7 @@ void tokenizer_next(tokenizer_t *tokenizer, token_t *token)
72
72
  if (c != '%' && c != '{')
73
73
  continue;
74
74
  if (cursor - tokenizer->cursor > 2) {
75
- token->type = TOKEN_STRING;
75
+ token->type = TOKEN_RAW;
76
76
  cursor -= 2;
77
77
  goto found;
78
78
  }
@@ -123,7 +123,7 @@ static VALUE tokenizer_shift_method(VALUE self)
123
123
 
124
124
  token_t token;
125
125
  tokenizer_next(tokenizer, &token);
126
- if (token.type == TOKEN_NONE)
126
+ if (!token.type)
127
127
  return Qnil;
128
128
 
129
129
  return rb_enc_str_new(token.str, token.length, utf8_encoding);
@@ -136,3 +136,4 @@ void init_liquid_tokenizer()
136
136
  rb_define_method(cLiquidTokenizer, "initialize", tokenizer_initialize_method, 1);
137
137
  rb_define_method(cLiquidTokenizer, "shift", tokenizer_shift_method, 0);
138
138
  }
139
+
@@ -2,9 +2,9 @@
2
2
  #define LIQUID_TOKENIZER_H
3
3
 
4
4
  enum token_type {
5
- TOKEN_NONE,
5
+ TOKENIZER_TOKEN_NONE = 0,
6
6
  TOKEN_INVALID,
7
- TOKEN_STRING,
7
+ TOKEN_RAW,
8
8
  TOKEN_TAG,
9
9
  TOKEN_VARIABLE
10
10
  };
@@ -29,3 +29,4 @@ void init_liquid_tokenizer();
29
29
  void tokenizer_next(tokenizer_t *tokenizer, token_t *token);
30
30
 
31
31
  #endif
32
+
@@ -0,0 +1,54 @@
1
+ #include "liquid.h"
2
+ #include "variable.h"
3
+ #include "parser.h"
4
+ #include <stdio.h>
5
+
6
+ static VALUE rb_variable_parse(VALUE self, VALUE markup, VALUE filters)
7
+ {
8
+ StringValue(markup);
9
+ char *start = RSTRING_PTR(markup);
10
+
11
+ parser_t p;
12
+ init_parser(&p, start, start + RSTRING_LEN(markup));
13
+
14
+ if (p.cur.type == TOKEN_EOS)
15
+ return Qnil;
16
+
17
+ VALUE name = parse_expression(&p);
18
+
19
+ while (parser_consume(&p, TOKEN_PIPE).type) {
20
+ lexer_token_t filter_name = parser_must_consume(&p, TOKEN_IDENTIFIER);
21
+
22
+ VALUE filter_args = rb_ary_new(), keyword_args = Qnil, filter;
23
+
24
+ if (parser_consume(&p, TOKEN_COLON).type) {
25
+ do {
26
+ if (p.cur.type == TOKEN_IDENTIFIER && p.next.type == TOKEN_COLON) {
27
+ VALUE key = token_to_rstr(parser_consume_any(&p));
28
+ parser_consume_any(&p);
29
+
30
+ if (keyword_args == Qnil) keyword_args = rb_hash_new();
31
+ rb_hash_aset(keyword_args, key, parse_expression(&p));
32
+ } else {
33
+ rb_ary_push(filter_args, parse_expression(&p));
34
+ }
35
+ } while (parser_consume(&p, TOKEN_COMMA).type);
36
+ }
37
+
38
+ if (keyword_args == Qnil) {
39
+ filter = rb_ary_new3(2, token_to_rstr(filter_name), filter_args);
40
+ } else {
41
+ filter = rb_ary_new3(3, token_to_rstr(filter_name), filter_args, keyword_args);
42
+ }
43
+ rb_ary_push(filters, filter);
44
+ }
45
+
46
+ parser_must_consume(&p, TOKEN_EOS);
47
+ return name;
48
+ }
49
+
50
+ void init_liquid_variable(void)
51
+ {
52
+ rb_define_singleton_method(cLiquidVariable, "c_strict_parse", rb_variable_parse, 2);
53
+ }
54
+
@@ -0,0 +1,7 @@
1
+ #if !defined(LIQUID_VARIABLE_H)
2
+ #define LIQUID_VARIABLE_H
3
+
4
+ void init_liquid_variable(void);
5
+
6
+ #endif
7
+
@@ -2,17 +2,98 @@ require 'liquid/c/version'
2
2
  require 'liquid'
3
3
  require 'liquid_c'
4
4
 
5
module Liquid
  module C
    # Module-level switch, true once this file is loaded.
    @enabled = true

    # Returns the current value of the switch.
    def self.enabled
      @enabled
    end

    # Stores a new value for the switch.
    def self.enabled=(value)
      @enabled = value
    end
  end
end
14
+
5
15
  Liquid::Template.class_eval do
6
16
  private
7
17
 
8
18
  alias_method :ruby_tokenize, :tokenize
9
19
 
10
20
  def tokenize(source)
11
- if @line_numbers
21
+ if Liquid::C.enabled && !@line_numbers
22
+ Liquid::Tokenizer.new(source.to_s)
23
+ else
12
24
  ruby_tokenize(source)
25
+ end
26
+ end
27
+ end
28
+
29
+ Liquid::BlockBody.class_eval do
30
+ alias_method :ruby_parse, :parse
31
+
32
+ def parse(tokens, options)
33
+ if Liquid::C.enabled && !options[:line_numbers] && !options[:profile]
34
+ c_parse(tokens, options) { |t, m| yield t, m }
13
35
  else
14
- Liquid::Tokenizer.new(source.to_s)
36
+ ruby_parse(tokens, options) { |t, m| yield t, m }
15
37
  end
16
38
  end
17
39
  end
18
40
 
41
Liquid::Variable.class_eval do
  alias_method :ruby_lax_parse, :lax_parse
  alias_method :ruby_strict_parse, :strict_parse

  # Lax parsing: with the C extension enabled, tries the strict parser first
  # and falls back to the Ruby lax parser on Liquid::SyntaxError.
  # Calls the :variable_parse stats callback on every call and the
  # :variable_fallback callback on each fallback, when callbacks are configured.
  def lax_parse(markup)
    callbacks = @options[:stats_callbacks]
    callbacks[:variable_parse].call if callbacks

    return ruby_lax_parse(markup) unless Liquid::C.enabled

    begin
      return strict_parse(markup)
    rescue Liquid::SyntaxError
      callbacks[:variable_fallback].call if callbacks
    end

    ruby_lax_parse(markup)
  end

  # Strict parsing: uses c_strict_parse when the C extension is enabled
  # (resetting @filters and storing the result in @name), otherwise the
  # original Ruby implementation.
  def strict_parse(markup)
    return ruby_strict_parse(markup) unless Liquid::C.enabled

    @filters = []
    @name = Liquid::Variable.c_strict_parse(markup, @filters)
  end
end
68
+
69
Liquid::VariableLookup.class_eval do
  alias_method :ruby_initialize, :initialize

  # Passing false as markup (with the C extension enabled) stores the given
  # name, lookups and command_flags directly instead of parsing markup;
  # any other call goes to the original Ruby initializer.
  def initialize(markup, name = nil, lookups = nil, command_flags = nil)
    return ruby_initialize(markup) unless Liquid::C.enabled && markup == false

    @name = name
    @lookups = lookups
    @command_flags = command_flags
  end
end
82
+
83
Liquid::Expression.singleton_class.class_eval do
  alias_method :ruby_parse, :parse

  # Returns nil for nil/false markup. With the C extension enabled, tries
  # c_parse first and falls back to the Ruby parser on Liquid::SyntaxError.
  def parse(markup)
    return nil unless markup
    return ruby_parse(markup) unless Liquid::C.enabled

    begin
      return c_parse(markup)
    rescue Liquid::SyntaxError
      # Deliberately ignored: the Ruby parser below gets the final say.
    end

    ruby_parse(markup)
  end
end
@@ -1,5 +1,5 @@
1
1
  module Liquid
2
2
  module C
3
- VERSION = "0.0.2"
3
+ VERSION = "0.0.3"
4
4
  end
5
5
  end
@@ -0,0 +1,106 @@
1
+ # encoding: utf-8
2
+ require 'test_helper'
3
+
4
+ class VariableTest < MiniTest::Unit::TestCase
5
+ def test_variable_parse
6
+ assert_equal [lookup('hello'), []], variable_parse('hello')
7
+ assert_equal ['world', []], variable_parse(' "world" ')
8
+ assert_equal [lookup('hello["world"]'), []], variable_parse(' hello["world"] ')
9
+ assert_equal [nil, []], variable_parse('')
10
+ assert_equal [lookup('question?'), []], variable_parse('question?')
11
+ assert_equal [lookup('[meta]'), []], variable_parse('[meta]')
12
+ assert_equal [lookup('a-b'), []], variable_parse('a-b')
13
+ assert_equal [lookup('a-2'), []], variable_parse('a-2')
14
+ end
15
+
16
+ def test_strictness
17
+ assert_raises(Liquid::SyntaxError) { variable_parse(' hello["world\']" ') }
18
+ assert_raises(Liquid::SyntaxError) { variable_parse('-..') }
19
+ assert_raises(Liquid::SyntaxError) { variable_parse('question?mark') }
20
+ assert_raises(Liquid::SyntaxError) { variable_parse('123.foo') }
21
+ assert_raises(Liquid::SyntaxError) { variable_parse(' | nothing') }
22
+
23
+ ['a .b', 'a. b', 'a . b'].each do |var|
24
+ assert_raises(Liquid::SyntaxError) { variable_parse(var) }
25
+ end
26
+
27
+ ['a -b', 'a- b', 'a - b'].each do |var|
28
+ assert_raises(Liquid::SyntaxError) { variable_parse(var) }
29
+ end
30
+ end
31
+
32
+ def test_literals
33
+ assert_equal [true, []], variable_parse('true')
34
+ assert_equal [nil, []], variable_parse('nil')
35
+ assert_equal [123.4, []], variable_parse('123.4')
36
+
37
+ assert_equal [lookup('[blank]'), []], variable_parse('[blank]')
38
+ assert_equal [lookup(false, true, [:blank?], 0), []], variable_parse('[true][blank]')
39
+ assert_equal [lookup('[true][blank]'), []], variable_parse('[true][blank]')
40
+ assert_equal [lookup('x["size"]'), []], variable_parse('x["size"]')
41
+ end
42
+
43
+ def test_variable_filter
44
+ name = lookup('name')
45
+ assert_equal [name, [['filter', []]]], variable_parse(' name | filter ')
46
+ assert_equal [name, [['filter1', []], ['filter2', []]]], variable_parse(' name | filter1 | filter2 ')
47
+ end
48
+
49
+ def test_variable_filter_args
50
+ name = lookup('name')
51
+ abc = lookup('abc')
52
+
53
+ assert_equal [name, [['filter', [abc]]]], variable_parse(' name | filter: abc ')
54
+
55
+ assert_equal [name, [['filter1', [abc]], ['filter2', [abc]]]],
56
+ variable_parse(' name | filter1: abc | filter2: abc ')
57
+
58
+ assert_equal [name, [['filter', [lookup('a')], {'b' => lookup('c'), 'd' => lookup('e')}]]],
59
+ variable_parse('name | filter : a , b : c , d : e')
60
+
61
+ assert_raises Liquid::SyntaxError do
62
+ variable_parse('name | filter : a : b : c : d : e')
63
+ end
64
+ end
65
+
66
+ def test_unicode_strings
67
+ assert_equal ['å߀êùidhtлsԁѵ߀ráƙìstɦeƅêstpcmáѕterrãcêcհèrr', []],
68
+ variable_parse('"å߀êùidhtлsԁѵ߀ráƙìstɦeƅêstpcmáѕterrãcêcհèrr"')
69
+ end
70
+
71
+ def test_broken_unicode_errors
72
+ err = assert_raises(Liquid::SyntaxError) do
73
+ Liquid::Template.parse("test {{ \xC2\xA0 test }}", error_mode: :strict)
74
+ end
75
+ assert err.message
76
+ end
77
+
78
+ def test_callbacks
79
+ variable_parses = 0
80
+ variable_fallbacks = 0
81
+
82
+ callbacks = {
83
+ variable_parse: lambda { variable_parses += 1 },
84
+ variable_fallback: lambda { variable_fallbacks += 1 }
85
+ }
86
+
87
+ Liquid::Variable.new('abc', error_mode: :lax, stats_callbacks: callbacks)
88
+ assert_equal 1, variable_parses
89
+ assert_equal 0, variable_fallbacks
90
+
91
+ Liquid::Variable.new('@!#', error_mode: :lax, stats_callbacks: callbacks)
92
+ assert_equal 2, variable_parses
93
+ assert_equal 1, variable_fallbacks
94
+ end
95
+
96
+ private
97
+
98
+ def variable_parse(markup)
99
+ name = Liquid::Variable.c_strict_parse(markup, filters = [])
100
+ [name, filters]
101
+ end
102
+
103
+ def lookup(*args)
104
+ Liquid::VariableLookup.new(*args)
105
+ end
106
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: liquid-c
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dylan Thacker-Smith
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-08 00:00:00.000000000 Z
11
+ date: 2014-12-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: liquid
@@ -94,11 +94,19 @@ files:
94
94
  - LICENSE.txt
95
95
  - README.md
96
96
  - Rakefile
97
+ - ext/liquid_c/block.c
98
+ - ext/liquid_c/block.h
97
99
  - ext/liquid_c/extconf.rb
100
+ - ext/liquid_c/lexer.c
101
+ - ext/liquid_c/lexer.h
98
102
  - ext/liquid_c/liquid.c
99
103
  - ext/liquid_c/liquid.h
104
+ - ext/liquid_c/parser.c
105
+ - ext/liquid_c/parser.h
100
106
  - ext/liquid_c/tokenizer.c
101
107
  - ext/liquid_c/tokenizer.h
108
+ - ext/liquid_c/variable.c
109
+ - ext/liquid_c/variable.h
102
110
  - lib/liquid/c.rb
103
111
  - lib/liquid/c/version.rb
104
112
  - liquid-c.gemspec
@@ -106,6 +114,7 @@ files:
106
114
  - test/liquid_test.rb
107
115
  - test/test_helper.rb
108
116
  - test/unit/tokenizer_test.rb
117
+ - test/unit/variable_test.rb
109
118
  homepage: ''
110
119
  licenses:
111
120
  - MIT
@@ -126,7 +135,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
126
135
  version: '0'
127
136
  requirements: []
128
137
  rubyforge_project:
129
- rubygems_version: 2.2.0
138
+ rubygems_version: 2.2.2
130
139
  signing_key:
131
140
  specification_version: 4
132
141
  summary: Liquid performance extension in C
@@ -134,3 +143,4 @@ test_files:
134
143
  - test/liquid_test.rb
135
144
  - test/test_helper.rb
136
145
  - test/unit/tokenizer_test.rb
146
+ - test/unit/variable_test.rb