liquid-c 4.0.1 → 4.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/liquid.yml +24 -2
  3. data/.gitignore +4 -0
  4. data/.rubocop.yml +14 -0
  5. data/Gemfile +14 -5
  6. data/README.md +29 -5
  7. data/Rakefile +13 -62
  8. data/ext/liquid_c/block.c +488 -60
  9. data/ext/liquid_c/block.h +28 -2
  10. data/ext/liquid_c/c_buffer.c +42 -0
  11. data/ext/liquid_c/c_buffer.h +76 -0
  12. data/ext/liquid_c/context.c +233 -0
  13. data/ext/liquid_c/context.h +70 -0
  14. data/ext/liquid_c/document_body.c +89 -0
  15. data/ext/liquid_c/document_body.h +59 -0
  16. data/ext/liquid_c/expression.c +116 -0
  17. data/ext/liquid_c/expression.h +24 -0
  18. data/ext/liquid_c/extconf.rb +19 -9
  19. data/ext/liquid_c/intutil.h +22 -0
  20. data/ext/liquid_c/lexer.c +6 -2
  21. data/ext/liquid_c/lexer.h +18 -3
  22. data/ext/liquid_c/liquid.c +76 -6
  23. data/ext/liquid_c/liquid.h +24 -1
  24. data/ext/liquid_c/parse_context.c +76 -0
  25. data/ext/liquid_c/parse_context.h +13 -0
  26. data/ext/liquid_c/parser.c +141 -65
  27. data/ext/liquid_c/parser.h +4 -2
  28. data/ext/liquid_c/raw.c +110 -0
  29. data/ext/liquid_c/raw.h +6 -0
  30. data/ext/liquid_c/resource_limits.c +279 -0
  31. data/ext/liquid_c/resource_limits.h +23 -0
  32. data/ext/liquid_c/stringutil.h +44 -0
  33. data/ext/liquid_c/tokenizer.c +149 -35
  34. data/ext/liquid_c/tokenizer.h +20 -9
  35. data/ext/liquid_c/usage.c +18 -0
  36. data/ext/liquid_c/usage.h +9 -0
  37. data/ext/liquid_c/variable.c +196 -20
  38. data/ext/liquid_c/variable.h +18 -1
  39. data/ext/liquid_c/variable_lookup.c +44 -0
  40. data/ext/liquid_c/variable_lookup.h +8 -0
  41. data/ext/liquid_c/vm.c +588 -0
  42. data/ext/liquid_c/vm.h +25 -0
  43. data/ext/liquid_c/vm_assembler.c +491 -0
  44. data/ext/liquid_c/vm_assembler.h +240 -0
  45. data/ext/liquid_c/vm_assembler_pool.c +97 -0
  46. data/ext/liquid_c/vm_assembler_pool.h +27 -0
  47. data/lib/liquid/c/compile_ext.rb +44 -0
  48. data/lib/liquid/c/version.rb +3 -1
  49. data/lib/liquid/c.rb +225 -46
  50. data/liquid-c.gemspec +16 -10
  51. data/performance/c_profile.rb +23 -0
  52. data/performance.rb +6 -4
  53. data/rakelib/compile.rake +15 -0
  54. data/rakelib/integration_test.rake +43 -0
  55. data/rakelib/performance.rake +43 -0
  56. data/rakelib/rubocop.rake +6 -0
  57. data/rakelib/unit_test.rake +14 -0
  58. data/test/integration_test.rb +11 -0
  59. data/test/liquid_test_helper.rb +21 -0
  60. data/test/test_helper.rb +14 -2
  61. data/test/unit/block_test.rb +130 -0
  62. data/test/unit/context_test.rb +83 -0
  63. data/test/unit/expression_test.rb +186 -0
  64. data/test/unit/gc_stress_test.rb +28 -0
  65. data/test/unit/raw_test.rb +19 -0
  66. data/test/unit/resource_limits_test.rb +50 -0
  67. data/test/unit/tokenizer_test.rb +90 -20
  68. data/test/unit/variable_test.rb +212 -60
  69. metadata +59 -11
  70. data/test/liquid_test.rb +0 -11
@@ -0,0 +1,24 @@
1
+ #if !defined(LIQUID_EXPRESSION_H)
2
+ #define LIQUID_EXPRESSION_H
3
+
4
+ #include "vm_assembler.h"
5
+ #include "parser.h"
6
+
7
+ extern VALUE cLiquidCExpression;
8
+ extern const rb_data_type_t expression_data_type;
9
+
10
+ typedef struct expression {
11
+ vm_assembler_t code;
12
+ } expression_t;
13
+
14
+ extern const rb_data_type_t expression_data_type;
15
+ #define Expression_Get_Struct(obj, sval) TypedData_Get_Struct(obj, expression_t, &expression_data_type, sval)
16
+
17
+ void liquid_define_expression(void);
18
+
19
+ VALUE expression_new(VALUE klass, expression_t **expression_ptr);
20
+ VALUE expression_evaluate(VALUE self, VALUE context);
21
+ VALUE internal_expression_evaluate(expression_t *expression, VALUE context);
22
+
23
+ #endif
24
+
@@ -1,14 +1,24 @@
1
- require 'mkmf'
2
- $CFLAGS << ' -std=c99 -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers'
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+ $CFLAGS << " -std=c11 -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers"
5
+ append_cflags("-fvisibility=hidden")
3
6
  # In Ruby 2.6 and earlier, the Ruby headers did not have struct timespec defined
4
- valid_headers = RbConfig::CONFIG['host_os'] !~ /linux/ || Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.7")
5
- pedantic = !ENV['LIQUID_C_PEDANTIC'].to_s.empty?
7
+ valid_headers = RbConfig::CONFIG["host_os"] !~ /linux/ || Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.7")
8
+ pedantic = !ENV["LIQUID_C_PEDANTIC"].to_s.empty?
6
9
  if pedantic && valid_headers
7
- $CFLAGS << ' -Werror'
10
+ $CFLAGS << " -Werror"
8
11
  end
9
- compiler = RbConfig::MAKEFILE_CONFIG['CC']
10
- if ENV['DEBUG'] == 'true' && compiler =~ /gcc|g\+\+/
11
- $CFLAGS << ' -fbounds-check'
12
+ if ENV["DEBUG"] == "true"
13
+ append_cflags("-fbounds-check")
14
+ CONFIG["optflags"] = " -O0"
15
+ else
16
+ $CFLAGS << " -DNDEBUG"
12
17
  end
13
- $warnflags.gsub!(/-Wdeclaration-after-statement/, "") if $warnflags
18
+
19
+ if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.7.0") # added in 2.7
20
+ $CFLAGS << " -DHAVE_RB_HASH_BULK_INSERT"
21
+ end
22
+
23
+ $warnflags&.gsub!(/-Wdeclaration-after-statement/, "")
14
24
  create_makefile("liquid_c")
@@ -0,0 +1,22 @@
1
+ #ifndef LIQUID_INTUTIL_H
2
+ #define LIQUID_INTUTIL_H
3
+
4
+ #include <stdint.h>
5
+
6
+ static inline unsigned int bytes_to_uint24(const uint8_t *bytes)
7
+ {
8
+ return (bytes[0] << 16) | (bytes[1] << 8) | bytes[2];
9
+ }
10
+
11
+ static inline void uint24_to_bytes(unsigned int num, uint8_t *bytes)
12
+ {
13
+ assert(num < (1 << 24));
14
+
15
+ bytes[0] = num >> 16;
16
+ bytes[1] = num >> 8;
17
+ bytes[2] = num;
18
+
19
+ assert(bytes_to_uint24(bytes) == num);
20
+ }
21
+
22
+ #endif
data/ext/liquid_c/lexer.c CHANGED
@@ -1,5 +1,6 @@
1
1
  #include "liquid.h"
2
2
  #include "lexer.h"
3
+ #include "usage.h"
3
4
  #include <stdio.h>
4
5
 
5
6
  const char *symbol_names[TOKEN_END] = {
@@ -59,8 +60,6 @@ inline static const char *scan_past(const char *cur, const char *end, char targe
59
60
  const char *tok_end = str + (n); \
60
61
  token->type = (t); \
61
62
  token->val = str; \
62
- if (str != start) token->flags |= TOKEN_SPACE_PREFIX; \
63
- if (tok_end < end && ISSPACE(*tok_end)) token->flags |= TOKEN_SPACE_SUFFIX; \
64
63
  return (token->val_end = tok_end); \
65
64
  }
66
65
 
@@ -109,6 +108,11 @@ const char *lex_one(const char *start, const char *end, lexer_token_t *token)
109
108
  }
110
109
  }
111
110
 
111
+ // Instrument for bug: https://github.com/Shopify/liquid-c/pull/120
112
+ if (c == '-' && str + 1 < end && str[1] == '.') {
113
+ usage_increment("liquid_c_negative_float_without_integer");
114
+ }
115
+
112
116
  if (ISDIGIT(c) || c == '-') {
113
117
  int has_dot = 0;
114
118
  cur = str;
data/ext/liquid_c/lexer.h CHANGED
@@ -24,9 +24,6 @@ enum lexer_token_type {
24
24
  TOKEN_END = 256
25
25
  };
26
26
 
27
- #define TOKEN_SPACE_PREFIX 0x1
28
- #define TOKEN_SPACE_SUFFIX 0x2
29
- #define TOKEN_SPACE_AFFIX (TOKEN_SPACE_PREFIX | TOKEN_SPACE_SUFFIX)
30
27
  #define TOKEN_FLOAT_NUMBER 0x4
31
28
 
32
29
  typedef struct lexer_token {
@@ -42,5 +39,23 @@ inline static VALUE token_to_rstr(lexer_token_t token) {
42
39
  return rb_enc_str_new(token.val, token.val_end - token.val, utf8_encoding);
43
40
  }
44
41
 
42
+ inline static VALUE token_check_for_symbol(lexer_token_t token) {
43
+ return rb_check_symbol_cstr(token.val, token.val_end - token.val, utf8_encoding);
44
+ }
45
+
46
+ inline static VALUE token_to_rstr_leveraging_existing_symbol(lexer_token_t token) {
47
+ VALUE sym = token_check_for_symbol(token);
48
+ if (RB_LIKELY(sym != Qnil))
49
+ return rb_sym2str(sym);
50
+ return token_to_rstr(token);
51
+ }
52
+
53
+ inline static VALUE token_to_rsym(lexer_token_t token) {
54
+ VALUE sym = token_check_for_symbol(token);
55
+ if (RB_LIKELY(sym != Qnil))
56
+ return sym;
57
+ return rb_str_intern(token_to_rstr(token));
58
+ }
59
+
45
60
  #endif
46
61
 
@@ -3,23 +3,93 @@
3
3
  #include "variable.h"
4
4
  #include "lexer.h"
5
5
  #include "parser.h"
6
+ #include "raw.h"
7
+ #include "resource_limits.h"
8
+ #include "expression.h"
9
+ #include "document_body.h"
6
10
  #include "block.h"
11
+ #include "context.h"
12
+ #include "parse_context.h"
13
+ #include "variable_lookup.h"
14
+ #include "vm_assembler_pool.h"
15
+ #include "vm.h"
16
+ #include "usage.h"
17
+
18
+ ID id_evaluate;
19
+ ID id_to_liquid;
20
+ ID id_to_s;
21
+ ID id_call;
22
+ ID id_compile_evaluate;
23
+ ID id_ivar_line_number;
24
+
25
+ VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody;
26
+ VALUE cLiquidVariableLookup, cLiquidRangeLookup;
27
+ VALUE cLiquidArgumentError, cLiquidSyntaxError, cMemoryError;
7
28
 
8
- VALUE mLiquid, mLiquidC, cLiquidSyntaxError, cLiquidVariable, cLiquidTemplate;
9
29
  rb_encoding *utf8_encoding;
30
+ int utf8_encoding_index;
31
+
32
+ __attribute__((noreturn)) void raise_non_utf8_encoding_error(VALUE string, const char *value_name)
33
+ {
34
+ rb_raise(rb_eEncCompatError, "non-UTF8 encoded %s (%"PRIsVALUE") not supported", value_name, rb_obj_encoding(string));
35
+ }
10
36
 
11
- void Init_liquid_c(void)
37
+ RUBY_FUNC_EXPORTED void Init_liquid_c(void)
12
38
  {
39
+ id_evaluate = rb_intern("evaluate");
40
+ id_to_liquid = rb_intern("to_liquid");
41
+ id_to_s = rb_intern("to_s");
42
+ id_call = rb_intern("call");
43
+ id_compile_evaluate = rb_intern("compile_evaluate");
44
+ id_ivar_line_number = rb_intern("@line_number");
45
+
13
46
  utf8_encoding = rb_utf8_encoding();
47
+ utf8_encoding_index = rb_enc_to_index(utf8_encoding);
48
+
14
49
  mLiquid = rb_define_module("Liquid");
50
+ rb_global_variable(&mLiquid);
51
+
15
52
  mLiquidC = rb_define_module_under(mLiquid, "C");
53
+ rb_global_variable(&mLiquidC);
54
+
55
+ cLiquidArgumentError = rb_const_get(mLiquid, rb_intern("ArgumentError"));
56
+ rb_global_variable(&cLiquidArgumentError);
57
+
16
58
  cLiquidSyntaxError = rb_const_get(mLiquid, rb_intern("SyntaxError"));
59
+ rb_global_variable(&cLiquidSyntaxError);
60
+
61
+ cMemoryError = rb_const_get(mLiquid, rb_intern("MemoryError"));
62
+ rb_global_variable(&cMemoryError);
63
+
17
64
  cLiquidVariable = rb_const_get(mLiquid, rb_intern("Variable"));
65
+ rb_global_variable(&cLiquidVariable);
66
+
18
67
  cLiquidTemplate = rb_const_get(mLiquid, rb_intern("Template"));
68
+ rb_global_variable(&cLiquidTemplate);
69
+
70
+ cLiquidBlockBody = rb_const_get(mLiquid, rb_intern("BlockBody"));
71
+ rb_global_variable(&cLiquidBlockBody);
72
+
73
+ cLiquidVariableLookup = rb_const_get(mLiquid, rb_intern("VariableLookup"));
74
+ rb_global_variable(&cLiquidVariableLookup);
75
+
76
+ cLiquidRangeLookup = rb_const_get(mLiquid, rb_intern("RangeLookup"));
77
+ rb_global_variable(&cLiquidRangeLookup);
19
78
 
20
- init_liquid_tokenizer();
21
- init_liquid_parser();
22
- init_liquid_variable();
23
- init_liquid_block();
79
+ liquid_define_tokenizer();
80
+ liquid_define_parser();
81
+ liquid_define_raw();
82
+ liquid_define_resource_limits();
83
+ liquid_define_expression();
84
+ liquid_define_variable();
85
+ liquid_define_document_body();
86
+ liquid_define_block_body();
87
+ liquid_define_context();
88
+ liquid_define_parse_context();
89
+ liquid_define_variable_lookup();
90
+ liquid_define_vm_assembler_pool();
91
+ liquid_define_vm_assembler();
92
+ liquid_define_vm();
93
+ liquid_define_usage();
24
94
  }
25
95
 
@@ -5,8 +5,31 @@
5
5
  #include <ruby/encoding.h>
6
6
  #include <stdbool.h>
7
7
 
8
- extern VALUE mLiquid, mLiquidC, cLiquidSyntaxError, cLiquidVariable, cLiquidTemplate;
8
+ extern ID id_evaluate;
9
+ extern ID id_to_liquid;
10
+ extern ID id_to_s;
11
+ extern ID id_call;
12
+ extern ID id_compile_evaluate;
13
+ extern ID id_ivar_line_number;
14
+
15
+ extern VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody;
16
+ extern VALUE cLiquidVariableLookup, cLiquidRangeLookup;
17
+ extern VALUE cLiquidArgumentError, cLiquidSyntaxError, cMemoryError;
9
18
  extern rb_encoding *utf8_encoding;
19
+ extern int utf8_encoding_index;
20
+
21
+ __attribute__((noreturn)) void raise_non_utf8_encoding_error(VALUE string, const char *string_name);
22
+
23
+ static inline void check_utf8_encoding(VALUE string, const char *string_name)
24
+ {
25
+ if (RB_UNLIKELY(RB_ENCODING_GET_INLINED(string) != utf8_encoding_index))
26
+ raise_non_utf8_encoding_error(string, string_name);
27
+ }
28
+
29
+ #ifndef RB_LIKELY
30
+ // RB_LIKELY added in Ruby 2.4
31
+ #define RB_LIKELY(x) (__builtin_expect(!!(x), 1))
32
+ #endif
10
33
 
11
34
  #endif
12
35
 
@@ -0,0 +1,76 @@
1
+ #include "parse_context.h"
2
+ #include "document_body.h"
3
+
4
+ static ID id_document_body, id_vm_assembler_pool;
5
+
6
+ static bool parse_context_document_body_initialized_p(VALUE self)
7
+ {
8
+ return RTEST(rb_attr_get(self, id_document_body));
9
+ }
10
+
11
+ static void parse_context_init_document_body(VALUE self)
12
+ {
13
+ VALUE document_body = document_body_new_instance();
14
+ rb_ivar_set(self, id_document_body, document_body);
15
+ }
16
+
17
+ VALUE parse_context_get_document_body(VALUE self)
18
+ {
19
+ assert(parse_context_document_body_initialized_p(self));
20
+
21
+ return rb_ivar_get(self, id_document_body);
22
+ }
23
+
24
+ vm_assembler_pool_t *parse_context_init_vm_assembler_pool(VALUE self)
25
+ {
26
+ assert(!RTEST(rb_attr_get(self, id_vm_assembler_pool)));
27
+
28
+ VALUE vm_assembler_pool_obj = vm_assembler_pool_new();
29
+ rb_ivar_set(self, id_vm_assembler_pool, vm_assembler_pool_obj);
30
+
31
+ vm_assembler_pool_t *vm_assembler_pool;
32
+ VMAssemblerPool_Get_Struct(vm_assembler_pool_obj, vm_assembler_pool);
33
+
34
+ return vm_assembler_pool;
35
+ }
36
+
37
+ vm_assembler_pool_t *parse_context_get_vm_assembler_pool(VALUE self)
38
+ {
39
+ VALUE obj = rb_ivar_get(self, id_vm_assembler_pool);
40
+
41
+ if (obj == Qnil) {
42
+ rb_raise(rb_eRuntimeError, "Liquid::ParseContext#start_liquid_c_parsing has not yet been called");
43
+ }
44
+
45
+ vm_assembler_pool_t *vm_assembler_pool;
46
+ VMAssemblerPool_Get_Struct(obj, vm_assembler_pool);
47
+ return vm_assembler_pool;
48
+ }
49
+
50
+ static VALUE parse_context_start_liquid_c_parsing(VALUE self)
51
+ {
52
+ if (RB_UNLIKELY(parse_context_document_body_initialized_p(self))) {
53
+ rb_raise(rb_eRuntimeError, "liquid-c parsing already started for this parse context");
54
+ }
55
+ parse_context_init_document_body(self);
56
+ parse_context_init_vm_assembler_pool(self);
57
+ return Qnil;
58
+ }
59
+
60
+ static VALUE parse_context_cleanup_liquid_c_parsing(VALUE self)
61
+ {
62
+ rb_obj_freeze(rb_ivar_get(self, id_document_body));
63
+ rb_ivar_set(self, id_document_body, Qnil);
64
+ rb_ivar_set(self, id_vm_assembler_pool, Qnil);
65
+ return Qnil;
66
+ }
67
+
68
+ void liquid_define_parse_context(void)
69
+ {
70
+ id_document_body = rb_intern("document_body");
71
+ id_vm_assembler_pool = rb_intern("vm_assembler_pool");
72
+
73
+ VALUE cLiquidParseContext = rb_const_get(mLiquid, rb_intern("ParseContext"));
74
+ rb_define_method(cLiquidParseContext, "start_liquid_c_parsing", parse_context_start_liquid_c_parsing, 0);
75
+ rb_define_method(cLiquidParseContext, "cleanup_liquid_c_parsing", parse_context_cleanup_liquid_c_parsing, 0);
76
+ }
@@ -0,0 +1,13 @@
1
+ #ifndef LIQUID_PARSE_CONTEXT_H
2
+ #define LIQUID_PARSE_CONTEXT_H
3
+
4
+ #include <ruby.h>
5
+ #include <stdbool.h>
6
+ #include "vm_assembler_pool.h"
7
+
8
+ void liquid_define_parse_context(void);
9
+ VALUE parse_context_get_document_body(VALUE self);
10
+
11
+ vm_assembler_pool_t *parse_context_get_vm_assembler_pool(VALUE self);
12
+
13
+ #endif
@@ -2,8 +2,8 @@
2
2
  #include "parser.h"
3
3
  #include "lexer.h"
4
4
 
5
- static VALUE cLiquidRangeLookup, cLiquidVariableLookup, cRange, vLiquidExpressionLiterals;
6
- static ID idToI, idEvaluate;
5
+ static VALUE empty_string;
6
+ static ID id_to_i, idEvaluate;
7
7
 
8
8
  void init_parser(parser_t *p, const char *str, const char *end)
9
9
  {
@@ -67,79 +67,133 @@ static VALUE parse_number(parser_t *p)
67
67
  return out;
68
68
  }
69
69
 
70
- static VALUE parse_range(parser_t *p)
70
+ static VALUE try_parse_constant_range(parser_t *p)
71
71
  {
72
+ parser_t saved_state = *p;
73
+
72
74
  parser_must_consume(p, TOKEN_OPEN_ROUND);
73
75
 
74
- VALUE args[2];
75
- args[0] = parse_expression(p);
76
+ VALUE begin = try_parse_constant_expression(p);
77
+ if (begin == Qundef) {
78
+ *p = saved_state;
79
+ return Qundef;
80
+ }
76
81
  parser_must_consume(p, TOKEN_DOTDOT);
77
82
 
78
- args[1] = parse_expression(p);
83
+ VALUE end = try_parse_constant_expression(p);
84
+ if (end == Qundef) {
85
+ *p = saved_state;
86
+ return Qundef;
87
+ }
79
88
  parser_must_consume(p, TOKEN_CLOSE_ROUND);
80
89
 
81
- if (rb_respond_to(args[0], idEvaluate) || rb_respond_to(args[1], idEvaluate))
82
- return rb_class_new_instance(2, args, cLiquidRangeLookup);
90
+ begin = rb_funcall(begin, id_to_i, 0);
91
+ end = rb_funcall(end, id_to_i, 0);
83
92
 
84
- return rb_class_new_instance(2, args, cRange);
93
+ bool exclude_end = false;
94
+ return rb_range_new(begin, end, exclude_end);
85
95
  }
86
96
 
87
- static VALUE parse_variable(parser_t *p)
97
+ static void parse_and_compile_range(parser_t *p, vm_assembler_t *code)
88
98
  {
89
- VALUE name, lookups = rb_ary_new(), lookup;
90
- unsigned long long command_flags = 0;
99
+ VALUE const_range = try_parse_constant_range(p);
100
+ if (const_range != Qundef) {
101
+ vm_assembler_add_push_const(code, const_range);
102
+ return;
103
+ }
91
104
 
105
+ parser_must_consume(p, TOKEN_OPEN_ROUND);
106
+ parse_and_compile_expression(p, code);
107
+ parser_must_consume(p, TOKEN_DOTDOT);
108
+ parse_and_compile_expression(p, code);
109
+ parser_must_consume(p, TOKEN_CLOSE_ROUND);
110
+ vm_assembler_add_new_int_range(code);
111
+ }
112
+
113
+ static void parse_and_compile_variable_lookup(parser_t *p, vm_assembler_t *code)
114
+ {
92
115
  if (parser_consume(p, TOKEN_OPEN_SQUARE).type) {
93
- name = parse_expression(p);
116
+ parse_and_compile_expression(p, code);
94
117
  parser_must_consume(p, TOKEN_CLOSE_SQUARE);
118
+ vm_assembler_add_find_variable(code);
95
119
  } else {
96
- name = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER));
120
+ VALUE name = token_to_rstr_leveraging_existing_symbol(parser_must_consume(p, TOKEN_IDENTIFIER));
121
+ vm_assembler_add_find_static_variable(code, name);
97
122
  }
98
123
 
99
124
  while (true) {
100
125
  if (p->cur.type == TOKEN_OPEN_SQUARE) {
101
126
  parser_consume_any(p);
102
- lookup = parse_expression(p);
127
+ parse_and_compile_expression(p, code);
103
128
  parser_must_consume(p, TOKEN_CLOSE_SQUARE);
104
-
105
- rb_ary_push(lookups, lookup);
129
+ vm_assembler_add_lookup_key(code);
106
130
  } else if (p->cur.type == TOKEN_DOT) {
107
- int has_space_affix = parser_consume_any(p).flags & TOKEN_SPACE_AFFIX;
108
- lookup = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER));
109
-
110
- if (has_space_affix)
111
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected dot");
112
-
113
- if (rstring_eq(lookup, "size") || rstring_eq(lookup, "first") || rstring_eq(lookup, "last"))
114
- command_flags |= 1 << RARRAY_LEN(lookups);
131
+ parser_consume_any(p);
132
+ VALUE key = token_to_rstr_leveraging_existing_symbol(parser_must_consume(p, TOKEN_IDENTIFIER));
115
133
 
116
- rb_ary_push(lookups, lookup);
134
+ if (rstring_eq(key, "size") || rstring_eq(key, "first") || rstring_eq(key, "last"))
135
+ vm_assembler_add_lookup_command(code, key);
136
+ else
137
+ vm_assembler_add_lookup_const_key(code, key);
117
138
  } else {
118
139
  break;
119
140
  }
120
141
  }
142
+ }
121
143
 
122
- if (RARRAY_LEN(lookups) == 0) {
123
- VALUE literal = rb_hash_lookup2(vLiquidExpressionLiterals, name, Qundef);
124
- if (literal != Qundef) return literal;
144
+ static VALUE try_parse_literal(parser_t *p)
145
+ {
146
+ if (p->next.type == TOKEN_DOT || p->next.type == TOKEN_OPEN_SQUARE)
147
+ return Qundef;
148
+
149
+ const char *str = p->cur.val;
150
+ long size = p->cur.val_end - str;
151
+ VALUE result = Qundef;
152
+ switch (size) {
153
+ case 3:
154
+ if (memcmp(str, "nil", size) == 0)
155
+ result = Qnil;
156
+ break;
157
+ case 4:
158
+ if (memcmp(str, "null", size) == 0) {
159
+ result = Qnil;
160
+ } else if (memcmp(str, "true", size) == 0) {
161
+ result = Qtrue;
162
+ }
163
+ break;
164
+ case 5:
165
+ switch (*str) {
166
+ case 'f':
167
+ if (memcmp(str, "false", size) == 0)
168
+ result = Qfalse;
169
+ break;
170
+ case 'b':
171
+ if (memcmp(str, "blank", size) == 0)
172
+ result = empty_string;
173
+ break;
174
+ case 'e':
175
+ if (memcmp(str, "empty", size) == 0)
176
+ result = empty_string;
177
+ break;
178
+ }
179
+ break;
125
180
  }
126
-
127
- VALUE args[4] = {Qfalse, name, lookups, INT2FIX(command_flags)};
128
- return rb_class_new_instance(4, args, cLiquidVariableLookup);
181
+ if (result != Qundef)
182
+ parser_consume_any(p);
183
+ return result;
129
184
  }
130
185
 
131
- VALUE parse_expression(parser_t *p)
186
+ VALUE try_parse_constant_expression(parser_t *p)
132
187
  {
133
188
  switch (p->cur.type) {
134
189
  case TOKEN_IDENTIFIER:
135
- case TOKEN_OPEN_SQUARE:
136
- return parse_variable(p);
190
+ return try_parse_literal(p);
137
191
 
138
192
  case TOKEN_NUMBER:
139
193
  return parse_number(p);
140
194
 
141
195
  case TOKEN_OPEN_ROUND:
142
- return parse_range(p);
196
+ return try_parse_constant_range(p);
143
197
 
144
198
  case TOKEN_STRING:
145
199
  {
@@ -149,47 +203,69 @@ VALUE parse_expression(parser_t *p)
149
203
  return token_to_rstr(token);
150
204
  }
151
205
  }
152
-
153
- if (p->cur.type == TOKEN_EOS) {
154
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p->cur.type]);
155
- } else {
156
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s, \"%.*s\"] is not a valid expression",
157
- symbol_names[p->cur.type], (int)(p->cur.val_end - p->cur.val), p->cur.val);
158
- }
159
- return Qnil;
206
+ return Qundef;
160
207
  }
161
208
 
162
- static VALUE rb_parse_expression(VALUE self, VALUE markup)
209
+ static void parse_and_compile_number(parser_t *p, vm_assembler_t *code)
163
210
  {
164
- StringValue(markup);
165
- char *start = RSTRING_PTR(markup);
211
+ VALUE num = parse_number(p);
212
+ if (RB_FIXNUM_P(num))
213
+ vm_assembler_add_push_fixnum(code, num);
214
+ else
215
+ vm_assembler_add_push_const(code, num);
216
+ return;
217
+ }
166
218
 
167
- parser_t p;
168
- init_parser(&p, start, start + RSTRING_LEN(markup));
219
+ void parse_and_compile_expression(parser_t *p, vm_assembler_t *code)
220
+ {
221
+ switch (p->cur.type) {
222
+ case TOKEN_IDENTIFIER:
223
+ {
224
+ VALUE literal = try_parse_literal(p);
225
+ if (literal != Qundef) {
226
+ vm_assembler_add_push_literal(code, literal);
227
+ return;
228
+ }
229
+
230
+ __attribute__ ((fallthrough));
231
+ }
232
+ case TOKEN_OPEN_SQUARE:
233
+ parse_and_compile_variable_lookup(p, code);
234
+ return;
169
235
 
170
- if (p.cur.type == TOKEN_EOS)
171
- return Qnil;
236
+ case TOKEN_NUMBER:
237
+ parse_and_compile_number(p, code);
238
+ return;
172
239
 
173
- VALUE expr = parse_expression(&p);
240
+ case TOKEN_OPEN_ROUND:
241
+ parse_and_compile_range(p, code);
242
+ return;
174
243
 
175
- if (p.cur.type != TOKEN_EOS)
176
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p.cur.type]);
244
+ case TOKEN_STRING:
245
+ {
246
+ lexer_token_t token = parser_consume_any(p);
247
+ token.val++;
248
+ token.val_end--;
249
+ VALUE str = token_to_rstr(token);
250
+ vm_assembler_add_push_const(code, str);
251
+ return;
252
+ }
253
+ }
177
254
 
178
- return expr;
255
+ if (p->cur.type == TOKEN_EOS) {
256
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p->cur.type]);
257
+ } else {
258
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s, \"%.*s\"] is not a valid expression",
259
+ symbol_names[p->cur.type], (int)(p->cur.val_end - p->cur.val), p->cur.val);
260
+ }
179
261
  }
180
262
 
181
- void init_liquid_parser(void)
263
+ void liquid_define_parser(void)
182
264
  {
183
- idToI = rb_intern("to_i");
265
+ id_to_i = rb_intern("to_i");
184
266
  idEvaluate = rb_intern("evaluate");
185
267
 
186
- cLiquidRangeLookup = rb_const_get(mLiquid, rb_intern("RangeLookup"));
187
- cRange = rb_const_get(rb_cObject, rb_intern("Range"));
188
- cLiquidVariableLookup = rb_const_get(mLiquid, rb_intern("VariableLookup"));
189
-
190
- VALUE cLiquidExpression = rb_const_get(mLiquid, rb_intern("Expression"));
191
- rb_define_singleton_method(cLiquidExpression, "c_parse", rb_parse_expression, 1);
192
-
193
- vLiquidExpressionLiterals = rb_const_get(cLiquidExpression, rb_intern("LITERALS"));
268
+ empty_string = rb_utf8_str_new_literal("");
269
+ rb_global_variable(&empty_string);
194
270
  }
195
271
 
@@ -2,6 +2,7 @@
2
2
  #define LIQUID_PARSER_H
3
3
 
4
4
  #include "lexer.h"
5
+ #include "vm_assembler.h"
5
6
 
6
7
  typedef struct parser {
7
8
  lexer_token_t cur, next;
@@ -14,9 +15,10 @@ lexer_token_t parser_must_consume(parser_t *parser, unsigned char type);
14
15
  lexer_token_t parser_consume(parser_t *parser, unsigned char type);
15
16
  lexer_token_t parser_consume_any(parser_t *parser);
16
17
 
17
- VALUE parse_expression(parser_t *parser);
18
+ void parse_and_compile_expression(parser_t *p, vm_assembler_t *code);
19
+ VALUE try_parse_constant_expression(parser_t *p);
18
20
 
19
- void init_liquid_parser(void);
21
+ void liquid_define_parser(void);
20
22
 
21
23
  #endif
22
24