liquid-c 4.0.1 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/liquid.yml +24 -2
  3. data/.gitignore +4 -0
  4. data/.rubocop.yml +14 -0
  5. data/Gemfile +14 -5
  6. data/README.md +29 -5
  7. data/Rakefile +13 -62
  8. data/ext/liquid_c/block.c +488 -60
  9. data/ext/liquid_c/block.h +28 -2
  10. data/ext/liquid_c/c_buffer.c +42 -0
  11. data/ext/liquid_c/c_buffer.h +76 -0
  12. data/ext/liquid_c/context.c +233 -0
  13. data/ext/liquid_c/context.h +70 -0
  14. data/ext/liquid_c/document_body.c +89 -0
  15. data/ext/liquid_c/document_body.h +59 -0
  16. data/ext/liquid_c/expression.c +116 -0
  17. data/ext/liquid_c/expression.h +24 -0
  18. data/ext/liquid_c/extconf.rb +19 -9
  19. data/ext/liquid_c/intutil.h +22 -0
  20. data/ext/liquid_c/lexer.c +6 -2
  21. data/ext/liquid_c/lexer.h +18 -3
  22. data/ext/liquid_c/liquid.c +76 -6
  23. data/ext/liquid_c/liquid.h +24 -1
  24. data/ext/liquid_c/parse_context.c +76 -0
  25. data/ext/liquid_c/parse_context.h +13 -0
  26. data/ext/liquid_c/parser.c +141 -65
  27. data/ext/liquid_c/parser.h +4 -2
  28. data/ext/liquid_c/raw.c +110 -0
  29. data/ext/liquid_c/raw.h +6 -0
  30. data/ext/liquid_c/resource_limits.c +279 -0
  31. data/ext/liquid_c/resource_limits.h +23 -0
  32. data/ext/liquid_c/stringutil.h +44 -0
  33. data/ext/liquid_c/tokenizer.c +149 -35
  34. data/ext/liquid_c/tokenizer.h +20 -9
  35. data/ext/liquid_c/usage.c +18 -0
  36. data/ext/liquid_c/usage.h +9 -0
  37. data/ext/liquid_c/variable.c +196 -20
  38. data/ext/liquid_c/variable.h +18 -1
  39. data/ext/liquid_c/variable_lookup.c +44 -0
  40. data/ext/liquid_c/variable_lookup.h +8 -0
  41. data/ext/liquid_c/vm.c +588 -0
  42. data/ext/liquid_c/vm.h +25 -0
  43. data/ext/liquid_c/vm_assembler.c +491 -0
  44. data/ext/liquid_c/vm_assembler.h +240 -0
  45. data/ext/liquid_c/vm_assembler_pool.c +97 -0
  46. data/ext/liquid_c/vm_assembler_pool.h +27 -0
  47. data/lib/liquid/c/compile_ext.rb +44 -0
  48. data/lib/liquid/c/version.rb +3 -1
  49. data/lib/liquid/c.rb +225 -46
  50. data/liquid-c.gemspec +16 -10
  51. data/performance/c_profile.rb +23 -0
  52. data/performance.rb +6 -4
  53. data/rakelib/compile.rake +15 -0
  54. data/rakelib/integration_test.rake +43 -0
  55. data/rakelib/performance.rake +43 -0
  56. data/rakelib/rubocop.rake +6 -0
  57. data/rakelib/unit_test.rake +14 -0
  58. data/test/integration_test.rb +11 -0
  59. data/test/liquid_test_helper.rb +21 -0
  60. data/test/test_helper.rb +14 -2
  61. data/test/unit/block_test.rb +130 -0
  62. data/test/unit/context_test.rb +83 -0
  63. data/test/unit/expression_test.rb +186 -0
  64. data/test/unit/gc_stress_test.rb +28 -0
  65. data/test/unit/raw_test.rb +19 -0
  66. data/test/unit/resource_limits_test.rb +50 -0
  67. data/test/unit/tokenizer_test.rb +90 -20
  68. data/test/unit/variable_test.rb +212 -60
  69. metadata +59 -11
  70. data/test/liquid_test.rb +0 -11
@@ -0,0 +1,24 @@
1
+ #if !defined(LIQUID_EXPRESSION_H)
2
+ #define LIQUID_EXPRESSION_H
3
+
4
+ #include "vm_assembler.h"
5
+ #include "parser.h"
6
+
7
+ extern VALUE cLiquidCExpression;
8
+ extern const rb_data_type_t expression_data_type;
9
+
10
+ typedef struct expression {
11
+ vm_assembler_t code;
12
+ } expression_t;
13
+
14
+ extern const rb_data_type_t expression_data_type;
15
+ #define Expression_Get_Struct(obj, sval) TypedData_Get_Struct(obj, expression_t, &expression_data_type, sval)
16
+
17
+ void liquid_define_expression(void);
18
+
19
+ VALUE expression_new(VALUE klass, expression_t **expression_ptr);
20
+ VALUE expression_evaluate(VALUE self, VALUE context);
21
+ VALUE internal_expression_evaluate(expression_t *expression, VALUE context);
22
+
23
+ #endif
24
+
@@ -1,14 +1,24 @@
1
- require 'mkmf'
2
- $CFLAGS << ' -std=c99 -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers'
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+ $CFLAGS << " -std=c11 -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers"
5
+ append_cflags("-fvisibility=hidden")
3
6
  # In Ruby 2.6 and earlier, the Ruby headers did not have struct timespec defined
4
- valid_headers = RbConfig::CONFIG['host_os'] !~ /linux/ || Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.7")
5
- pedantic = !ENV['LIQUID_C_PEDANTIC'].to_s.empty?
7
+ valid_headers = RbConfig::CONFIG["host_os"] !~ /linux/ || Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.7")
8
+ pedantic = !ENV["LIQUID_C_PEDANTIC"].to_s.empty?
6
9
  if pedantic && valid_headers
7
- $CFLAGS << ' -Werror'
10
+ $CFLAGS << " -Werror"
8
11
  end
9
- compiler = RbConfig::MAKEFILE_CONFIG['CC']
10
- if ENV['DEBUG'] == 'true' && compiler =~ /gcc|g\+\+/
11
- $CFLAGS << ' -fbounds-check'
12
+ if ENV["DEBUG"] == "true"
13
+ append_cflags("-fbounds-check")
14
+ CONFIG["optflags"] = " -O0"
15
+ else
16
+ $CFLAGS << " -DNDEBUG"
12
17
  end
13
- $warnflags.gsub!(/-Wdeclaration-after-statement/, "") if $warnflags
18
+
19
+ if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.7.0") # added in 2.7
20
+ $CFLAGS << " -DHAVE_RB_HASH_BULK_INSERT"
21
+ end
22
+
23
+ $warnflags&.gsub!(/-Wdeclaration-after-statement/, "")
14
24
  create_makefile("liquid_c")
@@ -0,0 +1,22 @@
1
#ifndef LIQUID_INTUTIL_H
#define LIQUID_INTUTIL_H

/* assert() is used below, so include its header here instead of relying on
 * whatever the including translation unit happened to pull in first. */
#include <assert.h>
#include <stdint.h>

/*
 * Decodes three bytes, most significant first, into an unsigned integer.
 *
 * bytes: pointer to at least 3 readable bytes.
 * Returns a value in the range [0, 2^24).
 */
static inline unsigned int bytes_to_uint24(const uint8_t *bytes)
{
    /* Widen explicitly so the shifts are performed on unsigned values. */
    return ((unsigned int)bytes[0] << 16) | ((unsigned int)bytes[1] << 8) | (unsigned int)bytes[2];
}

/*
 * Encodes `num` into three bytes, most significant first.
 *
 * num:   value to store; must be below 2^24 (checked with assert, so the
 *        check disappears when NDEBUG is defined).
 * bytes: pointer to at least 3 writable bytes.
 */
static inline void uint24_to_bytes(unsigned int num, uint8_t *bytes)
{
    assert(num < (1u << 24));

    bytes[0] = (uint8_t)(num >> 16);
    bytes[1] = (uint8_t)(num >> 8);
    bytes[2] = (uint8_t)num;

    /* Round-trip sanity check. */
    assert(bytes_to_uint24(bytes) == num);
}

#endif
data/ext/liquid_c/lexer.c CHANGED
@@ -1,5 +1,6 @@
1
1
  #include "liquid.h"
2
2
  #include "lexer.h"
3
+ #include "usage.h"
3
4
  #include <stdio.h>
4
5
 
5
6
  const char *symbol_names[TOKEN_END] = {
@@ -59,8 +60,6 @@ inline static const char *scan_past(const char *cur, const char *end, char targe
59
60
  const char *tok_end = str + (n); \
60
61
  token->type = (t); \
61
62
  token->val = str; \
62
- if (str != start) token->flags |= TOKEN_SPACE_PREFIX; \
63
- if (tok_end < end && ISSPACE(*tok_end)) token->flags |= TOKEN_SPACE_SUFFIX; \
64
63
  return (token->val_end = tok_end); \
65
64
  }
66
65
 
@@ -109,6 +108,11 @@ const char *lex_one(const char *start, const char *end, lexer_token_t *token)
109
108
  }
110
109
  }
111
110
 
111
+ // Instrument for bug: https://github.com/Shopify/liquid-c/pull/120
112
+ if (c == '-' && str + 1 < end && str[1] == '.') {
113
+ usage_increment("liquid_c_negative_float_without_integer");
114
+ }
115
+
112
116
  if (ISDIGIT(c) || c == '-') {
113
117
  int has_dot = 0;
114
118
  cur = str;
data/ext/liquid_c/lexer.h CHANGED
@@ -24,9 +24,6 @@ enum lexer_token_type {
24
24
  TOKEN_END = 256
25
25
  };
26
26
 
27
- #define TOKEN_SPACE_PREFIX 0x1
28
- #define TOKEN_SPACE_SUFFIX 0x2
29
- #define TOKEN_SPACE_AFFIX (TOKEN_SPACE_PREFIX | TOKEN_SPACE_SUFFIX)
30
27
  #define TOKEN_FLOAT_NUMBER 0x4
31
28
 
32
29
  typedef struct lexer_token {
@@ -42,5 +39,23 @@ inline static VALUE token_to_rstr(lexer_token_t token) {
42
39
  return rb_enc_str_new(token.val, token.val_end - token.val, utf8_encoding);
43
40
  }
44
41
 
42
+ inline static VALUE token_check_for_symbol(lexer_token_t token) {
43
+ return rb_check_symbol_cstr(token.val, token.val_end - token.val, utf8_encoding);
44
+ }
45
+
46
+ inline static VALUE token_to_rstr_leveraging_existing_symbol(lexer_token_t token) {
47
+ VALUE sym = token_check_for_symbol(token);
48
+ if (RB_LIKELY(sym != Qnil))
49
+ return rb_sym2str(sym);
50
+ return token_to_rstr(token);
51
+ }
52
+
53
+ inline static VALUE token_to_rsym(lexer_token_t token) {
54
+ VALUE sym = token_check_for_symbol(token);
55
+ if (RB_LIKELY(sym != Qnil))
56
+ return sym;
57
+ return rb_str_intern(token_to_rstr(token));
58
+ }
59
+
45
60
  #endif
46
61
 
@@ -3,23 +3,93 @@
3
3
  #include "variable.h"
4
4
  #include "lexer.h"
5
5
  #include "parser.h"
6
+ #include "raw.h"
7
+ #include "resource_limits.h"
8
+ #include "expression.h"
9
+ #include "document_body.h"
6
10
  #include "block.h"
11
+ #include "context.h"
12
+ #include "parse_context.h"
13
+ #include "variable_lookup.h"
14
+ #include "vm_assembler_pool.h"
15
+ #include "vm.h"
16
+ #include "usage.h"
17
+
18
+ ID id_evaluate;
19
+ ID id_to_liquid;
20
+ ID id_to_s;
21
+ ID id_call;
22
+ ID id_compile_evaluate;
23
+ ID id_ivar_line_number;
24
+
25
+ VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody;
26
+ VALUE cLiquidVariableLookup, cLiquidRangeLookup;
27
+ VALUE cLiquidArgumentError, cLiquidSyntaxError, cMemoryError;
7
28
 
8
- VALUE mLiquid, mLiquidC, cLiquidSyntaxError, cLiquidVariable, cLiquidTemplate;
9
29
  rb_encoding *utf8_encoding;
30
+ int utf8_encoding_index;
31
+
32
+ __attribute__((noreturn)) void raise_non_utf8_encoding_error(VALUE string, const char *value_name)
33
+ {
34
+ rb_raise(rb_eEncCompatError, "non-UTF8 encoded %s (%"PRIsVALUE") not supported", value_name, rb_obj_encoding(string));
35
+ }
10
36
 
11
- void Init_liquid_c(void)
37
+ RUBY_FUNC_EXPORTED void Init_liquid_c(void)
12
38
  {
39
+ id_evaluate = rb_intern("evaluate");
40
+ id_to_liquid = rb_intern("to_liquid");
41
+ id_to_s = rb_intern("to_s");
42
+ id_call = rb_intern("call");
43
+ id_compile_evaluate = rb_intern("compile_evaluate");
44
+ id_ivar_line_number = rb_intern("@line_number");
45
+
13
46
  utf8_encoding = rb_utf8_encoding();
47
+ utf8_encoding_index = rb_enc_to_index(utf8_encoding);
48
+
14
49
  mLiquid = rb_define_module("Liquid");
50
+ rb_global_variable(&mLiquid);
51
+
15
52
  mLiquidC = rb_define_module_under(mLiquid, "C");
53
+ rb_global_variable(&mLiquidC);
54
+
55
+ cLiquidArgumentError = rb_const_get(mLiquid, rb_intern("ArgumentError"));
56
+ rb_global_variable(&cLiquidArgumentError);
57
+
16
58
  cLiquidSyntaxError = rb_const_get(mLiquid, rb_intern("SyntaxError"));
59
+ rb_global_variable(&cLiquidSyntaxError);
60
+
61
+ cMemoryError = rb_const_get(mLiquid, rb_intern("MemoryError"));
62
+ rb_global_variable(&cMemoryError);
63
+
17
64
  cLiquidVariable = rb_const_get(mLiquid, rb_intern("Variable"));
65
+ rb_global_variable(&cLiquidVariable);
66
+
18
67
  cLiquidTemplate = rb_const_get(mLiquid, rb_intern("Template"));
68
+ rb_global_variable(&cLiquidTemplate);
69
+
70
+ cLiquidBlockBody = rb_const_get(mLiquid, rb_intern("BlockBody"));
71
+ rb_global_variable(&cLiquidBlockBody);
72
+
73
+ cLiquidVariableLookup = rb_const_get(mLiquid, rb_intern("VariableLookup"));
74
+ rb_global_variable(&cLiquidVariableLookup);
75
+
76
+ cLiquidRangeLookup = rb_const_get(mLiquid, rb_intern("RangeLookup"));
77
+ rb_global_variable(&cLiquidRangeLookup);
19
78
 
20
- init_liquid_tokenizer();
21
- init_liquid_parser();
22
- init_liquid_variable();
23
- init_liquid_block();
79
+ liquid_define_tokenizer();
80
+ liquid_define_parser();
81
+ liquid_define_raw();
82
+ liquid_define_resource_limits();
83
+ liquid_define_expression();
84
+ liquid_define_variable();
85
+ liquid_define_document_body();
86
+ liquid_define_block_body();
87
+ liquid_define_context();
88
+ liquid_define_parse_context();
89
+ liquid_define_variable_lookup();
90
+ liquid_define_vm_assembler_pool();
91
+ liquid_define_vm_assembler();
92
+ liquid_define_vm();
93
+ liquid_define_usage();
24
94
  }
25
95
 
@@ -5,8 +5,31 @@
5
5
  #include <ruby/encoding.h>
6
6
  #include <stdbool.h>
7
7
 
8
- extern VALUE mLiquid, mLiquidC, cLiquidSyntaxError, cLiquidVariable, cLiquidTemplate;
8
+ extern ID id_evaluate;
9
+ extern ID id_to_liquid;
10
+ extern ID id_to_s;
11
+ extern ID id_call;
12
+ extern ID id_compile_evaluate;
13
+ extern ID id_ivar_line_number;
14
+
15
+ extern VALUE mLiquid, mLiquidC, cLiquidVariable, cLiquidTemplate, cLiquidBlockBody;
16
+ extern VALUE cLiquidVariableLookup, cLiquidRangeLookup;
17
+ extern VALUE cLiquidArgumentError, cLiquidSyntaxError, cMemoryError;
9
18
  extern rb_encoding *utf8_encoding;
19
+ extern int utf8_encoding_index;
20
+
21
+ __attribute__((noreturn)) void raise_non_utf8_encoding_error(VALUE string, const char *string_name);
22
+
23
+ static inline void check_utf8_encoding(VALUE string, const char *string_name)
24
+ {
25
+ if (RB_UNLIKELY(RB_ENCODING_GET_INLINED(string) != utf8_encoding_index))
26
+ raise_non_utf8_encoding_error(string, string_name);
27
+ }
28
+
29
+ #ifndef RB_LIKELY
30
+ // RB_LIKELY added in Ruby 2.4
31
+ #define RB_LIKELY(x) (__builtin_expect(!!(x), 1))
32
+ #endif
10
33
 
11
34
  #endif
12
35
 
@@ -0,0 +1,76 @@
1
+ #include "parse_context.h"
2
+ #include "document_body.h"
3
+
4
+ static ID id_document_body, id_vm_assembler_pool;
5
+
6
+ static bool parse_context_document_body_initialized_p(VALUE self)
7
+ {
8
+ return RTEST(rb_attr_get(self, id_document_body));
9
+ }
10
+
11
+ static void parse_context_init_document_body(VALUE self)
12
+ {
13
+ VALUE document_body = document_body_new_instance();
14
+ rb_ivar_set(self, id_document_body, document_body);
15
+ }
16
+
17
+ VALUE parse_context_get_document_body(VALUE self)
18
+ {
19
+ assert(parse_context_document_body_initialized_p(self));
20
+
21
+ return rb_ivar_get(self, id_document_body);
22
+ }
23
+
24
+ vm_assembler_pool_t *parse_context_init_vm_assembler_pool(VALUE self)
25
+ {
26
+ assert(!RTEST(rb_attr_get(self, id_vm_assembler_pool)));
27
+
28
+ VALUE vm_assembler_pool_obj = vm_assembler_pool_new();
29
+ rb_ivar_set(self, id_vm_assembler_pool, vm_assembler_pool_obj);
30
+
31
+ vm_assembler_pool_t *vm_assembler_pool;
32
+ VMAssemblerPool_Get_Struct(vm_assembler_pool_obj, vm_assembler_pool);
33
+
34
+ return vm_assembler_pool;
35
+ }
36
+
37
+ vm_assembler_pool_t *parse_context_get_vm_assembler_pool(VALUE self)
38
+ {
39
+ VALUE obj = rb_ivar_get(self, id_vm_assembler_pool);
40
+
41
+ if (obj == Qnil) {
42
+ rb_raise(rb_eRuntimeError, "Liquid::ParseContext#start_liquid_c_parsing has not yet been called");
43
+ }
44
+
45
+ vm_assembler_pool_t *vm_assembler_pool;
46
+ VMAssemblerPool_Get_Struct(obj, vm_assembler_pool);
47
+ return vm_assembler_pool;
48
+ }
49
+
50
+ static VALUE parse_context_start_liquid_c_parsing(VALUE self)
51
+ {
52
+ if (RB_UNLIKELY(parse_context_document_body_initialized_p(self))) {
53
+ rb_raise(rb_eRuntimeError, "liquid-c parsing already started for this parse context");
54
+ }
55
+ parse_context_init_document_body(self);
56
+ parse_context_init_vm_assembler_pool(self);
57
+ return Qnil;
58
+ }
59
+
60
+ static VALUE parse_context_cleanup_liquid_c_parsing(VALUE self)
61
+ {
62
+ rb_obj_freeze(rb_ivar_get(self, id_document_body));
63
+ rb_ivar_set(self, id_document_body, Qnil);
64
+ rb_ivar_set(self, id_vm_assembler_pool, Qnil);
65
+ return Qnil;
66
+ }
67
+
68
+ void liquid_define_parse_context(void)
69
+ {
70
+ id_document_body = rb_intern("document_body");
71
+ id_vm_assembler_pool = rb_intern("vm_assembler_pool");
72
+
73
+ VALUE cLiquidParseContext = rb_const_get(mLiquid, rb_intern("ParseContext"));
74
+ rb_define_method(cLiquidParseContext, "start_liquid_c_parsing", parse_context_start_liquid_c_parsing, 0);
75
+ rb_define_method(cLiquidParseContext, "cleanup_liquid_c_parsing", parse_context_cleanup_liquid_c_parsing, 0);
76
+ }
@@ -0,0 +1,13 @@
1
+ #ifndef LIQUID_PARSE_CONTEXT_H
2
+ #define LIQUID_PARSE_CONTEXT_H
3
+
4
+ #include <ruby.h>
5
+ #include <stdbool.h>
6
+ #include "vm_assembler_pool.h"
7
+
8
+ void liquid_define_parse_context(void);
9
+ VALUE parse_context_get_document_body(VALUE self);
10
+
11
+ vm_assembler_pool_t *parse_context_get_vm_assembler_pool(VALUE self);
12
+
13
+ #endif
@@ -2,8 +2,8 @@
2
2
  #include "parser.h"
3
3
  #include "lexer.h"
4
4
 
5
- static VALUE cLiquidRangeLookup, cLiquidVariableLookup, cRange, vLiquidExpressionLiterals;
6
- static ID idToI, idEvaluate;
5
+ static VALUE empty_string;
6
+ static ID id_to_i, idEvaluate;
7
7
 
8
8
  void init_parser(parser_t *p, const char *str, const char *end)
9
9
  {
@@ -67,79 +67,133 @@ static VALUE parse_number(parser_t *p)
67
67
  return out;
68
68
  }
69
69
 
70
- static VALUE parse_range(parser_t *p)
70
+ static VALUE try_parse_constant_range(parser_t *p)
71
71
  {
72
+ parser_t saved_state = *p;
73
+
72
74
  parser_must_consume(p, TOKEN_OPEN_ROUND);
73
75
 
74
- VALUE args[2];
75
- args[0] = parse_expression(p);
76
+ VALUE begin = try_parse_constant_expression(p);
77
+ if (begin == Qundef) {
78
+ *p = saved_state;
79
+ return Qundef;
80
+ }
76
81
  parser_must_consume(p, TOKEN_DOTDOT);
77
82
 
78
- args[1] = parse_expression(p);
83
+ VALUE end = try_parse_constant_expression(p);
84
+ if (end == Qundef) {
85
+ *p = saved_state;
86
+ return Qundef;
87
+ }
79
88
  parser_must_consume(p, TOKEN_CLOSE_ROUND);
80
89
 
81
- if (rb_respond_to(args[0], idEvaluate) || rb_respond_to(args[1], idEvaluate))
82
- return rb_class_new_instance(2, args, cLiquidRangeLookup);
90
+ begin = rb_funcall(begin, id_to_i, 0);
91
+ end = rb_funcall(end, id_to_i, 0);
83
92
 
84
- return rb_class_new_instance(2, args, cRange);
93
+ bool exclude_end = false;
94
+ return rb_range_new(begin, end, exclude_end);
85
95
  }
86
96
 
87
- static VALUE parse_variable(parser_t *p)
97
+ static void parse_and_compile_range(parser_t *p, vm_assembler_t *code)
88
98
  {
89
- VALUE name, lookups = rb_ary_new(), lookup;
90
- unsigned long long command_flags = 0;
99
+ VALUE const_range = try_parse_constant_range(p);
100
+ if (const_range != Qundef) {
101
+ vm_assembler_add_push_const(code, const_range);
102
+ return;
103
+ }
91
104
 
105
+ parser_must_consume(p, TOKEN_OPEN_ROUND);
106
+ parse_and_compile_expression(p, code);
107
+ parser_must_consume(p, TOKEN_DOTDOT);
108
+ parse_and_compile_expression(p, code);
109
+ parser_must_consume(p, TOKEN_CLOSE_ROUND);
110
+ vm_assembler_add_new_int_range(code);
111
+ }
112
+
113
+ static void parse_and_compile_variable_lookup(parser_t *p, vm_assembler_t *code)
114
+ {
92
115
  if (parser_consume(p, TOKEN_OPEN_SQUARE).type) {
93
- name = parse_expression(p);
116
+ parse_and_compile_expression(p, code);
94
117
  parser_must_consume(p, TOKEN_CLOSE_SQUARE);
118
+ vm_assembler_add_find_variable(code);
95
119
  } else {
96
- name = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER));
120
+ VALUE name = token_to_rstr_leveraging_existing_symbol(parser_must_consume(p, TOKEN_IDENTIFIER));
121
+ vm_assembler_add_find_static_variable(code, name);
97
122
  }
98
123
 
99
124
  while (true) {
100
125
  if (p->cur.type == TOKEN_OPEN_SQUARE) {
101
126
  parser_consume_any(p);
102
- lookup = parse_expression(p);
127
+ parse_and_compile_expression(p, code);
103
128
  parser_must_consume(p, TOKEN_CLOSE_SQUARE);
104
-
105
- rb_ary_push(lookups, lookup);
129
+ vm_assembler_add_lookup_key(code);
106
130
  } else if (p->cur.type == TOKEN_DOT) {
107
- int has_space_affix = parser_consume_any(p).flags & TOKEN_SPACE_AFFIX;
108
- lookup = token_to_rstr(parser_must_consume(p, TOKEN_IDENTIFIER));
109
-
110
- if (has_space_affix)
111
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "Unexpected dot");
112
-
113
- if (rstring_eq(lookup, "size") || rstring_eq(lookup, "first") || rstring_eq(lookup, "last"))
114
- command_flags |= 1 << RARRAY_LEN(lookups);
131
+ parser_consume_any(p);
132
+ VALUE key = token_to_rstr_leveraging_existing_symbol(parser_must_consume(p, TOKEN_IDENTIFIER));
115
133
 
116
- rb_ary_push(lookups, lookup);
134
+ if (rstring_eq(key, "size") || rstring_eq(key, "first") || rstring_eq(key, "last"))
135
+ vm_assembler_add_lookup_command(code, key);
136
+ else
137
+ vm_assembler_add_lookup_const_key(code, key);
117
138
  } else {
118
139
  break;
119
140
  }
120
141
  }
142
+ }
121
143
 
122
- if (RARRAY_LEN(lookups) == 0) {
123
- VALUE literal = rb_hash_lookup2(vLiquidExpressionLiterals, name, Qundef);
124
- if (literal != Qundef) return literal;
144
+ static VALUE try_parse_literal(parser_t *p)
145
+ {
146
+ if (p->next.type == TOKEN_DOT || p->next.type == TOKEN_OPEN_SQUARE)
147
+ return Qundef;
148
+
149
+ const char *str = p->cur.val;
150
+ long size = p->cur.val_end - str;
151
+ VALUE result = Qundef;
152
+ switch (size) {
153
+ case 3:
154
+ if (memcmp(str, "nil", size) == 0)
155
+ result = Qnil;
156
+ break;
157
+ case 4:
158
+ if (memcmp(str, "null", size) == 0) {
159
+ result = Qnil;
160
+ } else if (memcmp(str, "true", size) == 0) {
161
+ result = Qtrue;
162
+ }
163
+ break;
164
+ case 5:
165
+ switch (*str) {
166
+ case 'f':
167
+ if (memcmp(str, "false", size) == 0)
168
+ result = Qfalse;
169
+ break;
170
+ case 'b':
171
+ if (memcmp(str, "blank", size) == 0)
172
+ result = empty_string;
173
+ break;
174
+ case 'e':
175
+ if (memcmp(str, "empty", size) == 0)
176
+ result = empty_string;
177
+ break;
178
+ }
179
+ break;
125
180
  }
126
-
127
- VALUE args[4] = {Qfalse, name, lookups, INT2FIX(command_flags)};
128
- return rb_class_new_instance(4, args, cLiquidVariableLookup);
181
+ if (result != Qundef)
182
+ parser_consume_any(p);
183
+ return result;
129
184
  }
130
185
 
131
- VALUE parse_expression(parser_t *p)
186
+ VALUE try_parse_constant_expression(parser_t *p)
132
187
  {
133
188
  switch (p->cur.type) {
134
189
  case TOKEN_IDENTIFIER:
135
- case TOKEN_OPEN_SQUARE:
136
- return parse_variable(p);
190
+ return try_parse_literal(p);
137
191
 
138
192
  case TOKEN_NUMBER:
139
193
  return parse_number(p);
140
194
 
141
195
  case TOKEN_OPEN_ROUND:
142
- return parse_range(p);
196
+ return try_parse_constant_range(p);
143
197
 
144
198
  case TOKEN_STRING:
145
199
  {
@@ -149,47 +203,69 @@ VALUE parse_expression(parser_t *p)
149
203
  return token_to_rstr(token);
150
204
  }
151
205
  }
152
-
153
- if (p->cur.type == TOKEN_EOS) {
154
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p->cur.type]);
155
- } else {
156
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s, \"%.*s\"] is not a valid expression",
157
- symbol_names[p->cur.type], (int)(p->cur.val_end - p->cur.val), p->cur.val);
158
- }
159
- return Qnil;
206
+ return Qundef;
160
207
  }
161
208
 
162
- static VALUE rb_parse_expression(VALUE self, VALUE markup)
209
+ static void parse_and_compile_number(parser_t *p, vm_assembler_t *code)
163
210
  {
164
- StringValue(markup);
165
- char *start = RSTRING_PTR(markup);
211
+ VALUE num = parse_number(p);
212
+ if (RB_FIXNUM_P(num))
213
+ vm_assembler_add_push_fixnum(code, num);
214
+ else
215
+ vm_assembler_add_push_const(code, num);
216
+ return;
217
+ }
166
218
 
167
- parser_t p;
168
- init_parser(&p, start, start + RSTRING_LEN(markup));
219
+ void parse_and_compile_expression(parser_t *p, vm_assembler_t *code)
220
+ {
221
+ switch (p->cur.type) {
222
+ case TOKEN_IDENTIFIER:
223
+ {
224
+ VALUE literal = try_parse_literal(p);
225
+ if (literal != Qundef) {
226
+ vm_assembler_add_push_literal(code, literal);
227
+ return;
228
+ }
229
+
230
+ __attribute__ ((fallthrough));
231
+ }
232
+ case TOKEN_OPEN_SQUARE:
233
+ parse_and_compile_variable_lookup(p, code);
234
+ return;
169
235
 
170
- if (p.cur.type == TOKEN_EOS)
171
- return Qnil;
236
+ case TOKEN_NUMBER:
237
+ parse_and_compile_number(p, code);
238
+ return;
172
239
 
173
- VALUE expr = parse_expression(&p);
240
+ case TOKEN_OPEN_ROUND:
241
+ parse_and_compile_range(p, code);
242
+ return;
174
243
 
175
- if (p.cur.type != TOKEN_EOS)
176
- rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p.cur.type]);
244
+ case TOKEN_STRING:
245
+ {
246
+ lexer_token_t token = parser_consume_any(p);
247
+ token.val++;
248
+ token.val_end--;
249
+ VALUE str = token_to_rstr(token);
250
+ vm_assembler_add_push_const(code, str);
251
+ return;
252
+ }
253
+ }
177
254
 
178
- return expr;
255
+ if (p->cur.type == TOKEN_EOS) {
256
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s] is not a valid expression", symbol_names[p->cur.type]);
257
+ } else {
258
+ rb_enc_raise(utf8_encoding, cLiquidSyntaxError, "[:%s, \"%.*s\"] is not a valid expression",
259
+ symbol_names[p->cur.type], (int)(p->cur.val_end - p->cur.val), p->cur.val);
260
+ }
179
261
  }
180
262
 
181
- void init_liquid_parser(void)
263
+ void liquid_define_parser(void)
182
264
  {
183
- idToI = rb_intern("to_i");
265
+ id_to_i = rb_intern("to_i");
184
266
  idEvaluate = rb_intern("evaluate");
185
267
 
186
- cLiquidRangeLookup = rb_const_get(mLiquid, rb_intern("RangeLookup"));
187
- cRange = rb_const_get(rb_cObject, rb_intern("Range"));
188
- cLiquidVariableLookup = rb_const_get(mLiquid, rb_intern("VariableLookup"));
189
-
190
- VALUE cLiquidExpression = rb_const_get(mLiquid, rb_intern("Expression"));
191
- rb_define_singleton_method(cLiquidExpression, "c_parse", rb_parse_expression, 1);
192
-
193
- vLiquidExpressionLiterals = rb_const_get(cLiquidExpression, rb_intern("LITERALS"));
268
+ empty_string = rb_utf8_str_new_literal("");
269
+ rb_global_variable(&empty_string);
194
270
  }
195
271
 
@@ -2,6 +2,7 @@
2
2
  #define LIQUID_PARSER_H
3
3
 
4
4
  #include "lexer.h"
5
+ #include "vm_assembler.h"
5
6
 
6
7
  typedef struct parser {
7
8
  lexer_token_t cur, next;
@@ -14,9 +15,10 @@ lexer_token_t parser_must_consume(parser_t *parser, unsigned char type);
14
15
  lexer_token_t parser_consume(parser_t *parser, unsigned char type);
15
16
  lexer_token_t parser_consume_any(parser_t *parser);
16
17
 
17
- VALUE parse_expression(parser_t *parser);
18
+ void parse_and_compile_expression(parser_t *p, vm_assembler_t *code);
19
+ VALUE try_parse_constant_expression(parser_t *p);
18
20
 
19
- void init_liquid_parser(void);
21
+ void liquid_define_parser(void);
20
22
 
21
23
  #endif
22
24