liquid2 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 42973b8aae08cf4321586ac4cdaf39ecab29e0a2a4b7f26aed278291e41b7645
- data.tar.gz: 9c2bca82b7f589cfdccd4a2a5c091ac3cd32613e0c40354b6313d23a6c35bbec
+ metadata.gz: 3a68d0ef0f934b9b4fd68d99591e5b0faf9df0e4d408e35c4df1aa2b7b98f4a1
+ data.tar.gz: 41d881fe5f30b1f390e2c8297e36ca08f6eb70c1b70225f8418ba255f6297759
  SHA512:
- metadata.gz: fbb3917b6b68ba37aaffbf158aac61d85a577ddf244b12fdad9eaa99e5ea54a0f94b255b3d21381514bb42bbb7c3543c247b7bba38255e95dacc195aedf2f10a
- data.tar.gz: a61fd0b11ff0d4ed92bdcd1b8eca60b5997f6881caad131132256f561a0c039cdc33e758601b7dd9da0e02534f53c21a38da6c09e38bc947a4a93834a0413210
+ metadata.gz: 53ad1737b2ae742366a0fc26e038c971d18a4f500ce104faac21a94547ac61a9926a5683a5150539e1b968d144b2cb15aa93823db163cbaa07d785b2e9ed3c31
+ data.tar.gz: 25e214ff840aacacb4ffed35160295d8fd7dd04ea301c62aec2a490d4c5d54ba72b73f9a7318b2bc0fb1f8c3ed7eea26a737ba5f8ea182b3a36e44996a8b06f4
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
+ ## [0.3.1] - 25-06-24
+
+ - Added support for custom markup delimiters. See [#16](https://github.com/jg-rp/ruby-liquid2/pull/16).
+ - Added the `range` filter. `range` is an array slicing filter that takes optional start and end indexes, and an optional step argument, any of which can be negative. See [#18](https://github.com/jg-rp/ruby-liquid2/pull/18).
+
  ## [0.3.0] - 25-05-29
 
  - Fixed static analysis of lambda expressions (arrow functions). Previously we were not including lambda parameters in the scope of the expression. See [#12](https://github.com/jg-rp/ruby-liquid2/issues/12).
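To make the new `range` filter concrete, here is a minimal sketch of it in a template. The data and argument values are arbitrary examples, and the render call assumes `Template#render` accepts a hash of globals, which is not shown in this diff.

```ruby
require "liquid2"

# Hypothetical usage of the `range` filter added in 0.3.1.
template = Liquid2.parse("{{ letters | range: 1, -1, 2 | join: ' ' }}")

# Start index 1, stop index -1 (one before the end) and step 2 selects
# indexes 1, 3 and 5 of the example array.
puts template.render("letters" => %w[a b c d e f g]) # => "b d f"
```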
data/README.md CHANGED
@@ -36,7 +36,7 @@ Liquid templates for Ruby, with some extra features.
  Add `'liquid2'` to your Gemfile:
 
  ```
- gem 'liquid2', '~> 0.3.0'
+ gem 'liquid2', '~> 0.3.1'
  ```
 
  Or
@@ -231,7 +231,13 @@ Integer and float literals can use scientific notation, like `1.2e3` or `1e-2`.
 
  Liquid2 includes implementations of `{% extends %}` and `{% block %}` for template inheritance, `{% with %}` for block scoped variables and `{% macro %}` and `{% call %}` for defining parameterized blocks.
 
- There's also built-in implementations of `sort_numeric` and `json` filters.
+ The following filters are included in Liquid2's default environment:
+
+ - `sort_numeric` - Sorts array elements by runs of digits found in their string representation.
+ - `json` - Outputs objects serialized in JSON format.
+ - `range` - An alternative to the standard `slice` filter that takes optional start and stop indexes, and an optional step, all of which can be negative.
+
+ See [Tags and filters](#tags-and-filters) for how to add, remove or alias tags and/or filters from your own Liquid2 environment.
 
  ## API
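As a rough illustration of the first two filters listed above (the exact JSON formatting and the render call are assumptions, not taken from this diff):

```ruby
require "liquid2"

# Hypothetical data; assumes Template#render takes a hash of globals.
source = "{{ releases | sort_numeric | join: ', ' }} {{ user | json }}"
template = Liquid2.parse(source)

puts template.render(
  "releases" => ["v10", "v2", "v1"],
  "user" => { "name" => "Sally" }
)
# Something like: v1, v2, v10 {"name":"Sally"}
```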
@@ -38,14 +38,20 @@ require_relative "nodes/tags/with"
  module Liquid2
  # Template parsing and rendering configuration.
  #
- # A Liquid::Environment is where you might register custom tags and filters,
+ # A Liquid2::Environment is where you might register custom tags and filters,
  # or store global context data that should be available to all templates.
  #
  # `Liquid2.parse(source)` is equivalent to `Liquid2::Environment.new.parse(source)`.
  class Environment
  attr_reader :tags, :local_namespace_limit, :context_depth_limit, :loop_iteration_limit,
  :output_stream_limit, :filters, :suppress_blank_control_flow_blocks,
- :shorthand_indexes, :falsy_undefined, :arithmetic_operators
+ :shorthand_indexes, :falsy_undefined, :arithmetic_operators, :markup_comment_prefix,
+ :markup_comment_suffix, :markup_out_end, :markup_out_start, :markup_tag_end,
+ :markup_tag_start, :re_tag_name, :re_word, :re_int, :re_float,
+ :re_double_quote_string_special, :re_single_quote_string_special, :re_markup_start,
+ :re_markup_end, :re_markup_end_chars, :re_up_to_markup_start, :re_punctuation,
+ :re_up_to_inline_comment_end, :re_up_to_raw_end, :re_block_comment_chunk,
+ :re_up_to_doc_end, :re_line_statement_comment
 
  # @param context_depth_limit [Integer] The maximum number of times a render context can
  # be extended or copied before a `Liquid2::LiquidResourceLimitError`` is raised.
@@ -59,8 +65,23 @@ module Liquid2
  # `Liquid2::LiquidResourceLimitError`` is raised.
  # @param loop_iteration_limit [Integer?] The maximum number of loop iterations allowed
  # before a `LiquidResourceLimitError` is raised.
+ # @param markup_comment_prefix [String] The string of characters that indicate the start of a
+ # Liquid comment. This should include a single trailing `#`. Additional, variable length
+ # hashes will be handled by the tokenizer. It is not possible to change comment syntax to not
+ # use `#`.
+ # @param markup_comment_suffix [String] The string of characters that indicate the end of a
+ # Liquid comment, excluding any hashes.
+ # @param markup_out_end [String] The string of characters that indicate the end of a Liquid
+ # output statement.
+ # @param markup_out_start [String] The string of characters that indicate the start of a Liquid
+ # output statement.
+ # @param markup_tag_end [String] The string of characters that indicate the end of a Liquid tag.
+ # @param markup_tag_start [String] The string of characters that indicate the start of a Liquid
+ # tag.
  # @param output_stream_limit [Integer?] The maximum number of bytes that can be written
  # to a template's output buffer before a `LiquidResourceLimitError` is raised.
+ # @param parser [singleton(Parser)] `Liquid2::Parser` or a subclass of it.
+ # @param scanner [singleton(Scanner)] `Liquid2::Scanner` or a subclass of it.
  # @param shorthand_indexes [bool] When `true`, allow shorthand dotted array indexes as
  # well as bracketed indexes in variable paths. Defaults to `false`.
  # @param suppress_blank_control_flow_blocks [bool] When `true`, suppress blank control
@@ -70,15 +91,23 @@ module Liquid2
  def initialize(
  arithmetic_operators: false,
  context_depth_limit: 30,
+ falsy_undefined: true,
  globals: nil,
  loader: nil,
  local_namespace_limit: nil,
  loop_iteration_limit: nil,
+ markup_comment_prefix: "{#",
+ markup_comment_suffix: "}",
+ markup_out_end: "}}",
+ markup_out_start: "{{",
+ markup_tag_end: "%}",
+ markup_tag_start: "{%",
  output_stream_limit: nil,
+ parser: Parser,
+ scanner: Scanner,
  shorthand_indexes: false,
  suppress_blank_control_flow_blocks: true,
- undefined: Undefined,
- falsy_undefined: true
+ undefined: Undefined
  )
  # A mapping of tag names to objects responding to `parse(token, parser)`.
  @tags = {}
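For illustration, a sketch of the new markup delimiter keyword arguments shown above. The square-bracket delimiters are arbitrary examples, rendering assumes `Template#render` takes a hash of globals, and delimiters that collide with standard punctuation may also require overriding `setup_scanner`, as the environment's own comments note.

```ruby
require "liquid2"

# An environment with square-bracket style delimiters (arbitrary example strings).
env = Liquid2::Environment.new(
  markup_out_start: "[[",
  markup_out_end: "]]",
  markup_tag_start: "[%",
  markup_tag_end: "%]"
)

template = env.parse("[% if user %]Hello, [[ user ]]![% endif %]")
puts template.render("user" => "Sally") # => "Hello, Sally!"
```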
@@ -116,9 +145,17 @@ module Liquid2
  # before a `LiquidResourceLimitError` is raised.
  @output_stream_limit = output_stream_limit
 
+ # Liquid2::Scanner or a subclass of it. This is used to tokenize Liquid source
+ # text before parsing it.
+ @scanner = scanner
+
+ # Liquid2::Parser or a subclass of it. The parser takes tokens from the scanner
+ # and produces an abstract syntax tree.
+ @parser = parser
+
  # We reuse the same string scanner when parsing templates for improved performance.
  # TODO: Is this going to cause issues in multi threaded environments?
- @scanner = StringScanner.new("")
+ @string_scanner = StringScanner.new("")
 
  # When `true`, allow shorthand dotted array indexes as well as bracketed indexes
  # in variable paths. Defaults to `false`.
@@ -136,6 +173,31 @@ module Liquid2
  # raise an error when tested for truthiness.
  @falsy_undefined = falsy_undefined
 
+ # The string of characters that indicate the start of a Liquid output statement.
+ @markup_out_start = markup_out_start
+
+ # The string of characters that indicate the end of a Liquid output statement.
+ @markup_out_end = markup_out_end
+
+ # The string of characters that indicate the start of a Liquid tag.
+ @markup_tag_start = markup_tag_start
+
+ # The string of characters that indicate the end of a Liquid tag.
+ @markup_tag_end = markup_tag_end
+
+ # The string of characters that indicate the start of a Liquid comment. This should
+ # include a single trailing `#`. Additional, variable length hashes will be handled
+ # by the tokenizer. It is not possible to change comment syntax to not use `#`.
+ @markup_comment_prefix = markup_comment_prefix
+
+ # The string of characters that indicate the end of a Liquid comment, excluding any
+ # hashes.
+ @markup_comment_suffix = markup_comment_suffix
+
+ # You might need to override `setup_scanner` if you've specified custom markup
+ # delimiters and they conflict with standard punctuation.
+ setup_scanner
+
  # Override `setup_tags_and_filters` in environment subclasses to configure custom
  # tags and/or filters.
  setup_tags_and_filters
@@ -145,11 +207,13 @@ module Liquid2
  # @param source [String] template source text.
  # @return [Template]
  def parse(source, name: "", path: nil, up_to_date: nil, globals: nil, overlay: nil)
- Template.new(self,
- source,
- Parser.parse(self, source, scanner: @scanner),
- name: name, path: path, up_to_date: up_to_date,
- globals: make_globals(globals), overlay: overlay)
+ Template.new(
+ self,
+ source,
+ @parser.new(self, @scanner.tokenize(self, source, @string_scanner), source.length).parse,
+ name: name, path: path, up_to_date: up_to_date,
+ globals: make_globals(globals), overlay: overlay
+ )
  rescue LiquidError => e
  e.source = source unless e.source
  e.template_name = name unless e.template_name || name.empty?
@@ -262,6 +326,7 @@ module Liquid2
  register_filter("newline_to_br", Liquid2::Filters.method(:newline_to_br))
  register_filter("plus", Liquid2::Filters.method(:plus))
  register_filter("prepend", Liquid2::Filters.method(:prepend))
+ register_filter("range", Liquid2::Filters.method(:better_slice))
  register_filter("reject", Liquid2::Filters.method(:reject))
  register_filter("remove_first", Liquid2::Filters.method(:remove_first))
  register_filter("remove_last", Liquid2::Filters.method(:remove_last))
@@ -292,6 +357,51 @@ module Liquid2
  register_filter("where", Liquid2::Filters.method(:where))
  end
 
+ # Compile regular expressions for use by the tokenizer attached to this environment.
+ def setup_scanner
+ # A regex pattern matching Liquid tag names. Should include `#` for inline comments.
+ @re_tag_name = /(?:[a-z][a-z_0-9]*|#)/
+
+ # A regex pattern matching keywords and/or variable/path names. Replace this if
+ # you want to disable Unicode characters in identifiers, for example.
+ @re_word = /[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*/
+
+ # Patterns matching literal integers and floats, possibly in scientific notation.
+ # You could simplify these to disable scientific notation.
+ @re_int = /-?\d+(?:[eE]\+?\d+)?/
+ @re_float = /((?:-?\d+\.\d+(?:[eE][+-]?\d+)?)|(-?\d+[eE]-\d+))/
+
+ # Patterns matching escape sequences, interpolation and end of string in string literals.
+ # You could remove `\$` from these to disable string interpolation.
+ @re_double_quote_string_special = /[\\"\$]/
+ @re_single_quote_string_special = /[\\'\$]/
+
+ # rubocop: disable Layout/LineLength
+
+ # A regex pattern matching the start of some Liquid markup. Could be the start of an
+ # output statement, tag or comment. Traditionally `{{`, `{%` and `{#`, respectively.
+ @re_markup_start = /#{Regexp.escape(@markup_out_start)}|#{Regexp.escape(@markup_tag_start)}|#{Regexp.escape(@markup_comment_prefix)}/
+
+ # A regex pattern matching the end of some Liquid markup. Could be the end of
+ # an output statement or tag. Traditionally `}}`, `%}`, respectively.
+ @re_markup_end = /#{Regexp.escape(@markup_out_end)}|#{Regexp.escape(@markup_tag_end)}/
+
+ # A regex pattern matching any one of the possible characters ending some Liquid
+ # markup. This is used to detect incomplete and malformed markup and provide
+ # helpful error messages.
+ @re_markup_end_chars = /[#{Regexp.escape((@markup_out_end + @markup_tag_end).each_char.uniq.join)}]/
+
+ @re_up_to_markup_start = /(?=#{Regexp.escape(@markup_out_start)}|#{Regexp.escape(@markup_tag_start)}|#{Regexp.escape(@markup_comment_prefix)})/
+ @re_punctuation = %r{(?!#{@re_markup_end})(\?|\[|\]|\|{1,2}|\.{1,2}|,|:|\(|\)|[<>=!]+|[+\-%*/]+(?!#{@re_markup_end_chars}))}
+ @re_up_to_inline_comment_end = /(?=([+\-~])?#{Regexp.escape(@markup_tag_end)})/
+ @re_up_to_raw_end = /(?=(#{Regexp.escape(@markup_tag_start)}[+\-~]?\s*endraw\s*[+\-~]?#{Regexp.escape(@markup_tag_end)}))/
+ @re_block_comment_chunk = /(#{Regexp.escape(@markup_tag_start)}[+\-~]?\s*(comment|raw|endcomment|endraw)\s*[+\-~]?#{Regexp.escape(@markup_tag_end)})/
+ @re_up_to_doc_end = /(?=(#{Regexp.escape(@markup_tag_start)}[+\-~]?\s*enddoc\s*[+\-~]?#{Regexp.escape(@markup_tag_end)}))/
+ @re_line_statement_comment = /(?=([\r\n]+|-?#{Regexp.escape(@markup_tag_end)}))/
+
+ # rubocop: enable Layout/LineLength
+ end
+
  def undefined(name, node: nil)
  @undefined.new(name, node: node)
  end
@@ -13,5 +13,45 @@ module Liquid2
  Liquid2.to_s(left).slice(to_integer(start), to_integer(length)) || ""
  end
  end
+
+ def self.better_slice(
+ left,
+ start_ = :undefined, stop_ = :undefined, step_ = :undefined,
+ start: :undefined, stop: :undefined, step: :undefined
+ )
+ # Give priority to keyword arguments, default to nil if neither are given.
+ start = start_ == :undefined ? nil : start_ if start == :undefined
+ stop = stop_ == :undefined ? nil : stop_ if stop == :undefined
+ step = step_ == :undefined ? nil : step_ if step == :undefined
+
+ step = to_integer(step || 1)
+ length = left.length
+ return [] if length.zero? || step.zero?
+
+ start = to_integer(start) unless start.nil?
+ stop = to_integer(stop) unless stop.nil?
+
+ normalized_start = if start.nil?
+ step.negative? ? length - 1 : 0
+ elsif start&.negative?
+ [length + start, 0].max
+ else
+ [start, length - 1].min
+ end
+
+ normalized_stop = if stop.nil?
+ step.negative? ? -1 : length
+ elsif stop&.negative?
+ [length + stop, -1].max
+ else
+ [stop, length].min
+ end
+
+ # This does not work with Ruby 3.1
+ # left[(normalized_start...normalized_stop).step(step)]
+ #
+ # But this does.
+ (normalized_start...normalized_stop).step(step).map { |i| left[i] }
+ end
  end
 end
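To make the index normalization above concrete, here is a small standalone sketch of the same arithmetic with arbitrary example values (no start, a negative stop, and a step of 2):

```ruby
# Standalone illustration of the index arithmetic used by `better_slice`,
# with arbitrary example values.
left = %w[a b c d e f]
stop = -2
step = 2
length = left.length # => 6

normalized_start = 0                        # start is nil and step is positive
normalized_stop = [length + stop, -1].max   # a negative stop counts from the end => 4

(normalized_start...normalized_stop).step(step).map { |i| left[i] }
# => ["a", "c"]  (indexes 0 and 2)
```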
@@ -25,11 +25,12 @@ module Liquid2
  # Liquid template parser.
  class Parser
  # Parse Liquid template text into a syntax tree.
+ # @param env [Environment]
  # @param source [String]
  # @return [Array[Node | String]]
  def self.parse(env, source, scanner: nil)
  new(env,
- Liquid2::Scanner.tokenize(source, scanner || StringScanner.new("")),
+ Liquid2::Scanner.tokenize(env, source, scanner || StringScanner.new("")),
  source.length).parse
  end
 
@@ -824,15 +825,7 @@ module Liquid2
  return parse_partial_arrow_function(expr)
  end
 
- unless TERMINATE_GROUPED_EXPRESSION.member?(kind)
- unless BINARY_OPERATORS.member?(kind)
- raise LiquidSyntaxError.new("expected an infix operator, found #{kind}", current)
- end
-
- expr = parse_infix_expression(expr)
- end
-
- eat(:token_rparen)
+ eat(:token_rparen, "unbalanced parentheses")
  expr
  end
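The class-level entry point now threads the environment through to the scanner. A minimal sketch of calling it directly (the template text is an arbitrary example):

```ruby
require "liquid2"

# Parser.parse now passes the environment on to Scanner.tokenize.
env = Liquid2::Environment.new
nodes = Liquid2::Parser.parse(env, "Hello, {{ you | upcase }}!")
# `nodes` is the parsed syntax tree (an array of nodes and strings).
```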
@@ -12,14 +12,6 @@ module Liquid2
  class Scanner
  attr_reader :tokens
 
- RE_LINE_SPACE = /[ \t]+/
- RE_WORD = /[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*/
- RE_INT = /-?\d+(?:[eE]\+?\d+)?/
- RE_FLOAT = /((?:-?\d+\.\d+(?:[eE][+-]?\d+)?)|(-?\d+[eE]-\d+))/
- RE_PUNCTUATION = %r{\?|\[|\]|\|{1,2}|\.{1,2}|,|:|\(|\)|[<>=!]+|[+\-%*/]+(?![\}%])}
- RE_SINGLE_QUOTE_STRING_SPECIAL = /[\\'\$]/
- RE_DOUBLE_QUOTE_STRING_SPECIAL = /[\\"\$]/
-
  # Keywords and symbols that get their own token kind.
  TOKEN_MAP = {
  "true" => :token_true,
@@ -68,15 +60,16 @@ module Liquid2
  "**" => :token_pow
  }.freeze
 
- def self.tokenize(source, scanner)
- lexer = new(source, scanner)
+ def self.tokenize(env, source, scanner)
+ lexer = new(env, source, scanner)
  lexer.run
  lexer.tokens
  end
 
+ # @param env [Environment]
  # @param source [String]
  # @param scanner [StringScanner]
- def initialize(source, scanner)
+ def initialize(env, source, scanner)
  @source = source
  @scanner = scanner
  @scanner.string = @source
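A minimal sketch of the updated `tokenize` signature, which now takes the environment first so the scanner can read its delimiter strings and compiled patterns. The template text and exact token positions are illustrative only.

```ruby
require "liquid2"
require "strscan"

env = Liquid2::Environment.new
tokens = Liquid2::Scanner.tokenize(env, "{{ a }}", StringScanner.new(""))
# Tokens are [kind, value, start index] arrays, something like:
#   [:token_output_start, nil, 0], [:token_word, "a", 3], [:token_output_end, nil, 5]
```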
@@ -84,8 +77,33 @@ module Liquid2
  # A pointer to the start of the current token.
  @start = 0
 
- # Tokens are arrays of (kind, value, start index)
+ # Tokens are arrays of (kind, value, start index).
+ # Sometimes we set value to `nil` when the symbol is unambiguous.
  @tokens = [] # : Array[[Symbol, String?, Integer]]
+
+ @s_out_start = env.markup_out_start
+ @s_out_end = env.markup_out_end
+ @s_tag_start = env.markup_tag_start
+ @s_tag_end = env.markup_tag_end
+ @s_comment_prefix = env.markup_comment_prefix
+ @s_comment_suffix = env.markup_comment_suffix
+
+ @re_tag_name = env.re_tag_name
+ @re_word = env.re_word
+ @re_int = env.re_int
+ @re_float = env.re_float
+ @re_double_quote_string_special = env.re_double_quote_string_special
+ @re_single_quote_string_special = env.re_single_quote_string_special
+ @re_markup_start = env.re_markup_start
+ @re_markup_end = env.re_markup_end
+ @re_markup_end_chars = env.re_markup_end_chars
+ @re_up_to_markup_start = env.re_up_to_markup_start
+ @re_punctuation = env.re_punctuation
+ @re_up_to_inline_comment_end = env.re_up_to_inline_comment_end
+ @re_up_to_raw_end = env.re_up_to_raw_end
+ @re_block_comment_chunk = env.re_block_comment_chunk
+ @re_up_to_doc_end = env.re_up_to_doc_end
+ @re_line_statement_comment = env.re_line_statement_comment
  end
 
  def run
@@ -108,14 +126,13 @@ module Liquid2
  end
 
  def skip_line_trivia
- @start = @scanner.pos if @scanner.skip(RE_LINE_SPACE)
+ @start = @scanner.pos if @scanner.skip(/[ \t]+/)
  end
 
  def accept_whitespace_control
  ch = @scanner.peek(1)
 
- case ch
- when "-", "+", "~"
+ if ch == "-" || ch == "+" || ch == "~" # rubocop: disable Style/MultipleComparison
  @scanner.pos += 1
  @tokens << [:token_whitespace_control, ch, @start]
  @start = @scanner.pos
@@ -126,22 +143,22 @@ module Liquid2
  end
 
  def lex_markup
- case @scanner.scan(/\{[\{%#]/)
- when "{#"
+ case @scanner.scan(@re_markup_start)
+ when @s_comment_prefix
  :lex_comment
- when "{{"
+ when @s_out_start
  @tokens << [:token_output_start, nil, @start]
  @start = @scanner.pos
  accept_whitespace_control
  skip_trivia
  :lex_expression
- when "{%"
+ when @s_tag_start
  @tokens << [:token_tag_start, nil, @start]
  @start = @scanner.pos
  accept_whitespace_control
  skip_trivia
 
- if (tag_name = @scanner.scan(/(?:[a-z][a-z_0-9]*|#)/))
+ if (tag_name = @scanner.scan(@re_tag_name))
  @tokens << [:token_tag_name, tag_name, @start]
  @start = @scanner.pos
 
@@ -173,8 +190,7 @@ module Liquid2
  :lex_expression
  end
  else
- if @scanner.skip_until(/\{[\{%#]/)
- @scanner.pos -= 2
+ if @scanner.skip_until(@re_up_to_markup_start)
  @tokens << [:token_other, @source.byteslice(@start...@scanner.pos), @start]
  @start = @scanner.pos
  :lex_markup
@@ -192,26 +208,27 @@ module Liquid2
  def lex_expression
  loop do
  skip_trivia
- if (value = @scanner.scan(RE_FLOAT))
+ if (value = @scanner.scan(@re_float))
  @tokens << [:token_float, value, @start]
  @start = @scanner.pos
- elsif (value = @scanner.scan(RE_INT))
+ elsif (value = @scanner.scan(@re_int))
  @tokens << [:token_int, value, @start]
  @start = @scanner.pos
- elsif (value = @scanner.scan(RE_PUNCTUATION))
+ elsif (value = @scanner.scan(@re_punctuation))
  @tokens << [TOKEN_MAP[value] || :token_unknown, value, @start]
  @start = @scanner.pos
- elsif (value = @scanner.scan(RE_WORD))
+ elsif (value = @scanner.scan(@re_word))
  @tokens << [TOKEN_MAP[value] || :token_word, value, @start]
  @start = @scanner.pos
  else
  case @scanner.get_byte
  when "'"
  @start = @scanner.pos
- scan_string("'", :token_single_quote_string, RE_SINGLE_QUOTE_STRING_SPECIAL)
+ scan_string("'", :token_single_quote_string, @re_single_quote_string_special)
  when "\""
  @start = @scanner.pos
- scan_string("\"", :token_double_quote_string, RE_DOUBLE_QUOTE_STRING_SPECIAL)
+ scan_string("\"", :token_double_quote_string,
+ @re_double_quote_string_special)
  else
  @scanner.pos -= 1
  break
@@ -222,17 +239,17 @@ module Liquid2
  accept_whitespace_control
 
  # Miro benchmarks show no performance gain using scan_byte and peek_byte over scan here.
- case @scanner.scan(/[\}%]\}/)
- when "}}"
+ case @scanner.scan(@re_markup_end)
+ when @s_out_end
  @tokens << [:token_output_end, nil, @start]
- when "%}"
+ when @s_tag_end
  @tokens << [:token_tag_end, nil, @start]
  else
  # Unexpected token
  return nil if @scanner.eos?
 
- if (ch = @scanner.scan(/[\}%]/))
- raise LiquidSyntaxError.new("missing \"}\" or \"%\" detected",
+ if (ch = @scanner.scan(@re_markup_end_chars))
+ raise LiquidSyntaxError.new("missing markup delimiter detected",
  [:token_unknown, ch, @start])
  end
 
@@ -255,8 +272,7 @@ module Liquid2
 
  wc = accept_whitespace_control
 
- if @scanner.skip_until(/([+\-~]?)(\#{#{hash_count}}\})/)
- @scanner.pos -= @scanner[0]&.length || 0
+ if @scanner.skip_until(/(?=([+\-~]?)(\#{#{hash_count}}#{Regexp.escape(@s_comment_suffix)}))/)
  @tokens << [:token_comment, @source.byteslice(@start...@scanner.pos), @start]
  @start = @scanner.pos
 
@@ -282,18 +298,17 @@ module Liquid2
  end
 
  def lex_inside_inline_comment
- if @scanner.skip_until(/([+\-~])?%\}/)
- @scanner.pos -= @scanner.captures&.first.nil? ? 2 : 3
+ if @scanner.skip_until(@re_up_to_inline_comment_end)
  @tokens << [:token_comment, @source.byteslice(@start...@scanner.pos), @start]
  @start = @scanner.pos
  end
 
  accept_whitespace_control
 
- case @scanner.scan(/[\}%]\}/)
- when "}}"
+ case @scanner.scan(@re_markup_end)
+ when @s_out_end
  @tokens << [:token_output_end, nil, @start]
- when "%}"
+ when @s_tag_end
  @tokens << [:token_tag_end, nil, @start]
  else
  # Unexpected token
@@ -310,17 +325,16 @@ module Liquid2
  skip_trivia
  accept_whitespace_control
 
- case @scanner.scan(/[\}%]\}/)
- when "}}"
+ case @scanner.scan(@re_markup_end)
+ when @s_out_end
  @tokens << [:token_output_end, nil, @start]
  @start = @scanner.pos
- when "%}"
+ when @s_tag_end
  @tokens << [:token_tag_end, nil, @start]
  @start = @scanner.pos
  end
 
- if @scanner.skip_until(/(\{%[+\-~]?\s*endraw\s*[+\-~]?%\})/)
- @scanner.pos -= @scanner.captures&.first&.length || raise
+ if @scanner.skip_until(@re_up_to_raw_end)
  @tokens << [:token_raw, @source.byteslice(@start...@scanner.pos), @start]
  @start = @scanner.pos
  end
@@ -332,11 +346,11 @@ module Liquid2
  skip_trivia
  accept_whitespace_control
 
- case @scanner.scan(/[\}%]\}/)
- when "}}"
+ case @scanner.scan(@re_markup_end)
+ when @s_out_end
  @tokens << [:token_output_end, nil, @start]
  @start = @scanner.pos
- when "%}"
+ when @s_tag_end
  @tokens << [:token_tag_end, nil, @start]
  @start = @scanner.pos
  end
@@ -345,9 +359,7 @@ module Liquid2
  raw_depth = 0
 
  loop do
- unless @scanner.skip_until(/(\{%[+\-~]?\s*(comment|raw|endcomment|endraw)\s*[+\-~]?%\})/)
- break
- end
+ break unless @scanner.skip_until(@re_block_comment_chunk)
 
  tag_name = @scanner.captures&.last || raise
 
@@ -380,17 +392,16 @@ module Liquid2
  skip_trivia
  accept_whitespace_control
 
- case @scanner.scan(/[\}%]\}/)
- when "}}"
+ case @scanner.scan(@re_markup_end)
+ when @s_out_end
  @tokens << [:token_output_end, nil, @start]
  @start = @scanner.pos
- when "%}"
+ when @s_tag_end
  @tokens << [:token_tag_end, nil, @start]
  @start = @scanner.pos
  end
 
- if @scanner.skip_until(/(\{%[+\-~]?\s*enddoc\s*[+\-~]?%\})/)
- @scanner.pos -= @scanner.captures&.first&.length || raise
+ if @scanner.skip_until(@re_up_to_doc_end)
  @tokens << [:token_doc, @source.byteslice(@start...@scanner.pos), @start]
  @start = @scanner.pos
  end
@@ -401,21 +412,19 @@ module Liquid2
  def lex_line_statements
  skip_trivia # Leading newlines are OK
 
- if (tag_name = @scanner.scan(/(?:[a-z][a-z_0-9]*|#)/))
+ if (tag_name = @scanner.scan(@re_tag_name))
  @tokens << [:token_tag_start, nil, @start]
  @tokens << [:token_tag_name, tag_name, @start]
  @start = @scanner.pos
 
- if tag_name == "#" && @scanner.scan_until(/([\r\n]+|-?%\})/)
- @scanner.pos -= @scanner.captures&.first&.length || raise
+ if tag_name == "#" && @scanner.scan_until(@re_line_statement_comment)
  @tokens << [:token_comment, @source.byteslice(@start...@scanner.pos), @start]
  @start = @scanner.pos
  @tokens << [:token_tag_end, nil, @start]
  :lex_line_statements
 
- elsif tag_name == "comment" && @scanner.scan_until(/(endcomment)/)
+ elsif tag_name == "comment" && @scanner.scan_until(/(?=endcomment)/)
  @tokens << [:token_tag_end, nil, @start]
- @scanner.pos -= @scanner.captures&.first&.length || raise
  @tokens << [:token_comment, @source.byteslice(@start...@scanner.pos), @start]
  @start = @scanner.pos
  :lex_line_statements
@@ -424,11 +433,11 @@ module Liquid2
  end
  else
  accept_whitespace_control
- case @scanner.scan(/[\}%]\}/)
- when "}}"
+ case @scanner.scan(@re_markup_end)
+ when @s_out_end
  @tokens << [:token_output_end, nil, @start]
  @start = @scanner.pos
- when "%}"
+ when @s_tag_end
  @tokens << [:token_tag_end, nil, @start]
  @start = @scanner.pos
  end
@@ -444,26 +453,26 @@ module Liquid2
  case @scanner.get_byte
  when "'"
  @start = @scanner.pos
- scan_string("'", :token_single_quote_string, RE_SINGLE_QUOTE_STRING_SPECIAL)
+ scan_string("'", :token_single_quote_string, @re_single_quote_string_special)
  when "\""
  @start = @scanner.pos
- scan_string("\"", :token_double_quote_string, RE_DOUBLE_QUOTE_STRING_SPECIAL)
+ scan_string("\"", :token_double_quote_string, @re_double_quote_string_special)
  when nil
  # End of scanner. Unclosed expression or string literal.
  break
 
  else
  @scanner.pos -= 1
- if (value = @scanner.scan(RE_FLOAT))
+ if (value = @scanner.scan(@re_float))
  @tokens << [:token_float, value, @start]
  @start = @scanner.pos
- elsif (value = @scanner.scan(RE_INT))
+ elsif (value = @scanner.scan(@re_int))
  @tokens << [:token_int, value, @start]
  @start = @scanner.pos
- elsif (value = @scanner.scan(RE_PUNCTUATION))
+ elsif (value = @scanner.scan(@re_punctuation))
  @tokens << [TOKEN_MAP[value] || raise, nil, @start]
  @start = @scanner.pos
- elsif (value = @scanner.scan(RE_WORD))
+ elsif (value = @scanner.scan(@re_word))
  @tokens << [TOKEN_MAP[value] || :token_word, value, @start]
  @start = @scanner.pos
  elsif @scanner.scan(/(\r?\n)+/)
@@ -475,11 +484,11 @@ module Liquid2
  # End of the line statement and enclosing `liquid` tag.
  @tokens << [:token_tag_end, nil, @start]
  accept_whitespace_control
- case @scanner.scan(/[\}%]\}/)
- when "}}"
+ case @scanner.scan(@re_markup_end)
+ when @s_out_end
  @tokens << [:token_output_end, nil, @start]
  @start = @scanner.pos
- when "%}"
+ when @s_tag_end
  @tokens << [:token_tag_end, nil, @start]
  @start = @scanner.pos
  end
@@ -536,10 +545,12 @@ module Liquid2
  case @scanner.get_byte
  when "'"
  @start = @scanner.pos
- scan_string("'", :token_single_quote_string, RE_SINGLE_QUOTE_STRING_SPECIAL)
+ scan_string("'", :token_single_quote_string,
+ @re_single_quote_string_special)
  when "\""
  @start = @scanner.pos
- scan_string("\"", :token_double_quote_string, RE_DOUBLE_QUOTE_STRING_SPECIAL)
+ scan_string("\"", :token_double_quote_string,
+ @re_double_quote_string_special)
  when "}"
  @tokens << [:token_string_interpol_end, nil, @start]
  @start = @scanner.pos
@@ -550,16 +561,16 @@ module Liquid2
  [symbol, nil, start_of_string])
  else
  @scanner.pos -= 1
- if (value = @scanner.scan(RE_FLOAT))
+ if (value = @scanner.scan(@re_float))
  @tokens << [:token_float, value, @start]
  @start = @scanner.pos
- elsif (value = @scanner.scan(RE_INT))
+ elsif (value = @scanner.scan(@re_int))
  @tokens << [:token_int, value, @start]
  @start = @scanner.pos
- elsif (value = @scanner.scan(RE_PUNCTUATION))
+ elsif (value = @scanner.scan(@re_punctuation))
  @tokens << [TOKEN_MAP[value] || raise, nil, @start]
  @start = @scanner.pos
- elsif (value = @scanner.scan(RE_WORD))
+ elsif (value = @scanner.scan(@re_word))
  @tokens << [TOKEN_MAP[value] || :token_word, value, @start]
  @start = @scanner.pos
  else
@@ -1,5 +1,5 @@
  # frozen_string_literal: true
 
  module Liquid2
- VERSION = "0.3.0"
+ VERSION = "0.3.1"
  end
@@ -48,17 +48,11 @@ env = fixture.env
  source = fixture.templates["index.liquid"]
  template = env.get_template("index.liquid")
 
- # scanner = StringScanner.new("")
-
  Benchmark.ips do |x|
  # Configure the number of seconds used during
  # the warmup phase (default 2) and calculation phase (default 5)
  x.config(warmup: 2, time: 5)
 
- # x.report("tokenize (#{fixture.name}):") do
- # Liquid2::Scanner.tokenize(source, scanner)
- # end
-
  x.report("parse (#{fixture.name}):") do
  env.parse(source)
  end
data/sig/liquid2.rbs CHANGED
@@ -82,10 +82,76 @@ module Liquid2
 
  @globals: Hash[String, untyped]?
 
- @scanner: StringScanner
+ @scanner: singleton(Scanner)
+
+ @parser: singleton(Parser)
+
+ @string_scanner: StringScanner
 
  @arithmetic_operators: bool
 
+ # The string of characters that indicate the start of a Liquid output statement.
+ @markup_out_start: String
+
+ # The string of characters that indicate the end of a Liquid output statement.
+ @markup_out_end: String
+
+ # The string of characters that indicate the start of a Liquid tag.
+ @markup_tag_start: String
+
+ # The string of characters that indicate the end of a Liquid tag.
+ @markup_tag_end: String
+
+ # The string of characters that indicate the start of a Liquid comment. This should
+ # include a single trailing `#`. Additional, variable length hashes will be handled
+ # by the tokenizer. It is not possible to change comment syntax to not use `#`.
+ @markup_comment_prefix: String
+
+ # The string of characters that indicate the end of a Liquid comment, excluding any
+ # hashes.
+ @markup_comment_suffix: String
+
+ # A regex pattern matching Liquid tag names. Should include `#` for inline comments.
+ @re_tag_name: Regexp
+
+ @re_word: Regexp
+
+ @re_int: Regexp
+
+ @re_float: Regexp
+
+ @re_double_quote_string_special: Regexp
+
+ @re_single_quote_string_special: Regexp
+
+ # A regex pattern matching the start of some Liquid markup. Could be the start of an
+ # output statement, tag or comment. Traditionally `{{`, `{%` and `{#`, respectively.
+ @re_markup_start: Regexp
+
+ # A regex pattern matching the end of some Liquid markup. Could be the end of
+ # an output statement or tag. Traditionally `}}`, `%}`, respectively.
+ # respectively.
+ @re_markup_end: Regexp
+
+ # A regex pattern matching any one of the possible characters ending some Liquid
+ # markup. This is used to detect incomplete and malformed markup and provide
+ # helpful error messages.
+ @re_markup_end_chars: Regexp
+
+ @re_up_to_markup_start: Regexp
+
+ @re_punctuation: Regexp
+
+ @re_up_to_inline_comment_end: Regexp
+
+ @re_up_to_raw_end: Regexp
+
+ @re_block_comment_chunk: Regexp
+
+ @re_up_to_doc_end: Regexp
+
+ @re_line_statement_comment: Regexp
+
  attr_reader tags: Hash[String, _Tag]
 
  attr_reader local_namespace_limit: Integer?
@@ -106,7 +172,51 @@ module Liquid2
 
  attr_reader arithmetic_operators: bool
 
- def initialize: (?context_depth_limit: ::Integer, ?globals: Hash[String, untyped]?, ?loader: TemplateLoader?, ?local_namespace_limit: Integer?, ?loop_iteration_limit: Integer?, ?output_stream_limit: Integer?, ?shorthand_indexes: bool, ?suppress_blank_control_flow_blocks: bool, ?undefined: singleton(Undefined), ?falsy_undefined: bool) -> void
+ attr_reader markup_comment_prefix: String
+
+ attr_reader markup_comment_suffix: String
+
+ attr_reader markup_out_end: String
+
+ attr_reader markup_out_start: String
+
+ attr_reader markup_tag_end: String
+
+ attr_reader markup_tag_start: String
+
+ attr_reader re_tag_name: Regexp
+
+ attr_reader re_word: Regexp
+
+ attr_reader re_int: Regexp
+
+ attr_reader re_float: Regexp
+
+ attr_reader re_double_quote_string_special: Regexp
+
+ attr_reader re_single_quote_string_special: Regexp
+
+ attr_reader re_markup_start: Regexp
+
+ attr_reader re_markup_end: Regexp
+
+ attr_reader re_markup_end_chars: Regexp
+
+ attr_reader re_up_to_markup_start: Regexp
+
+ attr_reader re_punctuation: Regexp
+
+ attr_reader re_up_to_inline_comment_end: Regexp
+
+ attr_reader re_up_to_raw_end: Regexp
+
+ attr_reader re_block_comment_chunk: Regexp
+
+ attr_reader re_up_to_doc_end: Regexp
+
+ attr_reader re_line_statement_comment: Regexp
+
+ def initialize: (?arithmetic_operators: bool, ?context_depth_limit: ::Integer, ?falsy_undefined: bool, ?globals: untyped?, ?loader: TemplateLoader?, ?local_namespace_limit: Integer?, ?loop_iteration_limit: Integer?, ?markup_comment_prefix: ::String, ?markup_comment_suffix: ::String, ?markup_out_end: ::String, ?markup_out_start: ::String, ?markup_tag_end: ::String, ?markup_tag_start: ::String, ?output_stream_limit: Integer?, ?parser: singleton(Parser), ?scanner: singleton(Scanner), ?shorthand_indexes: bool, ?suppress_blank_control_flow_blocks: bool, ?undefined: singleton(Undefined)) -> void
 
  # @param source [String] template source text.
  # @return [Template]
@@ -136,6 +246,8 @@ module Liquid2
  def delete_tag: (String name) -> (_Tag | nil)
 
  def setup_tags_and_filters: () -> void
+
+ def setup_scanner: () -> void
 
  def undefined: (String name, ?node: _HasToken?) -> Undefined
 
@@ -171,37 +283,59 @@ module Liquid2
  # A pointer to the start of the current token.
  @start: Integer
 
- # Tokens are arrays of (kind, value, start index)
- @tokens: Array[[Symbol, String?, Integer]]
+ @s_out_start: String
 
- attr_reader tokens: Array[[Symbol, String?, Integer]]
+ @s_out_end: String
+
+ @s_tag_start: String
+
+ @s_tag_end: String
+
+ @s_comment_prefix: String
+
+ @s_comment_suffix: String
+
+ @re_tag_name: Regexp
+
+ @re_word: Regexp
 
- RE_MARKUP_START: ::Regexp
+ @re_int: Regexp
 
- RE_WHITESPACE: ::Regexp
+ @re_float: Regexp
 
- RE_LINE_SPACE: ::Regexp
+ @re_double_quote_string_special: Regexp
 
- RE_WORD: ::Regexp
+ @re_single_quote_string_special: Regexp
 
- RE_INT: ::Regexp
+ @re_markup_start: Regexp
 
- RE_FLOAT: ::Regexp
+ @re_markup_end: Regexp
 
- RE_PUNCTUATION: ::Regexp
+ @re_markup_end_chars: Regexp
 
- RE_SINGLE_QUOTE_STRING_SPECIAL: ::Regexp
+ @re_up_to_markup_start: Regexp
 
- RE_DOUBLE_QUOTE_STRING_SPECIAL: ::Regexp
+ @re_punctuation: Regexp
+
+ @re_up_to_inline_comment_end: Regexp
+ @re_up_to_raw_end: Regexp
+ @re_block_comment_chunk: Regexp
+ @re_up_to_doc_end: Regexp
+ @re_line_statement_comment: Regexp
+
+ # Tokens are arrays of (kind, value, start index)
+ @tokens: Array[[Symbol, String?, Integer]]
+
+ attr_reader tokens: Array[[Symbol, String?, Integer]]
 
  # Keywords and symbols that get their own token kind.
  TOKEN_MAP: Hash[String, Symbol]
 
- def self.tokenize: (String source, StringScanner scanner) -> Array[[Symbol, String?, Integer]]
+ def self.tokenize: (Environment env, String source, StringScanner scanner) -> Array[[Symbol, String?, Integer]]
 
  # @param source [String]
  # @param scanner [StringScanner]
- def initialize: (String source, StringScanner scanner) -> void
+ def initialize: (Environment env, String source, StringScanner scanner) -> void
 
  def run: () -> void
 
@@ -1760,6 +1894,8 @@ module Liquid2
 
  # Return the subsequence of _left_ starting at _start_ up to _length_.
  def self.slice: (untyped left, untyped start, ?untyped length) -> untyped
+
+ def self.better_slice: (untyped left, ?untyped start_, ?untyped stop_, ?untyped step_, ?start: untyped, ?stop: untyped, ?step: untyped) -> untyped
 
  # Return _left_ with all characters converted to uppercase.
  # Coerce _left_ to a string if it is not one already.
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: liquid2
  version: !ruby/object:Gem::Version
- version: 0.3.0
+ version: 0.3.1
  platform: ruby
  authors:
  - James Prior
metadata.gz.sig CHANGED
Binary file