liquid2 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,14 +12,6 @@ module Liquid2
12
12
  class Scanner
13
13
  attr_reader :tokens
14
14
 
15
- RE_LINE_SPACE = /[ \t]+/
16
- RE_WORD = /[\u0080-\uFFFFa-zA-Z_][\u0080-\uFFFFa-zA-Z0-9_-]*/
17
- RE_INT = /-?\d+(?:[eE]\+?\d+)?/
18
- RE_FLOAT = /((?:-?\d+\.\d+(?:[eE][+-]?\d+)?)|(-?\d+[eE]-\d+))/
19
- RE_PUNCTUATION = /\?|\[|\]|\|{1,2}|\.{1,2}|,|:|\(|\)|[<>=!]+/
20
- RE_SINGLE_QUOTE_STRING_SPECIAL = /[\\'\$]/
21
- RE_DOUBLE_QUOTE_STRING_SPECIAL = /[\\"\$]/
22
-
23
15
  # Keywords and symbols that get their own token kind.
24
16
  TOKEN_MAP = {
25
17
  "true" => :token_true,
@@ -58,18 +50,26 @@ module Liquid2
58
50
  ">=" => :token_ge,
59
51
  "==" => :token_eq,
60
52
  "!=" => :token_ne,
61
- "=>" => :token_arrow
53
+ "=>" => :token_arrow,
54
+ "+" => :token_plus,
55
+ "-" => :token_minus,
56
+ "%" => :token_mod,
57
+ "*" => :token_times,
58
+ "/" => :token_divide,
59
+ "//" => :token_floor_div,
60
+ "**" => :token_pow
62
61
  }.freeze
63
62
 
64
- def self.tokenize(source, scanner)
65
- lexer = new(source, scanner)
63
+ def self.tokenize(env, source, scanner)
64
+ lexer = new(env, source, scanner)
66
65
  lexer.run
67
66
  lexer.tokens
68
67
  end
69
68
 
69
+ # @param env [Environment]
70
70
  # @param source [String]
71
71
  # @param scanner [StringScanner]
72
- def initialize(source, scanner)
72
+ def initialize(env, source, scanner)
73
73
  @source = source
74
74
  @scanner = scanner
75
75
  @scanner.string = @source
@@ -77,8 +77,33 @@ module Liquid2
77
77
  # A pointer to the start of the current token.
78
78
  @start = 0
79
79
 
80
- # Tokens are arrays of (kind, value, start index)
80
+ # Tokens are arrays of (kind, value, start index).
81
+ # Sometimes we set value to `nil` when the symbol is unambiguous.
81
82
  @tokens = [] # : Array[[Symbol, String?, Integer]]
83
+
84
+ @s_out_start = env.markup_out_start
85
+ @s_out_end = env.markup_out_end
86
+ @s_tag_start = env.markup_tag_start
87
+ @s_tag_end = env.markup_tag_end
88
+ @s_comment_prefix = env.markup_comment_prefix
89
+ @s_comment_suffix = env.markup_comment_suffix
90
+
91
+ @re_tag_name = env.re_tag_name
92
+ @re_word = env.re_word
93
+ @re_int = env.re_int
94
+ @re_float = env.re_float
95
+ @re_double_quote_string_special = env.re_double_quote_string_special
96
+ @re_single_quote_string_special = env.re_single_quote_string_special
97
+ @re_markup_start = env.re_markup_start
98
+ @re_markup_end = env.re_markup_end
99
+ @re_markup_end_chars = env.re_markup_end_chars
100
+ @re_up_to_markup_start = env.re_up_to_markup_start
101
+ @re_punctuation = env.re_punctuation
102
+ @re_up_to_inline_comment_end = env.re_up_to_inline_comment_end
103
+ @re_up_to_raw_end = env.re_up_to_raw_end
104
+ @re_block_comment_chunk = env.re_block_comment_chunk
105
+ @re_up_to_doc_end = env.re_up_to_doc_end
106
+ @re_line_statement_comment = env.re_line_statement_comment
82
107
  end
83
108
 
84
109
  def run
@@ -101,14 +126,13 @@ module Liquid2
101
126
  end
102
127
 
103
128
  def skip_line_trivia
104
- @start = @scanner.pos if @scanner.skip(RE_LINE_SPACE)
129
+ @start = @scanner.pos if @scanner.skip(/[ \t]+/)
105
130
  end
106
131
 
107
132
  def accept_whitespace_control
108
133
  ch = @scanner.peek(1)
109
134
 
110
- case ch
111
- when "-", "+", "~"
135
+ if ch == "-" || ch == "+" || ch == "~" # rubocop: disable Style/MultipleComparison
112
136
  @scanner.pos += 1
113
137
  @tokens << [:token_whitespace_control, ch, @start]
114
138
  @start = @scanner.pos
@@ -119,22 +143,22 @@ module Liquid2
119
143
  end
120
144
 
121
145
  def lex_markup
122
- case @scanner.scan(/\{[\{%#]/)
123
- when "{#"
146
+ case @scanner.scan(@re_markup_start)
147
+ when @s_comment_prefix
124
148
  :lex_comment
125
- when "{{"
149
+ when @s_out_start
126
150
  @tokens << [:token_output_start, nil, @start]
127
151
  @start = @scanner.pos
128
152
  accept_whitespace_control
129
153
  skip_trivia
130
154
  :lex_expression
131
- when "{%"
155
+ when @s_tag_start
132
156
  @tokens << [:token_tag_start, nil, @start]
133
157
  @start = @scanner.pos
134
158
  accept_whitespace_control
135
159
  skip_trivia
136
160
 
137
- if (tag_name = @scanner.scan(/(?:[a-z][a-z_0-9]*|#)/))
161
+ if (tag_name = @scanner.scan(@re_tag_name))
138
162
  @tokens << [:token_tag_name, tag_name, @start]
139
163
  @start = @scanner.pos
140
164
 
@@ -166,8 +190,7 @@ module Liquid2
166
190
  :lex_expression
167
191
  end
168
192
  else
169
- if @scanner.skip_until(/\{[\{%#]/)
170
- @scanner.pos -= 2
193
+ if @scanner.skip_until(@re_up_to_markup_start)
171
194
  @tokens << [:token_other, @source.byteslice(@start...@scanner.pos), @start]
172
195
  @start = @scanner.pos
173
196
  :lex_markup
@@ -185,26 +208,27 @@ module Liquid2
185
208
  def lex_expression
186
209
  loop do
187
210
  skip_trivia
188
- if (value = @scanner.scan(RE_FLOAT))
211
+ if (value = @scanner.scan(@re_float))
189
212
  @tokens << [:token_float, value, @start]
190
213
  @start = @scanner.pos
191
- elsif (value = @scanner.scan(RE_INT))
214
+ elsif (value = @scanner.scan(@re_int))
192
215
  @tokens << [:token_int, value, @start]
193
216
  @start = @scanner.pos
194
- elsif (value = @scanner.scan(RE_PUNCTUATION))
217
+ elsif (value = @scanner.scan(@re_punctuation))
195
218
  @tokens << [TOKEN_MAP[value] || :token_unknown, value, @start]
196
219
  @start = @scanner.pos
197
- elsif (value = @scanner.scan(RE_WORD))
220
+ elsif (value = @scanner.scan(@re_word))
198
221
  @tokens << [TOKEN_MAP[value] || :token_word, value, @start]
199
222
  @start = @scanner.pos
200
223
  else
201
224
  case @scanner.get_byte
202
225
  when "'"
203
226
  @start = @scanner.pos
204
- scan_string("'", :token_single_quote_string, RE_SINGLE_QUOTE_STRING_SPECIAL)
227
+ scan_string("'", :token_single_quote_string, @re_single_quote_string_special)
205
228
  when "\""
206
229
  @start = @scanner.pos
207
- scan_string("\"", :token_double_quote_string, RE_DOUBLE_QUOTE_STRING_SPECIAL)
230
+ scan_string("\"", :token_double_quote_string,
231
+ @re_double_quote_string_special)
208
232
  else
209
233
  @scanner.pos -= 1
210
234
  break
@@ -215,17 +239,17 @@ module Liquid2
215
239
  accept_whitespace_control
216
240
 
217
241
  # Micro benchmarks show no performance gain using scan_byte and peek_byte over scan here.
218
- case @scanner.scan(/[\}%]\}/)
219
- when "}}"
242
+ case @scanner.scan(@re_markup_end)
243
+ when @s_out_end
220
244
  @tokens << [:token_output_end, nil, @start]
221
- when "%}"
245
+ when @s_tag_end
222
246
  @tokens << [:token_tag_end, nil, @start]
223
247
  else
224
248
  # Unexpected token
225
249
  return nil if @scanner.eos?
226
250
 
227
- if (ch = @scanner.scan(/[\}%]/))
228
- raise LiquidSyntaxError.new("missing \"}\" or \"%\" detected",
251
+ if (ch = @scanner.scan(@re_markup_end_chars))
252
+ raise LiquidSyntaxError.new("missing markup delimiter detected",
229
253
  [:token_unknown, ch, @start])
230
254
  end
231
255
 
@@ -248,8 +272,7 @@ module Liquid2
248
272
 
249
273
  wc = accept_whitespace_control
250
274
 
251
- if @scanner.skip_until(/([+\-~]?)(\#{#{hash_count}}\})/)
252
- @scanner.pos -= @scanner[0]&.length || 0
275
+ if @scanner.skip_until(/(?=([+\-~]?)(\#{#{hash_count}}#{Regexp.escape(@s_comment_suffix)}))/)
253
276
  @tokens << [:token_comment, @source.byteslice(@start...@scanner.pos), @start]
254
277
  @start = @scanner.pos
255
278
 
@@ -275,18 +298,17 @@ module Liquid2
275
298
  end
276
299
 
277
300
  def lex_inside_inline_comment
278
- if @scanner.skip_until(/([+\-~])?%\}/)
279
- @scanner.pos -= @scanner.captures&.first.nil? ? 2 : 3
301
+ if @scanner.skip_until(@re_up_to_inline_comment_end)
280
302
  @tokens << [:token_comment, @source.byteslice(@start...@scanner.pos), @start]
281
303
  @start = @scanner.pos
282
304
  end
283
305
 
284
306
  accept_whitespace_control
285
307
 
286
- case @scanner.scan(/[\}%]\}/)
287
- when "}}"
308
+ case @scanner.scan(@re_markup_end)
309
+ when @s_out_end
288
310
  @tokens << [:token_output_end, nil, @start]
289
- when "%}"
311
+ when @s_tag_end
290
312
  @tokens << [:token_tag_end, nil, @start]
291
313
  else
292
314
  # Unexpected token
@@ -303,17 +325,16 @@ module Liquid2
303
325
  skip_trivia
304
326
  accept_whitespace_control
305
327
 
306
- case @scanner.scan(/[\}%]\}/)
307
- when "}}"
328
+ case @scanner.scan(@re_markup_end)
329
+ when @s_out_end
308
330
  @tokens << [:token_output_end, nil, @start]
309
331
  @start = @scanner.pos
310
- when "%}"
332
+ when @s_tag_end
311
333
  @tokens << [:token_tag_end, nil, @start]
312
334
  @start = @scanner.pos
313
335
  end
314
336
 
315
- if @scanner.skip_until(/(\{%[+\-~]?\s*endraw\s*[+\-~]?%\})/)
316
- @scanner.pos -= @scanner.captures&.first&.length || raise
337
+ if @scanner.skip_until(@re_up_to_raw_end)
317
338
  @tokens << [:token_raw, @source.byteslice(@start...@scanner.pos), @start]
318
339
  @start = @scanner.pos
319
340
  end
@@ -325,11 +346,11 @@ module Liquid2
325
346
  skip_trivia
326
347
  accept_whitespace_control
327
348
 
328
- case @scanner.scan(/[\}%]\}/)
329
- when "}}"
349
+ case @scanner.scan(@re_markup_end)
350
+ when @s_out_end
330
351
  @tokens << [:token_output_end, nil, @start]
331
352
  @start = @scanner.pos
332
- when "%}"
353
+ when @s_tag_end
333
354
  @tokens << [:token_tag_end, nil, @start]
334
355
  @start = @scanner.pos
335
356
  end
@@ -338,9 +359,7 @@ module Liquid2
338
359
  raw_depth = 0
339
360
 
340
361
  loop do
341
- unless @scanner.skip_until(/(\{%[+\-~]?\s*(comment|raw|endcomment|endraw)\s*[+\-~]?%\})/)
342
- break
343
- end
362
+ break unless @scanner.skip_until(@re_block_comment_chunk)
344
363
 
345
364
  tag_name = @scanner.captures&.last || raise
346
365
 
@@ -373,17 +392,16 @@ module Liquid2
373
392
  skip_trivia
374
393
  accept_whitespace_control
375
394
 
376
- case @scanner.scan(/[\}%]\}/)
377
- when "}}"
395
+ case @scanner.scan(@re_markup_end)
396
+ when @s_out_end
378
397
  @tokens << [:token_output_end, nil, @start]
379
398
  @start = @scanner.pos
380
- when "%}"
399
+ when @s_tag_end
381
400
  @tokens << [:token_tag_end, nil, @start]
382
401
  @start = @scanner.pos
383
402
  end
384
403
 
385
- if @scanner.skip_until(/(\{%[+\-~]?\s*enddoc\s*[+\-~]?%\})/)
386
- @scanner.pos -= @scanner.captures&.first&.length || raise
404
+ if @scanner.skip_until(@re_up_to_doc_end)
387
405
  @tokens << [:token_doc, @source.byteslice(@start...@scanner.pos), @start]
388
406
  @start = @scanner.pos
389
407
  end
@@ -394,21 +412,19 @@ module Liquid2
394
412
  def lex_line_statements
395
413
  skip_trivia # Leading newlines are OK
396
414
 
397
- if (tag_name = @scanner.scan(/(?:[a-z][a-z_0-9]*|#)/))
415
+ if (tag_name = @scanner.scan(@re_tag_name))
398
416
  @tokens << [:token_tag_start, nil, @start]
399
417
  @tokens << [:token_tag_name, tag_name, @start]
400
418
  @start = @scanner.pos
401
419
 
402
- if tag_name == "#" && @scanner.scan_until(/([\r\n]+|-?%\})/)
403
- @scanner.pos -= @scanner.captures&.first&.length || raise
420
+ if tag_name == "#" && @scanner.scan_until(@re_line_statement_comment)
404
421
  @tokens << [:token_comment, @source.byteslice(@start...@scanner.pos), @start]
405
422
  @start = @scanner.pos
406
423
  @tokens << [:token_tag_end, nil, @start]
407
424
  :lex_line_statements
408
425
 
409
- elsif tag_name == "comment" && @scanner.scan_until(/(endcomment)/)
426
+ elsif tag_name == "comment" && @scanner.scan_until(/(?=endcomment)/)
410
427
  @tokens << [:token_tag_end, nil, @start]
411
- @scanner.pos -= @scanner.captures&.first&.length || raise
412
428
  @tokens << [:token_comment, @source.byteslice(@start...@scanner.pos), @start]
413
429
  @start = @scanner.pos
414
430
  :lex_line_statements
@@ -417,11 +433,11 @@ module Liquid2
417
433
  end
418
434
  else
419
435
  accept_whitespace_control
420
- case @scanner.scan(/[\}%]\}/)
421
- when "}}"
436
+ case @scanner.scan(@re_markup_end)
437
+ when @s_out_end
422
438
  @tokens << [:token_output_end, nil, @start]
423
439
  @start = @scanner.pos
424
- when "%}"
440
+ when @s_tag_end
425
441
  @tokens << [:token_tag_end, nil, @start]
426
442
  @start = @scanner.pos
427
443
  end
@@ -437,26 +453,26 @@ module Liquid2
437
453
  case @scanner.get_byte
438
454
  when "'"
439
455
  @start = @scanner.pos
440
- scan_string("'", :token_single_quote_string, RE_SINGLE_QUOTE_STRING_SPECIAL)
456
+ scan_string("'", :token_single_quote_string, @re_single_quote_string_special)
441
457
  when "\""
442
458
  @start = @scanner.pos
443
- scan_string("\"", :token_double_quote_string, RE_DOUBLE_QUOTE_STRING_SPECIAL)
459
+ scan_string("\"", :token_double_quote_string, @re_double_quote_string_special)
444
460
  when nil
445
461
  # End of scanner. Unclosed expression or string literal.
446
462
  break
447
463
 
448
464
  else
449
465
  @scanner.pos -= 1
450
- if (value = @scanner.scan(RE_FLOAT))
466
+ if (value = @scanner.scan(@re_float))
451
467
  @tokens << [:token_float, value, @start]
452
468
  @start = @scanner.pos
453
- elsif (value = @scanner.scan(RE_INT))
469
+ elsif (value = @scanner.scan(@re_int))
454
470
  @tokens << [:token_int, value, @start]
455
471
  @start = @scanner.pos
456
- elsif (value = @scanner.scan(RE_PUNCTUATION))
472
+ elsif (value = @scanner.scan(@re_punctuation))
457
473
  @tokens << [TOKEN_MAP[value] || raise, nil, @start]
458
474
  @start = @scanner.pos
459
- elsif (value = @scanner.scan(RE_WORD))
475
+ elsif (value = @scanner.scan(@re_word))
460
476
  @tokens << [TOKEN_MAP[value] || :token_word, value, @start]
461
477
  @start = @scanner.pos
462
478
  elsif @scanner.scan(/(\r?\n)+/)
@@ -468,11 +484,11 @@ module Liquid2
468
484
  # End of the line statement and enclosing `liquid` tag.
469
485
  @tokens << [:token_tag_end, nil, @start]
470
486
  accept_whitespace_control
471
- case @scanner.scan(/[\}%]\}/)
472
- when "}}"
487
+ case @scanner.scan(@re_markup_end)
488
+ when @s_out_end
473
489
  @tokens << [:token_output_end, nil, @start]
474
490
  @start = @scanner.pos
475
- when "%}"
491
+ when @s_tag_end
476
492
  @tokens << [:token_tag_end, nil, @start]
477
493
  @start = @scanner.pos
478
494
  end
@@ -529,10 +545,12 @@ module Liquid2
529
545
  case @scanner.get_byte
530
546
  when "'"
531
547
  @start = @scanner.pos
532
- scan_string("'", :token_single_quote_string, RE_SINGLE_QUOTE_STRING_SPECIAL)
548
+ scan_string("'", :token_single_quote_string,
549
+ @re_single_quote_string_special)
533
550
  when "\""
534
551
  @start = @scanner.pos
535
- scan_string("\"", :token_double_quote_string, RE_DOUBLE_QUOTE_STRING_SPECIAL)
552
+ scan_string("\"", :token_double_quote_string,
553
+ @re_double_quote_string_special)
536
554
  when "}"
537
555
  @tokens << [:token_string_interpol_end, nil, @start]
538
556
  @start = @scanner.pos
@@ -543,16 +561,16 @@ module Liquid2
543
561
  [symbol, nil, start_of_string])
544
562
  else
545
563
  @scanner.pos -= 1
546
- if (value = @scanner.scan(RE_FLOAT))
564
+ if (value = @scanner.scan(@re_float))
547
565
  @tokens << [:token_float, value, @start]
548
566
  @start = @scanner.pos
549
- elsif (value = @scanner.scan(RE_INT))
567
+ elsif (value = @scanner.scan(@re_int))
550
568
  @tokens << [:token_int, value, @start]
551
569
  @start = @scanner.pos
552
- elsif (value = @scanner.scan(RE_PUNCTUATION))
570
+ elsif (value = @scanner.scan(@re_punctuation))
553
571
  @tokens << [TOKEN_MAP[value] || raise, nil, @start]
554
572
  @start = @scanner.pos
555
- elsif (value = @scanner.scan(RE_WORD))
573
+ elsif (value = @scanner.scan(@re_word))
556
574
  @tokens << [TOKEN_MAP[value] || :token_word, value, @start]
557
575
  @start = @scanner.pos
558
576
  else
@@ -28,6 +28,8 @@ module Liquid2
28
28
  def to_s = ""
29
29
  def to_i = 0
30
30
  def to_f = 0.0
31
+ def -@ = self
32
+ def +@ = self
31
33
  def each(...) = Enumerator.new {} # rubocop:disable Lint/EmptyBlock
32
34
  def each_with_index(...) = Enumerator.new {} # rubocop:disable Lint/EmptyBlock
33
35
  def join(...) = ""
@@ -102,6 +104,14 @@ module Liquid2
102
104
  raise UndefinedError.new(@message, @node.token)
103
105
  end
104
106
 
107
+ def +@
108
+ self
109
+ end
110
+
111
+ def -@
112
+ self
113
+ end
114
+
105
115
  def each(...)
106
116
  raise UndefinedError.new(@message, @node.token)
107
117
  end
@@ -115,7 +125,7 @@ module Liquid2
115
125
  end
116
126
 
117
127
  def to_liquid(_context)
118
- raise UndefinedError.new(@message, @node.token)
128
+ self
119
129
  end
120
130
 
121
131
  def poke
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Liquid2
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.1"
5
5
  end
@@ -48,17 +48,11 @@ env = fixture.env
48
48
  source = fixture.templates["index.liquid"]
49
49
  template = env.get_template("index.liquid")
50
50
 
51
- # scanner = StringScanner.new("")
52
-
53
51
  Benchmark.ips do |x|
54
52
  # Configure the number of seconds used during
55
53
  # the warmup phase (default 2) and calculation phase (default 5)
56
54
  x.config(warmup: 2, time: 5)
57
55
 
58
- # x.report("tokenize (#{fixture.name}):") do
59
- # Liquid2::Scanner.tokenize(source, scanner)
60
- # end
61
-
62
56
  x.report("parse (#{fixture.name}):") do
63
57
  env.parse(source)
64
58
  end