orb_template 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +5 -0
  3. data/CODE_OF_CONDUCT.md +132 -0
  4. data/LICENSE.txt +21 -0
  5. data/Makefile +45 -0
  6. data/README.md +429 -0
  7. data/Rakefile +15 -0
  8. data/lib/orb/ast/abstract_node.rb +27 -0
  9. data/lib/orb/ast/attribute.rb +51 -0
  10. data/lib/orb/ast/block_node.rb +26 -0
  11. data/lib/orb/ast/control_expression_node.rb +27 -0
  12. data/lib/orb/ast/newline_node.rb +22 -0
  13. data/lib/orb/ast/printing_expression_node.rb +29 -0
  14. data/lib/orb/ast/private_comment_node.rb +22 -0
  15. data/lib/orb/ast/public_comment_node.rb +22 -0
  16. data/lib/orb/ast/root_node.rb +11 -0
  17. data/lib/orb/ast/tag_node.rb +208 -0
  18. data/lib/orb/ast/text_node.rb +22 -0
  19. data/lib/orb/ast.rb +19 -0
  20. data/lib/orb/document.rb +19 -0
  21. data/lib/orb/errors.rb +40 -0
  22. data/lib/orb/parser.rb +182 -0
  23. data/lib/orb/patterns.rb +40 -0
  24. data/lib/orb/rails_derp.rb +138 -0
  25. data/lib/orb/rails_template.rb +101 -0
  26. data/lib/orb/railtie.rb +9 -0
  27. data/lib/orb/render_context.rb +36 -0
  28. data/lib/orb/template.rb +72 -0
  29. data/lib/orb/temple/attributes_compiler.rb +114 -0
  30. data/lib/orb/temple/compiler.rb +204 -0
  31. data/lib/orb/temple/engine.rb +40 -0
  32. data/lib/orb/temple/filters.rb +132 -0
  33. data/lib/orb/temple/generators.rb +108 -0
  34. data/lib/orb/temple/identity.rb +16 -0
  35. data/lib/orb/temple/parser.rb +46 -0
  36. data/lib/orb/temple.rb +16 -0
  37. data/lib/orb/token.rb +47 -0
  38. data/lib/orb/tokenizer.rb +757 -0
  39. data/lib/orb/tokenizer2.rb +591 -0
  40. data/lib/orb/utils/erb.rb +40 -0
  41. data/lib/orb/utils/orb.rb +12 -0
  42. data/lib/orb/version.rb +5 -0
  43. data/lib/orb.rb +50 -0
  44. metadata +89 -0
@@ -0,0 +1,757 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'stringio'
4
+
5
+ module ORB
6
+ class Tokenizer
7
+ attr_reader :tokens, :errors
8
+
9
+ SPACE_CHARS = [" ", "\s", "\t", "\r", "\n", "\f"].freeze
10
+ NAME_STOP_CHARS = SPACE_CHARS + [">", "/", "="]
11
+ UNQUOTED_VALUE_INVALID_CHARS = ['"', "'", "=", "<", "`"].freeze
12
+ UNQUOTED_VALUE_STOP_CHARS = SPACE_CHARS + [">"]
13
+ BLOCK_NAME_STOP_CHARS = SPACE_CHARS + ["}"]
14
+ START_TAG_START = "<"
15
+ START_TAG_END = ">"
16
+ START_TAG_END_SELF_CLOSING = "/>"
17
+ END_TAG_START = "</"
18
+ END_TAG_END = ">"
19
+ COMMENT_START = "<!--"
20
+ COMMENT_END = "-->"
21
+ PCOMMENT_START = "{!--"
22
+ PCOMMENT_END = "--}"
23
+ PEXPRESSION_START = "{{"
24
+ PEXPRESSION_END = "}}"
25
+ NPEXPRESSION_START = "{%"
26
+ NPEXPRESSION_END = "%}"
27
+ START_BLOCK_START = "{#"
28
+ START_BLOCK_END = "}"
29
+ END_BLOCK_START = "{/"
30
+ END_BLOCK_END = "}"
31
+ ERB_START = "<%"
32
+ ERB_END = "%>"
33
+ ATTRIBUTE_ASSIGN = "="
34
+ SINGLE_QUOTE = "'"
35
+ DOUBLE_QUOTE = '"'
36
+ BRACE_OPEN = "{"
37
+ BRACE_CLOSE = "}"
38
+ CR = "\r"
39
+ NL = "\n"
40
+ CRLF = "\r\n"
41
+
42
+ IGNORED_BODY_TAGS = %w[script style].freeze
43
+ VOID_ELEMENTS = %w[area base br col command embed hr img input keygen link meta param source track wbr].freeze
44
+
45
+ # For error messages
46
+ HUMAN_READABLE_STATE_NAMES = {
47
+ initial: "Input",
48
+ comment: "Comment",
49
+ pcomment: "ORB Comment",
50
+ tag_open: "Tag",
51
+ tag_close: "Closing Tag",
52
+ tag_name: "Tag Name",
53
+ maybe_tag_open_end: "Tag",
54
+ maybe_tag_close_end: "Closing Tag",
55
+ tag_attribute: "Attribute",
56
+ attribute_maybe_value: "Attribute Value",
57
+ attribute_value_begin: "Attribute Value",
58
+ attribute_value_double_quote: "Attribute Value",
59
+ attribute_value_single_quote: "Attribute Value",
60
+ attribute_value_expression: "Attribute Value",
61
+ block_open: "Block",
62
+ maybe_block_end: "Block",
63
+ block_close: "Block",
64
+ pexpression: "Expression",
65
+ npexpression: "Expression",
66
+ erb_expression: "Expression",
67
+ }.freeze
68
+
69
+ def initialize(source, opts = {})
70
+ @source = source
71
+ @tokens = []
72
+ @errors = []
73
+
74
+ # Options
75
+ @file = opts.fetch(:file, :nofile)
76
+ @line = opts.fetch(:line, 1)
77
+ @column = opts.fetch(:column, 1)
78
+ @indentation = opts.fetch(:indentation, 0)
79
+ @raise_errors = opts.fetch(:raise_errors, false)
80
+
81
+ # State
82
+ @cursor = 0
83
+ @buffer = StringIO.new
84
+ @current_line = @line
85
+ @current_column = @column
86
+ @column_offset = @indentation + 1
87
+ @embedded_expression = false
88
+ clear_braces
89
+ clear_attributes
90
+ transition_to(:initial)
91
+ end
92
+
93
+ # Main entry point, and only public method. Tokenize the source string and return the tokens.
94
+ # If any errors are encountered during tokenization, this method will raise the first error.
95
+ def tokenize!
96
+ next_token while @cursor < @source.length
97
+
98
+ # Write out any remaining text in the buffer
99
+ text = consume_buffer
100
+ @tokens << Token.new(:text, text) unless text.strip.empty?
101
+
102
+ # Run checks to ensure the tokenizer state is valid, report any errors
103
+ check_tokenizer_state
104
+ check_for_unclosed_blocks_or_tags
105
+
106
+ @tokens
107
+ end
108
+
109
+ alias_method :tokenize, :tokenize!
110
+
111
+ private
112
+
113
+ # -------------------------------------------------------------------------
114
+ # Dispatch
115
+ # -------------------------------------------------------------------------
116
+
117
+ # Transitions to the appropriate tokenization method based on the current state
118
+ # or terminates the tokenizer if an invalid state is reached.
119
+ #
120
+ # rubocop:disable Metrics/CyclomaticComplexity
121
+ def next_token
122
+ debug "STATE: #{@state}"
123
+
124
+ case @state
125
+ when :initial
126
+ tokenize_text
127
+ when :public_comment
128
+ tokenize_comment
129
+ when :pcomment
130
+ tokenize_pcomment
131
+ when :tag_open
132
+ tokenize_tag_open
133
+ when :tag_close
134
+ tokenize_tag_close
135
+ when :tag_name
136
+ tokenize_tag_name
137
+ when :maybe_tag_open_end
138
+ tokenize_maybe_tag_open_end
139
+ when :maybe_tag_close_end
140
+ tokenize_maybe_tag_close_end
141
+ when :tag_attribute
142
+ tokenize_tag_attribute
143
+ when :attribute_maybe_value
144
+ tokenize_attribute_maybe_value
145
+ when :attribute_value_begin
146
+ tokenize_attribute_value_begin
147
+ when :attribute_value_double_quote
148
+ tokenize_attribute_value_double_quote
149
+ when :attribute_value_single_quote
150
+ tokenize_attribute_value_single_quote
151
+ when :attribute_value_expression
152
+ tokenize_attribute_value_expression
153
+ when :block_open
154
+ tokenize_block_open
155
+ when :maybe_block_end
156
+ tokenize_maybe_block_end
157
+ when :block_close
158
+ tokenize_block_close
159
+ when :pexpression
160
+ tokenize_pexpression
161
+ when :npexpression
162
+ tokenize_npexpression
163
+ when :erb
164
+ tokenize_erb
165
+ else
166
+ terminate
167
+ end
168
+ end
169
+ # rubocop:enable Metrics/CyclomaticComplexity
170
+
171
+ # -------------------------------------------------------------------------
172
+ # Initial State
173
+ # -------------------------------------------------------------------------
174
+
175
+ # This is the main state transition method, invoked when the tokenizer is in the :initial state.
176
+ # In this state, we either transition to a specific token state based on the text lookahead,
177
+ # or we consume the next character (appending it to the buffer), and stay in the :initial state.
178
+ def tokenize_text
179
+ text = @source[@cursor..]
180
+
181
+ if text.start_with?(CRLF)
182
+ consume(CRLF, newline: true)
183
+ elsif text.start_with?(NL)
184
+ consume(NL, newline: true)
185
+ elsif text.start_with?(COMMENT_START)
186
+ consume(COMMENT_START, skip: true)
187
+ add_text_node_from_buffer_and_clear
188
+ transition_to(:public_comment)
189
+ elsif text.start_with?(PCOMMENT_START)
190
+ consume(PCOMMENT_START, skip: true)
191
+ add_text_node_from_buffer_and_clear
192
+ transition_to(:pcomment)
193
+ elsif text.start_with?(END_TAG_START)
194
+ consume(END_TAG_START, skip: true)
195
+ add_text_node_from_buffer_and_clear
196
+ transition_to(:tag_close)
197
+ elsif text.start_with?(START_TAG_START)
198
+ consume(START_TAG_START, skip: true)
199
+ add_text_node_from_buffer_and_clear
200
+ transition_to(:tag_open)
201
+ elsif text.start_with?(START_BLOCK_START)
202
+ consume(START_BLOCK_START, skip: true)
203
+ add_text_node_from_buffer_and_clear
204
+ transition_to(:block_open)
205
+ elsif text.start_with?(END_BLOCK_START)
206
+ consume(END_BLOCK_START, skip: true)
207
+ add_text_node_from_buffer_and_clear
208
+ transition_to(:block_close)
209
+ elsif text.start_with?(PEXPRESSION_START)
210
+ consume(PEXPRESSION_START, skip: true)
211
+ add_text_node_from_buffer_and_clear
212
+ transition_to(:pexpression)
213
+ elsif text.start_with?(NPEXPRESSION_START)
214
+ consume(NPEXPRESSION_START, skip: true)
215
+ add_text_node_from_buffer_and_clear
216
+ transition_to(:npexpression)
217
+ else
218
+ consume(text[0])
219
+ end
220
+ end
221
+
222
+ # -------------------------------------------------------------------------
223
+ # Comments
224
+ # -------------------------------------------------------------------------
225
+
226
+ # Public (regular HTML) comment
227
+ # In this state, we consume characters until the look-ahead sees the next COMMENT_END (`-->`).
228
+ # Whitespace characters are included in the comment.
229
+ def tokenize_comment
230
+ text = @source[@cursor..]
231
+ syntax_error!("Expected closing '-->'") if text.empty?
232
+
233
+ if text.start_with?(CRLF)
234
+ consume(CRLF, newline: true)
235
+ elsif text.start_with?(NL)
236
+ consume(NL, newline: true)
237
+ elsif text.start_with?(COMMENT_END)
238
+ consume(COMMENT_END, skip: true)
239
+ content = consume_buffer
240
+ @tokens << Token.new(:public_comment, content)
241
+ transition_to(:initial)
242
+ else
243
+ consume(text[0])
244
+ end
245
+ end
246
+
247
+ # Private (ORB) comment
248
+ # In this state, we consume characters until the look-ahead sees the next PCOMMENT_END (`--}`).
249
+ # Whitespace characters are included in the comment.
250
+ def tokenize_pcomment
251
+ text = @source[@cursor..]
252
+ syntax_error!("Expected closing '--}'") if text.empty?
253
+
254
+ if text.start_with?(CRLF)
255
+ consume(CRLF, newline: true)
256
+ elsif text.start_with?(NL)
257
+ consume(NL, newline: true)
258
+ elsif text.start_with?(PCOMMENT_END)
259
+ consume(PCOMMENT_END, skip: true)
260
+ content = consume_buffer
261
+ @tokens << Token.new(:private_comment, content)
262
+ transition_to(:initial)
263
+ else
264
+ consume(text[0])
265
+ end
266
+ end
267
+
268
+ # -------------------------------------------------------------------------
269
+ # Tags
270
+ # -------------------------------------------------------------------------
271
+
272
+ # The tokenizer look-ahead saw a START_TAG_START, landing us in this state.
273
+ def tokenize_tag_open
274
+ # Read the tag name from the input.
275
+ name = tokenize_tag_name
276
+
277
+ # Push a new :tag_open token on the @tokens stack
278
+ @tokens << Token.new(:tag_open, name)
279
+
280
+ # Advance the state to :maybe_tag_open_end
281
+ transition_to(:maybe_tag_open_end)
282
+ end
283
+
284
+ # The tokenizer look-ahead saw a END_TAG_START, landing us in this state.
285
+ def tokenize_tag_close
286
+ # Read the tag name from the input.
287
+ name = tokenize_tag_name
288
+
289
+ # Push a new :tag_close token on the @tokens stack
290
+ @tokens << Token.new(:tag_close, name)
291
+
292
+ # Advance the state to :maybe_tag_close_end
293
+ transition_to(:maybe_tag_close_end)
294
+ end
295
+
296
+ # Recurses to read the tag name from the source, character by character.
297
+ def tokenize_tag_name
298
+ text = @source[@cursor..]
299
+ syntax_error("Unexpected end of input: expected a tag name instead") if text.empty?
300
+
301
+ # We are finished reading the tag name, if we encounter a NAME_STOP_CHAR
302
+ # otherwise, we continue to consume characters and recurse.
303
+ if NAME_STOP_CHARS.include?(text[0])
304
+ consume_buffer
305
+
306
+ else
307
+ consume(text[0])
308
+ tokenize_tag_name
309
+ end
310
+ end
311
+
312
+ # In this state, we are tokenizing the tag definition until we reach the end of the tag.
313
+ # If the tag is self-closing, it will end with `/>`. Otherwise, it will end with `>`.
314
+ # While in this tokenization state, we skip any whitespace characters.
315
+ # Any character we encounter that is neither whitespace nor the end of the tag is considered
316
+ # an attribute and transitions the tokenizer to the `tag_attribute` state.
317
+ def tokenize_maybe_tag_open_end
318
+ text = @source[@cursor..]
319
+ syntax_error!("Unexpected end of input: did you miss a '>' or '/>'?") if text.empty?
320
+
321
+ if text.start_with?(CRLF)
322
+ consume(CRLF, newline: true, skip: true)
323
+ elsif text.start_with?(NL)
324
+ consume(NL, newline: true, skip: true)
325
+ elsif SPACE_CHARS.include?(text[0])
326
+ consume(text[0], skip: true)
327
+ elsif text.start_with?(START_TAG_END_SELF_CLOSING)
328
+ consume(START_TAG_END_SELF_CLOSING, skip: true)
329
+ current_token.set_meta(:self_closing, true)
330
+ current_token.set_meta(:attributes, @attributes)
331
+ clear_attributes
332
+ transition_to(:initial)
333
+ elsif text.start_with?(START_TAG_END)
334
+ consume(START_TAG_END, skip: true)
335
+ current_token.set_meta(:self_closing, VOID_ELEMENTS.include?(current_token.value))
336
+ current_token.set_meta(:attributes, @attributes)
337
+ clear_attributes
338
+ transition_to(:initial)
339
+ elsif text.start_with?(START_TAG_START)
340
+ syntax_error!("Unexpected start of new tag: did you miss a '>' or '/>'?")
341
+ else
342
+ transition_to(:tag_attribute)
343
+ end
344
+ end
345
+
346
+ # In this state, we're looking for the end of the closing tag, which must be `>`.
347
+ # If the next character is `>`, we transition to the `initial` state.
348
+ # Otherwise, we raise a parse error.
349
+ def tokenize_maybe_tag_close_end
350
+ text = @source[@cursor..]
351
+ if text.start_with?(END_TAG_END)
352
+ consume(END_TAG_END, skip: true)
353
+ transition_to(:initial)
354
+ else
355
+ syntax_error!("Syntax error: you must close a tag with '>'")
356
+ end
357
+ end
358
+
359
+ # -------------------------------------------------------------------------
360
+ # Tag Attributes
361
+ # -------------------------------------------------------------------------
362
+
363
+ # In this state, we begin the process of tokenizing an attribute.
364
+ # We start by reading the attribute name and assuming a value of 'true',
365
+ # which is the default for HTML5 attributes without an assigned value.
366
+ # After reading the attribute name, the tokenizer transitions to the
367
+ # `attribute_maybe_value` state.
368
+ def tokenize_tag_attribute
369
+ name = tokenize_tag_name
370
+ @attributes << [name, :bool, true]
371
+
372
+ transition_to(:attribute_maybe_value)
373
+ end
374
+
375
+ # In this state, we attempt to determine whether an attribute value is present.
376
+ # If an attribute value is present, the next character will be `=`. If it is not,
377
+ # we transition to the `maybe_tag_open_end` state.
378
+ # In case an attribute value is present, we transition to the `attribute_value_begin` state.
379
+ # As usual, we skip over any whitespace characters.
380
+ def tokenize_attribute_maybe_value
381
+ text = @source[@cursor..]
382
+ if text.start_with?(CRLF)
383
+ consume(CRLF, newline: true, skip: true)
384
+ elsif text.start_with?(NL)
385
+ consume(NL, newline: true, skip: true)
386
+ elsif SPACE_CHARS.include?(text[0])
387
+ consume(text[0], skip: true)
388
+ elsif text.start_with?(ATTRIBUTE_ASSIGN)
389
+ consume(ATTRIBUTE_ASSIGN, skip: true)
390
+ transition_to(:attribute_value_begin)
391
+ else
392
+ transition_to(:maybe_tag_open_end)
393
+ end
394
+ end
395
+
396
+ # Attribute Values
397
+
398
+ # In this state, we begin the process of tokenizing an attribute value, skipping any whitespace.
399
+ # The first character of the attribute value will determine the type of value we're dealing with.
400
+ # - if the first character is a double quote, we transition to the `attribute_value_double_quote` state.
401
+ # - if the first character is a single quote, we transition to the `attribute_value_single_quote` state.
402
+ # - if the first character is a `{`, we transition to the `attribute_value_expression` state.
403
+ #
404
+ # TODO: we do not support unquoted attribute values at the moment.
405
+ def tokenize_attribute_value_begin
406
+ text = @source[@cursor..]
407
+ if text.start_with?(CRLF)
408
+ consume(CRLF, newline: true, skip: true)
409
+ elsif text.start_with?(NL)
410
+ consume(NL, newline: true, skip: true)
411
+ elsif SPACE_CHARS.include?(text[0])
412
+ consume(text[0], skip: true)
413
+ elsif text.start_with?(DOUBLE_QUOTE)
414
+ consume(DOUBLE_QUOTE, skip: true)
415
+ transition_to(:attribute_value_double_quote)
416
+ elsif text.start_with?(SINGLE_QUOTE)
417
+ consume(SINGLE_QUOTE, skip: true)
418
+ transition_to(:attribute_value_single_quote)
419
+ elsif text.start_with?(BRACE_OPEN)
420
+ consume(BRACE_OPEN, skip: true)
421
+ expr = tokenize_attribute_value_expression
422
+ current_attribute[1] = :expr
423
+ current_attribute[2] = expr
424
+ transition_to(:maybe_tag_open_end)
425
+ else
426
+ syntax_error!("Unexpected character '#{text[0]}' in attribute value definition.")
427
+ end
428
+ end
429
+
430
+ # The attribute value is a dynamic expression, which is enclosed in `{}`.
431
+ # During this state, we consume characters until we reach the closing `}`.
432
+ # We keep track of the number of opening and closing braces on the @braces stack
433
+ # to ensure we have a balanced expression and don't exit too early.
434
+ def tokenize_attribute_value_expression
435
+ text = @source[@cursor..]
436
+ syntax_error!("Unexpected end of input: expected closing `}`") if text.empty?
437
+
438
+ if text.start_with?(CRLF)
439
+ consume(CRLF, newline: true)
440
+ tokenize_attribute_value_expression
441
+ elsif text.start_with?(NL)
442
+ consume(NL, newline: true)
443
+ tokenize_attribute_value_expression
444
+ elsif text.start_with?(BRACE_CLOSE) && @braces.empty?
445
+ consume(BRACE_CLOSE, skip: true)
446
+ expr = consume_buffer
447
+ clear_braces
448
+ expr
449
+ elsif text.start_with?(BRACE_CLOSE)
450
+ consume(BRACE_CLOSE)
451
+ @braces.pop
452
+ tokenize_attribute_value_expression
453
+ elsif text.start_with?(BRACE_OPEN)
454
+ consume(BRACE_OPEN)
455
+ @braces << BRACE_OPEN
456
+ tokenize_attribute_value_expression
457
+ else
458
+ consume(text[0])
459
+ tokenize_attribute_value_expression
460
+ end
461
+ end
462
+
463
+ # The attribute value is enclosed in double quotes ("").
464
+ # If we encounter double curly braces `{{`, we set the `@embedded_expression` flag to true
465
+ # While this flag is set, we consume double quotes as regular characters.
466
+ # Encountering a closing double curly brace `}}` will clear the flag, and the next double quote
467
+ # will be treated as the end of the attribute value.
468
+ #
469
+ # TODO: currently, we ignore the embedded expression and treat it as a regular string.
470
+ # TODO: how should we handle escaped double quotes?
471
+ def tokenize_attribute_value_double_quote
472
+ text = @source[@cursor..]
473
+ syntax_error!("Unexpected end of input: expected closing `\"`") if text.empty?
474
+
475
+ if text.start_with?(CRLF)
476
+ consume(CRLF, newline: true, skip: true)
477
+ elsif text.start_with?(NL)
478
+ consume(NL, newline: true, skip: true)
479
+ elsif text.start_with?(PEXPRESSION_START)
480
+ consume(PEXPRESSION_START)
481
+ @embedded_expression = true
482
+ elsif text.start_with?(PEXPRESSION_END)
483
+ consume(PEXPRESSION_END)
484
+ @embedded_expression = false
485
+ elsif text.start_with?(DOUBLE_QUOTE) && @embedded_expression
486
+ consume(DOUBLE_QUOTE)
487
+ elsif text.start_with?(DOUBLE_QUOTE)
488
+ consume(DOUBLE_QUOTE, skip: true)
489
+ value = consume_buffer
490
+ current_attribute[1] = :str
491
+ current_attribute[2] = value
492
+ transition_to(:maybe_tag_open_end)
493
+ else
494
+ consume(text[0])
495
+ end
496
+ end
497
+
498
+ # The attribute value is enclosed in single quotes ('').
499
+ # If we encounter double curly braces `{{`, we set the `@embedded_expression` flag to true
500
+ # While this flag is set, we consume single quotes as regular characters.
501
+ # Encountering a closing double curly brace `}}` will clear the flag, and the next single quote
502
+ # will be treated as the end of the attribute value.
503
+ #
504
+ # TODO: currently, we ignore the embedded expression and treat it as a regular string.
505
+ # TODO: how should we handle escaped single quotes?
506
+ def tokenize_attribute_value_single_quote
507
+ text = @source[@cursor..]
508
+ syntax_error!("Parse error: expected closing `'`") if text.empty?
509
+
510
+ if text.start_with?(CRLF)
511
+ consume(CRLF, newline: true, skip: true)
512
+ elsif text.start_with?(NL)
513
+ consume(NL, newline: true, skip: true)
514
+ elsif text.start_with?(PEXPRESSION_START)
515
+ consume(PEXPRESSION_START)
516
+ @embedded_expression = true
517
+ elsif text.start_with?(PEXPRESSION_END)
518
+ consume(PEXPRESSION_END)
519
+ @embedded_expression = false
520
+ elsif text.start_with?(SINGLE_QUOTE) && @embedded_expression
521
+ consume(SINGLE_QUOTE)
522
+ elsif text.start_with?(SINGLE_QUOTE)
523
+ consume(SINGLE_QUOTE, skip: true)
524
+ value = consume_buffer
525
+ current_attribute[1] = :str
526
+ current_attribute[2] = value
527
+ transition_to(:maybe_tag_open_end)
528
+ else
529
+ consume(text[0])
530
+ end
531
+ end
532
+
533
+ # -------------------------------------------------------------------------
534
+ # Expressions
535
+ # -------------------------------------------------------------------------
536
+
537
+ # The lookahead in :initial state saw an opening double curly brace `{{`, landing us in this state.
538
+ # We consume characters until we reach the closing double curly brace `}}`.
539
+ # During this state, we keep track of the number of opening and closing braces on the @braces stack
540
+ # to ensure we have a balanced expression and don't exit too early.
541
+ def tokenize_pexpression
542
+ text = @source[@cursor..]
543
+ if text.start_with?(CRLF)
544
+ consume(CRLF, newline: true)
545
+ elsif text.start_with?(NL)
546
+ consume(NL, newline: true)
547
+ elsif text.start_with?(PEXPRESSION_END) && @braces.empty?
548
+ consume(PEXPRESSION_END, skip: true)
549
+ value = consume_buffer.strip
550
+ @tokens << Token.new(:printing_expression, value)
551
+ transition_to(:initial)
552
+ elsif text.start_with?(BRACE_CLOSE)
553
+ consume(BRACE_CLOSE)
554
+ @braces.pop
555
+ elsif text.start_with?(BRACE_OPEN)
556
+ consume(BRACE_OPEN)
557
+ @braces << BRACE_OPEN
558
+ else
559
+ consume(text[0])
560
+ end
561
+ end
562
+
563
+ # The lookahead in :initial state saw an opening curly brace and an percent `{%`, landing us in this state.
564
+ # We consume characters until we reach the closing percent and curly brace `%}`.
565
+ # During this state, we keep track of the number of opening and closing braces on the @braces stack
566
+ # to ensure we have a balanced expression and don't exit too early.
567
+ def tokenize_npexpression
568
+ text = @source[@cursor..]
569
+ if text.start_with?(CRLF)
570
+ consume(CRLF, newline: true)
571
+ elsif text.start_with?(NL)
572
+ consume(NL, newline: true)
573
+ elsif text.start_with?(NPEXPRESSION_END) && @braces.empty?
574
+ consume(NPEXPRESSION_END, skip: true)
575
+ value = consume_buffer.strip
576
+ @tokens << Token.new(:control_expression, value)
577
+ transition_to(:initial)
578
+ elsif text.start_with?(BRACE_CLOSE)
579
+ consume(BRACE_CLOSE)
580
+ @braces.pop
581
+ elsif text.start_with?(BRACE_OPEN)
582
+ consume(BRACE_OPEN)
583
+ @braces << BRACE_OPEN
584
+ else
585
+ consume(text[0])
586
+ end
587
+ end
588
+
589
+ # -------------------------------------------------------------------------
590
+ # Blocks
591
+ # -------------------------------------------------------------------------
592
+
593
+ # The lookahead in :initial state saw a block expression `{#`, landing us in this state.
594
+ def tokenize_block_open
595
+ block_name = tokenize_block_name
596
+ @tokens << Token.new(:block_open, block_name)
597
+ transition_to(:maybe_block_end)
598
+ end
599
+
600
+ # In this state, we consume characters until we reach the end of the block expression.
601
+ # we keep track of the number of opening and closing braces on the @braces stack
602
+ # to ensure we have a balanced expression and don't exit too early.
603
+ def tokenize_maybe_block_end
604
+ text = @source[@cursor..]
605
+ if text.start_with?(CRLF)
606
+ consume(CRLF, newline: true)
607
+ elsif text.start_with?(NL)
608
+ consume(NL, newline: true)
609
+ elsif text.start_with?(BRACE_CLOSE) && @braces.empty?
610
+ consume(BRACE_CLOSE, skip: true)
611
+ block_expr = consume_buffer.strip
612
+ current_token.set_meta(:expression, block_expr)
613
+ clear_braces
614
+ transition_to(:initial)
615
+ elsif text.start_with?(BRACE_CLOSE)
616
+ consume(BRACE_CLOSE)
617
+ @braces.pop
618
+ elsif text.start_with?(BRACE_OPEN)
619
+ consume(BRACE_OPEN)
620
+ braces << BRACE_OPEN
621
+ else
622
+ consume(text[0])
623
+ end
624
+ end
625
+
626
+ # The lookahead in :initial state saw a block end expression `{/`, landing us in this state.
627
+ # We first read the ending block name, then expect to see a closing `}`. Otherwise, we raise a parse error.
628
+ def tokenize_block_close
629
+ block_name = tokenize_block_name
630
+
631
+ text = @source[@cursor..]
632
+
633
+ if text[0] == END_BLOCK_END
634
+ consume(END_BLOCK_END, skip: true)
635
+ @tokens << Token.new(:block_close, block_name)
636
+ transition_to(:initial)
637
+ else
638
+ syntax_error!("Expected block end: did you miss a `}`?")
639
+ end
640
+ end
641
+
642
+ # Recurses to read the block name from the source, character by character.
643
+ def tokenize_block_name
644
+ text = @source[@cursor..]
645
+ syntax_error!("Unexpected end of input: expected a block name") if text.empty?
646
+
647
+ # Finished reading the block name
648
+ if BLOCK_NAME_STOP_CHARS.include?(text[0])
649
+ consume_buffer.strip
650
+
651
+ else
652
+ consume(text[0])
653
+ tokenize_block_name
654
+ end
655
+ end
656
+
657
+ # -------------------------------------------------------------------------
658
+ # Helpers
659
+ # -------------------------------------------------------------------------
660
+
661
+ def transition_to(state)
662
+ @state = state
663
+ end
664
+
665
+ def consume(str, newline: false, skip: false)
666
+ @buffer << str unless skip
667
+ @cursor += str.length
668
+ if newline
669
+ @current_column = @column_offset
670
+ @current_line += 1
671
+ else
672
+ @current_column += str.length
673
+ end
674
+ end
675
+
676
+ def consume_buffer
677
+ result = @buffer.string.clone
678
+ @buffer = StringIO.new
679
+ result
680
+ end
681
+
682
+ def add_text_node_from_buffer_and_clear(remove_whitespace = false)
683
+ content = consume_buffer
684
+ if remove_whitespace
685
+ @tokens << Token.new(:text, content) unless content.strip.empty?
686
+ else
687
+ @tokens << Token.new(:text, content) unless content.empty?
688
+ end
689
+ end
690
+
691
+ def clear_attributes
692
+ @attributes = []
693
+ end
694
+
695
+ def clear_braces
696
+ @braces = []
697
+ end
698
+
699
+ def terminate
700
+ debug "TERMINATED!"
701
+ @cursor = @source.length
702
+ end
703
+
704
+ def current_token
705
+ @tokens.last
706
+ end
707
+
708
+ def current_attribute
709
+ @attributes.last
710
+ end
711
+
712
+ def debug(msg)
713
+ Rails.logger.debug { "[DEBUG:#{caller.length}] #{msg}" } if ENV.fetch("DEBUG", false)
714
+ end
715
+
716
+ def error(message)
717
+ debug "ERROR: #{message}"
718
+ @errors << message
719
+ terminate
720
+ end
721
+
722
+ def check_tokenizer_state
723
+ return if @state == :initial
724
+
725
+ syntax_error!("Parse error: unexpected end of #{HUMAN_READABLE_STATE_NAMES.fetch(@state, 'input')}")
726
+ end
727
+
728
+ def check_for_unclosed_blocks_or_tags
729
+ tags = []
730
+ blocks = []
731
+
732
+ # Walk the token stream and keep track of unclosed tags and blocks
733
+ @tokens.each do |token|
734
+ if token.type == :tag_open && !token.meta[:self_closing]
735
+ tags << token
736
+ elsif token.type == :tag_close
737
+ tags.pop
738
+ elsif token.type == :block_open
739
+ blocks << token
740
+ elsif token.type == :block_close
741
+ blocks.pop
742
+ end
743
+ end
744
+
745
+ syntax_error!("Unexpected end of input: found unclosed tags! #{tags}") unless tags.empty?
746
+
747
+ syntax_error!("Unexpected end of input: found an unclosed ##{blocks.first.value} block.") unless blocks.empty?
748
+
749
+ true
750
+ end
751
+
752
+ # Helper for raising exceptions during tokenization
753
+ def syntax_error!(message)
754
+ raise ORB::SyntaxError.new(message, @current_line)
755
+ end
756
+ end
757
+ end