w_syntax_tree-erb 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,741 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxTree
4
+ module ERB
5
+ class Parser
6
+ # This is the parent class of any kind of errors that will be raised by
7
+ # the parser.
8
class ParseError < StandardError; end

# Raised when the token at the head of the stream is not the one a grammar
# rule requires. The `maybe` and `many` combinators rescue it so optional or
# repeated elements can be attempted; where nothing rescues it, it
# propagates and ends the parse.
class MissingTokenError < ParseError; end
17
+
18
+ attr_reader :source, :tokens
19
+
20
# Prepares a parser for a single template.
#
# @param source [String] the raw template text
def initialize(source)
  @source = source
  # make_tokens only builds an Enumerator; the source is not scanned until
  # tokens are actually requested from it.
  @tokens = make_tokens
end
24
+
25
# Parses the token stream into a Document.
#
# An optional doctype is attempted first, followed by as many top-level
# nodes as can be parsed.
#
# The document location spans every child that ends up in `elements`,
# including the doctype. It is nil only when nothing at all was parsed.
#
# @return [Document]
def parse
  doctype = maybe { parse_doctype }
  elements = many { parse_any_tag }

  children = [doctype].compact + elements

  # Measure the span over the same list that is handed to the Document, so a
  # leading doctype is not left outside the reported location.
  location =
    children.first.location.to(children.last.location) unless children.empty?

  Document.new(elements: children, location: location)
end
34
+
35
# Debugging aid: writes every token to standard output, one per line, in the
# form "type value index line" with the value rendered through #inspect.
#
# @return the token collection that was iterated
def debug_tokens
  @tokens.each do |type, text, offset, line_number|
    puts([type, text.inspect, offset, line_number].join(" "))
  end
end
40
+
41
+ private
42
+
43
# Parses exactly one node that may appear in content position.
#
# The alternatives are attempted in a fixed order and the first one that
# produces a value wins: html comment, erb tag, erb comment (returned as the
# raw token from `consume`), html element, blank line, character data.
#
# @raise [MissingTokenError] when no alternative matches
def parse_any_tag
  alternatives = [
    -> { parse_html_comment },
    -> { parse_erb_tag },
    -> { consume(:erb_comment) },
    -> { parse_html_element },
    -> { parse_blank_line },
    -> { parse_chardata }
  ]

  atleast do
    found = nil
    alternatives.each do |attempt|
      found = maybe(&attempt)
      break if found
    end
    found
  end
end
50
+
51
# Builds the token stream for `source`.
#
# Tokens are produced lazily. Each one is yielded as four values: the token
# type (Symbol), the matched text, the character offset where the match
# starts, and the line number in effect when it was matched. After the last
# character a final :EOF token with a nil value is yielded.
#
# The lexer keeps a stack of states and always lexes according to the top:
#   :outside              - content between tags
#   :inside               - inside an html tag or doctype (names, attributes)
#   :erb_start            - directly after an erb opening tag; looks for a
#                           control keyword (if/unless/elsif/else/case/when/end)
#   :erb                  - the remainder of an erb tag
#   :string_single_quote,
#   :string_double_quote  - inside a quoted attribute value
#
# @return [Enumerator]
# @raise [ParseError] while enumerating, when no rule accepts the next
#   character
def make_tokens
  Enumerator.new do |enum|
    index = 0
    line = 1
    state = %i[outside]

    while index < source.length
      case state.last
      in :outside
        case source[index..]
        when /\A\n{2,}/
          # two or more newlines become ONE blank line token
          enum.yield :blank_line, $&, index, line
          line += $&.count("\n")
        when /\A(?: |\t|\n|\r\n)+/m
          # insignificant whitespace: no token, but lines are still counted
          line += $&.count("\n")
        when /\A<!--.*?-->/m
          # <!-- this is a comment -->
          # (`.` already matches newlines under /m, so no alternation is
          # needed to span lines)
          enum.yield :html_comment, $&, index, line
          line += $&.count("\n")
        when /\A<!DOCTYPE/, /\A<!doctype/
          # <!DOCTYPE
          enum.yield :doctype, $&, index, line
          state << :inside
        when /\A<%#.*?%>/
          # <%# this is an ERB comment %>
          # Lazy match: stop at the FIRST %> so later content on the same
          # line is not swallowed into the comment.
          enum.yield :erb_comment, $&, index, line
        when /\A<%={1,2}/, /\A<%-/, /\A<%/
          # the beginning of an ERB tag: <%, <%-, <%= or <%==
          enum.yield :erb_open, $&, index, line
          state << :erb_start
        when %r{\A</}
          # the beginning of a closing tag: </
          enum.yield :slash_open, $&, index, line
          state << :inside
        when /\A</
          # the beginning of an opening tag: <
          enum.yield :open, $&, index, line
          state << :inside
        when /\A[^<]+/
          # plain text content; it may span lines, so keep the counter in
          # step for the tokens that follow
          enum.yield :text, $&, index, line
          line += $&.count("\n")
        else
          raise ParseError,
                "Unexpected character at #{index}: #{source[index]}"
        end
      in :erb_start
        # Keyword or not, the rest of the tag is lexed in the :erb state.
        state.pop
        state << :erb

        case source[index..]
        when /\A\s*(?<keyword>if|unless|elsif|else|case|when|end)\b/
          # The trailing \b keeps identifiers that merely start with a
          # keyword (end_date, if_present, ...) from being taken for it.
          enum.yield :"erb_#{$~[:keyword]}", $&, index, line
          line += $&.count("\n")
        else
          # No control keyword: re-lex the same position as plain erb code.
          next
        end
      in :erb
        case source[index..]
        when /\A[\n]+/
          # newlines
          enum.yield :erb_code, $&, index, line
          line += $&.count("\n")
        when /\Ado\b(\s*\|[\w\s,]+\|)?\s*-?%>/
          # a block opener closing the tag: do %>, do |a, b| -%>
          enum.yield :erb_do_close, $&, index, line
          line += $&.count("\n")
          state.pop
        when /\A-?%>/
          enum.yield :erb_close, $&, index, line
          state.pop
        when /\A[\p{L}\w]+\b/
          # Split on word boundaries so that `what_to_do` is kept apart from
          # a trailing `do`. `+` (not `*`) guarantees a non-empty match, so
          # the lexer always advances.
          enum.yield :erb_code, $&, index, line
        else
          # anything else is emitted one character at a time
          enum.yield :erb_code, source[index], index, line
          index += 1
          next
        end
      in :string_single_quote
        case source[index..]
        when /\A(?: |\t|\n|\r\n)+/m
          # whitespace
          enum.yield :whitespace, $&, index, line
          line += $&.count("\n")
        when /\A\'/
          # the end of a quoted string
          enum.yield :string_close_single_quote, $&, index, line
          state.pop
        when /\A<%[=]?/
          # the beginning of an ERB tag
          enum.yield :erb_open, $&, index, line
          state << :erb
        when /\A[^<']+/
          # plain text content (may contain newlines)
          enum.yield :text, $&, index, line
          line += $&.count("\n")
        else
          raise ParseError,
                "Unexpected character in string at #{index}: #{source[index]}"
        end
      in :string_double_quote
        case source[index..]
        when /\A(?: |\t|\n|\r\n)+/m
          # whitespace
          enum.yield :whitespace, $&, index, line
          line += $&.count("\n")
        when /\A\"/
          # the end of a quoted string
          enum.yield :string_close_double_quote, $&, index, line
          state.pop
        when /\A<%[=]?/
          # the beginning of an ERB tag
          enum.yield :erb_open, $&, index, line
          state << :erb
        when /\A[^<"]+/
          # plain text content (may contain newlines)
          enum.yield :text, $&, index, line
          line += $&.count("\n")
        else
          raise ParseError,
                "Unexpected character in string at #{index}: #{source[index]}"
        end
      in :inside
        case source[index..]
        when /\A[ \t\r\n]+/
          # whitespace
          line += $&.count("\n")
        when /\A-?%>/
          # the end of an ERB tag: -%> or %>
          enum.yield :erb_close, $&, index, line
          state.pop
        when /\A>/
          # the end of a tag: >
          enum.yield :close, $&, index, line
          state.pop
        when /\A\?>/
          # the end of a tag: ?>
          enum.yield :special_close, $&, index, line
          state.pop
        when %r{\A/>}
          # the end of a self-closing tag
          enum.yield :slash_close, $&, index, line
          state.pop
        when %r{\A/}
          # a forward slash
          enum.yield :slash, $&, index, line
        when /\A=/
          # an equals sign
          enum.yield :equals, $&, index, line
        when /\A[@#]*[:\w\.\-\_]+\b/
          # a name for an element or an attribute
          # strong, vue-component-kebab, VueComponentPascal
          # abc, #abc, @abc, :abc
          enum.yield :name, $&, index, line
        when /\A<%/
          # the beginning of an ERB tag
          enum.yield :erb_open, $&, index, line
          state << :erb
        when /\A"/
          # the beginning of a string
          enum.yield :string_open_double_quote, $&, index, line
          state << :string_double_quote
        when /\A'/
          # the beginning of a string
          enum.yield :string_open_single_quote, $&, index, line
          state << :string_single_quote
        else
          raise ParseError,
                "Unexpected character at #{index}: #{source[index]}"
        end
      end

      # Every branch that reaches this point matched a regex; skip past it.
      index += $&.length
    end

    enum.yield :EOF, nil, index, line
  end
end
275
+
276
+ # If the next token in the list of tokens matches the expected type, then
277
+ # we're going to create a new Token, advance the token enumerator, and
278
+ # return the new Token. Otherwise we're going to raise a
279
+ # MissingTokenError.
280
# @param expected [Symbol] the token type required at the head of the stream
# @return [Token] the consumed token; its location covers the matched text
#   and its end line accounts for newlines inside the value
# @raise [MissingTokenError] when the next token has a different type (the
#   stream is left untouched in that case)
def consume(expected)
  type, value, index, line = tokens.peek

  unless expected == type
    raise MissingTokenError, "expected #{expected} got #{type}"
  end

  tokens.next

  span =
    Location.new(
      start_char: index,
      end_char: index + value.length,
      start_line: line,
      end_line: line + value.count("\n")
    )

  Token.new(type: type, value: value, location: span)
end
301
+
302
+ # We're going to yield to the block which should attempt to consume some
303
+ # number of tokens. If any of them are missing, then we're going to return
304
+ # nil from this block.
305
# @yield a parsing attempt
# @return the block's value, or nil when the block raised MissingTokenError
def maybe
  begin
    yield
  rescue MissingTokenError
    nil
  end
end
309
+
310
+ # We're going to attempt to parse everything by yielding to the block. If
311
+ # nothing is returned by the block, then we're going to raise an error.
312
+ # Otherwise we'll return the value returned by the block.
313
# @yield a parsing attempt, typically a chain of `maybe` alternatives
# @return the block's value when it is not nil
# @raise [MissingTokenError] when the block returns nil
def atleast
  yield.tap { |outcome| raise MissingTokenError if outcome.nil? }
end
318
+
319
+ # We're going to attempt to parse with the block many times. We'll stop
320
+ # parsing once we get an error back from the block.
321
# @yield a parsing attempt, invoked repeatedly
# @return [Array] the values from every successful attempt, in order; empty
#   when the very first attempt raised MissingTokenError
def many
  collected = []
  loop { collected << yield }
  collected
rescue MissingTokenError
  # The first failed attempt ends the repetition; keep what was gathered.
  collected
end
334
+
335
# Parses consecutive nodes until one is an instance of any of `classes`.
#
# @param classes [Array<Class>] node classes that terminate the run
# @return [Array] every node parsed, with the terminating node last
# @raise [MissingTokenError] propagated from parse_any_tag when no further
#   node can be parsed before a terminator is found
def parse_until_erb(classes:)
  [].tap do |nodes|
    loop do
      nodes << parse_any_tag
      break if classes.any? { |klass| nodes.last.is_a?(klass) }
    end
  end
end
346
+
347
# Parses an opening html tag: `<`, a name, any attributes, then `>` or `/>`.
#
# @return [HtmlNode::OpeningTag]
# @raise [MissingTokenError] when the stream does not start with `<` plus a
#   name, or no closing `>` / `/>` follows the attributes
# @raise [ParseError] when the name starts with `@`, `:` or `#`; the lexer
#   accepts those prefixes for names, but this method rejects them for tags
def parse_html_opening_tag
  opening = consume(:open)
  name = consume(:name)

  if name.value.match?(/\A[@:#]/)
    # Report the offending text, not the Token object wrapping it.
    raise ParseError, "Invalid html-tag name #{name.value}"
  end

  attributes = many { parse_html_attribute }

  closing =
    atleast do
      maybe { consume(:close) } || maybe { consume(:slash_close) }
    end

  HtmlNode::OpeningTag.new(
    opening: opening,
    name: name,
    attributes: attributes,
    closing: closing,
    location: opening.location.to(closing.location)
  )
end
368
+
369
# Parses a closing html tag: `</`, a name, then `>`.
#
# @return [HtmlNode::ClosingTag]
# @raise [MissingTokenError] when any of the three tokens is absent
def parse_html_closing
  slash_open = consume(:slash_open)
  tag_name = consume(:name)
  close = consume(:close)
  span = slash_open.location.to(close.location)

  HtmlNode::ClosingTag.new(
    opening: slash_open,
    name: tag_name,
    closing: close,
    location: span
  )
end
381
+
382
# Parses a complete html element.
#
# An opening tag that does not end in `>` (that is, one closed with `/>`)
# yields a node made of the opening tag alone. Otherwise child nodes are
# parsed until no more match, and a closing tag with the same name must
# follow.
#
# @return [HtmlNode]
# @raise [ParseError] when the closing tag is missing or names another tag
def parse_html_element
  opening = parse_html_opening_tag

  unless opening.closing.value == ">"
    return HtmlNode.new(opening: opening, location: opening.location)
  end

  children = many { parse_any_tag }
  closing = maybe { parse_html_closing }
  expected_name = opening.name.value

  if closing.nil?
    raise ParseError, "Missing closing tag for <#{expected_name}> at #{opening.location}"
  end

  unless closing.name.value == expected_name
    raise ParseError, "Expected closing tag for <#{expected_name}> but got <#{closing.name.value}> at #{closing.location}"
  end

  HtmlNode.new(
    opening: opening,
    elements: children,
    closing: closing,
    location: opening.location.to(closing.location)
  )
end
413
+
414
# Builds the node for a `case` or `when` erb tag whose own tag (`erb_node`)
# has already been parsed.
#
# Following nodes are collected up to and including the next ErbCaseWhen,
# ErbElse or ErbEnd; that terminating node becomes the `closing` of the
# result and everything before it becomes its `elements`.
#
# @param erb_node [ErbNode] the already-parsed `case`/`when` tag
# @return [ErbCase, ErbCaseWhen]
# @raise [ParseError] when no terminating tag is found, or when the keyword
#   of `erb_node` is neither `case` nor `when`
def parse_erb_case(erb_node)
  terminators = [ErbCaseWhen, ErbElse, ErbEnd]
  elements = maybe { parse_until_erb(classes: terminators) } || []

  erb_tag = elements.pop

  unless terminators.any? { |klass| erb_tag.is_a?(klass) }
    # Name the tag that is actually unterminated rather than "if".
    tag_name = erb_node.keyword.type == :erb_when ? "when" : "case"
    raise(
      ParseError,
      "Found no matching erb-tag to the #{tag_name}-tag at #{erb_node.location}"
    )
  end

  node_class =
    case erb_node.keyword.type
    when :erb_case
      ErbCase
    when :erb_when
      ErbCaseWhen
    else
      raise(
        ParseError,
        "Found no matching when- or else-tag to the case-tag at #{erb_node.location}"
      )
    end

  node_class.new(
    opening: erb_node,
    elements: elements,
    closing: erb_tag,
    location: erb_node.location.to(erb_tag.location)
  )
end
451
+
452
# Builds the node for an `if`, `unless` or `elsif` erb tag whose own tag
# (`erb_node`) has already been parsed.
#
# Following nodes are collected up to and including the next ErbElsif,
# ErbElse or ErbEnd. That last node becomes `closing`; the rest become
# `elements`.
#
# @param erb_node [ErbNode] the already-parsed conditional tag
# @return [ErbIf, ErbUnless, ErbElsif]
# @raise [ParseError] when no terminating tag is found, or when the keyword
#   of `erb_node` is not one of the three handled here
def parse_erb_if(erb_node)
  elements =
    maybe { parse_until_erb(classes: [ErbElsif, ErbElse, ErbEnd]) } || []

  erb_tag = elements.pop

  # NOTE(review): ErbControl is presumably the common parent of the
  # elsif/else node classes; confirm against its definition.
  unless [ErbControl, ErbEnd].any? { |klass| erb_tag.is_a?(klass) }
    raise ParseError, "Found no matching erb-tag to the if-tag at #{erb_node.location}"
  end

  node_class =
    case erb_node.keyword.type
    when :erb_if then ErbIf
    when :erb_unless then ErbUnless
    when :erb_elsif then ErbElsif
    else
      raise ParseError, "Found no matching elsif- or else-tag to the if-tag at #{erb_node.location}"
    end

  node_class.new(
    opening: erb_node,
    elements: elements,
    closing: erb_tag,
    location: erb_node.location.to(erb_tag.location)
  )
end
494
+
495
# Builds the node for an `else` erb tag whose own tag (`erb_node`) has
# already been parsed. Following nodes are collected up to and including the
# next ErbEnd, which becomes `closing`.
#
# @param erb_node [ErbNode] the already-parsed `else` tag
# @return [ErbElse]
# @raise [ParseError] when no end-tag is found
def parse_erb_else(erb_node)
  body = maybe { parse_until_erb(classes: [ErbEnd]) } || []
  terminator = body.pop

  unless terminator.is_a?(ErbEnd)
    raise ParseError, "Found no matching end-tag for the else-tag at #{erb_node.location}"
  end

  span = erb_node.location.to(terminator.location)
  ErbElse.new(opening: erb_node, elements: body, closing: terminator, location: span)
end
514
+
515
# Converts an already-parsed `end` erb tag into an ErbEnd node. The tags,
# keyword and location are carried over; the content is dropped (nil).
#
# @param erb_node [ErbNode] the already-parsed `end` tag
# @return [ErbEnd]
def parse_erb_end(erb_node)
  attributes = {
    opening_tag: erb_node.opening_tag,
    keyword: erb_node.keyword,
    content: nil,
    closing_tag: erb_node.closing_tag,
    location: erb_node.location
  }

  ErbEnd.new(**attributes)
end
524
+
525
# Parses one erb tag and, for control-flow tags, the construct it opens.
#
# After the opening token an optional control keyword is consumed, then the
# tag content up to its closing token. Depending on the keyword the finished
# tag is handed to parse_erb_if / parse_erb_case / parse_erb_else /
# parse_erb_end. A tag without keyword that closes with a do-close is
# turned into an ErbBlock reaching to its matching end-tag; any other tag is
# returned as a plain ErbNode.
#
# @raise [MissingTokenError] when the stream does not start with an erb
#   opening token (nothing has been consumed, so callers may try something
#   else)
# @raise [ParseError] when the tag cannot be completed after its opening
#   token has been consumed
def parse_erb_tag
  opening_tag = consume(:erb_open)

  keyword = nil
  %i[erb_if erb_unless erb_elsif erb_else erb_end erb_case erb_when].each do |type|
    keyword = maybe { consume(type) }
    break if keyword
  end

  content = parse_until_erb_close
  closing_tag = content.pop

  unless closing_tag.is_a?(ErbClose)
    raise ParseError, "Found no matching closing tag for the erb-tag at #{opening_tag.location}"
  end

  erb_node =
    ErbNode.new(
      opening_tag: opening_tag,
      keyword: keyword,
      content: content,
      closing_tag: closing_tag,
      location: opening_tag.location.to(closing_tag.location)
    )

  case keyword&.type
  when :erb_if, :erb_unless, :erb_elsif then return parse_erb_if(erb_node)
  when :erb_case, :erb_when then return parse_erb_case(erb_node)
  when :erb_else then return parse_erb_else(erb_node)
  when :erb_end then return parse_erb_end(erb_node)
  end

  # No control keyword: only a `do` at the end of the tag opens a block.
  return erb_node unless closing_tag.is_a?(ErbDoClose)

  elements = maybe { parse_until_erb(classes: [ErbEnd]) } || []
  erb_end = elements.pop

  unless erb_end.is_a?(ErbEnd)
    raise ParseError, "Found no matching end-tag for the do-tag at #{erb_node.location}"
  end

  ErbBlock.new(
    opening: erb_node,
    elements: elements,
    closing: erb_end,
    location: erb_node.location.to(erb_end.location)
  )
rescue MissingTokenError => error
  # Before the opening token was consumed this is an ordinary "not an erb
  # tag" signal. Afterwards tokens have been used up, so the failure must
  # not be swallowed by a surrounding `maybe`.
  raise error unless opening_tag

  raise ParseError, "Could not parse ERB-tag at #{opening_tag.location}"
end
595
+
596
# Collects the pieces of an erb tag up to and including its closing node.
#
# Each step takes, in this order of preference, a do-close, a plain close,
# or one erb code token. Collection stops once the piece just taken is an
# ErbClose.
#
# @return [Array] the collected pieces, closing node last
# @raise [MissingTokenError] when none of the three alternatives matches
def parse_until_erb_close
  [].tap do |pieces|
    loop do
      piece =
        atleast do
          maybe { parse_erb_do_close } || maybe { parse_erb_close } ||
            maybe { consume(:erb_code) }
        end

      pieces << piece
      break if piece.is_a?(ErbClose)
    end
  end
end
612
+
613
# Wraps a blank-line token in a CharData node.
#
# @return [CharData]
# @raise [MissingTokenError] when the next token is not a blank line
def parse_blank_line
  consume(:blank_line).then do |token|
    CharData.new(value: token, location: token.location)
  end
end
618
+
619
# Wraps an erb closing token in an ErbClose node.
#
# @return [ErbClose]
# @raise [MissingTokenError] when the next token is not an erb close
def parse_erb_close
  token = consume(:erb_close)

  ErbClose.new(closing: token, location: token.location)
end
624
+
625
# Wraps an erb do-close token (a trailing `do` together with the tag's
# closing marker) in an ErbDoClose node.
#
# @return [ErbDoClose]
# @raise [MissingTokenError] when the next token is not an erb do-close
def parse_erb_do_close
  token = consume(:erb_do_close)

  ErbDoClose.new(closing: token, location: token.location)
end
630
+
631
# Parses an attribute value.
#
# A value may be quoted with double or single quotes, in which case it is
# made of text tokens, whitespace tokens and erb tags up to the matching
# closing quote. Without an opening quote a single name token is taken as
# the whole value and the resulting node has no opening or closing.
#
# @return [HtmlString]
# @raise [MissingTokenError] when neither a quote nor a name follows, or the
#   matching closing quote is not found
def parse_html_string
  opening =
    maybe { consume(:string_open_double_quote) } ||
      maybe { consume(:string_open_single_quote) }

  unless opening
    bare = consume(:name)
    return(
      HtmlString.new(
        opening: nil,
        contents: [bare],
        closing: nil,
        location: bare.location
      )
    )
  end

  contents =
    many do
      atleast do
        maybe { consume(:text) } || maybe { consume(:whitespace) } ||
          maybe { parse_erb_tag }
      end
    end

  # The closing quote has to be of the same kind as the opening one.
  closing_type =
    if opening.type == :string_open_double_quote
      :string_close_double_quote
    else
      :string_close_single_quote
    end
  closing = consume(closing_type)

  HtmlString.new(
    opening: opening,
    contents: contents,
    closing: closing,
    location: opening.location.to(closing.location)
  )
end
671
+
672
# Parses one attribute: a name, optionally followed by `=` and a value.
#
# @return [HtmlAttribute] with nil `equals` and `value` for a bare attribute
# @raise [MissingTokenError] when the next token is not a name, or when a
#   value cannot be parsed after `=`
def parse_html_attribute
  key = consume(:name)
  equals = maybe { consume(:equals) }

  # A bare attribute such as `disabled` has neither `=` nor a value.
  if equals.nil?
    return HtmlAttribute.new(key: key, equals: nil, value: nil, location: key.location)
  end

  value = parse_html_string
  span = key.location.to(value.location)

  HtmlAttribute.new(key: key, equals: equals, value: value, location: span)
end
694
+
695
# Parses a run of character data.
#
# Quote tokens, text tokens and whitespace tokens are gathered for as long
# as they follow one another. Several tokens are merged into one :text
# token spanning from the first to the last; a single token is used as is.
#
# @return [CharData, nil] nil when not a single token could be gathered
def parse_chardata
  pieces =
    many do
      atleast do
        maybe { consume(:string_open_double_quote) } ||
          maybe { consume(:string_open_single_quote) } ||
          maybe { consume(:string_close_double_quote) } ||
          maybe { consume(:string_close_single_quote) } ||
          maybe { consume(:text) } || maybe { consume(:whitespace) }
      end
    end

  return nil if pieces.empty?

  token = pieces.first
  if pieces.size > 1
    token =
      Token.new(
        type: :text,
        value: pieces.map(&:value).join(""),
        location: pieces.first.location.to(pieces.last.location)
      )
  end

  CharData.new(value: token, location: token.location)
end
720
+
721
# Parses a doctype declaration: the doctype opener, one name, then `>`.
#
# @return [Doctype]
# @raise [MissingTokenError] when any of the three tokens is absent
def parse_doctype
  opener = consume(:doctype)
  type_name = consume(:name)
  closer = consume(:close)
  span = opener.location.to(closer.location)

  Doctype.new(opening: opener, name: type_name, closing: closer, location: span)
end
733
+
734
# Wraps an html comment token in an HtmlComment node.
#
# @return [HtmlComment]
# @raise [MissingTokenError] when the next token is not an html comment
def parse_html_comment
  consume(:html_comment).then do |token|
    HtmlComment.new(token: token, location: token.location)
  end
end
739
+ end
740
+ end
741
+ end